All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2] Add in-kernel irqchip
@ 2009-10-07 22:08 Glauber Costa
  2009-10-07 22:08 ` [Qemu-devel] [PATCH v2 1/9] add base-addr field to io apic state Glauber Costa
  0 siblings, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

This is a resend of the irqchip series, with most of the suggestions
merged. This series include code to disable the irqchip.

If you have new suggestions, or I forgot something, let me know

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 1/9] add base-addr field to io apic state
  2009-10-07 22:08 [Qemu-devel] [PATCH v2] Add in-kernel irqchip Glauber Costa
@ 2009-10-07 22:08 ` Glauber Costa
  2009-10-07 22:08   ` [Qemu-devel] [PATCH v2 2/9] Save missing fields in VMState Glauber Costa
  0 siblings, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

It is not saved in savevm. Will do it in another patch.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/ioapic.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/hw/ioapic.c b/hw/ioapic.c
index b0ad78f..e37157f 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -28,6 +28,7 @@
 //#define DEBUG_IOAPIC
 
 #define IOAPIC_NUM_PINS			0x18
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
 #define IOAPIC_LVT_MASKED 		(1<<16)
 
 #define IOAPIC_TRIGGER_EDGE		0
@@ -45,6 +46,7 @@
 struct IOAPICState {
     uint8_t id;
     uint8_t ioregsel;
+    uint64_t base_address;
 
     uint32_t irr;
     uint64_t ioredtbl[IOAPIC_NUM_PINS];
@@ -210,6 +212,7 @@ static void ioapic_reset(void *opaque)
     int i;
 
     memset(s, 0, sizeof(*s));
+    s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
     for(i = 0; i < IOAPIC_NUM_PINS; i++)
         s->ioredtbl[i] = 1 << 16; /* mask LVT */
 }
@@ -237,7 +240,7 @@ qemu_irq *ioapic_init(void)
 
     io_memory = cpu_register_io_memory(ioapic_mem_read,
                                        ioapic_mem_write, s);
-    cpu_register_physical_memory(0xfec00000, 0x1000, io_memory);
+    cpu_register_physical_memory(IOAPIC_DEFAULT_BASE_ADDRESS, 0x1000, io_memory);
 
     vmstate_register(0, &vmstate_ioapic, s);
     qemu_register_reset(ioapic_reset, s);
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 2/9] Save missing fields in VMState
  2009-10-07 22:08 ` [Qemu-devel] [PATCH v2 1/9] add base-addr field to io apic state Glauber Costa
@ 2009-10-07 22:08   ` Glauber Costa
  2009-10-07 22:08     ` [Qemu-devel] [PATCH v2 3/9] provide in-kernel ioapic Glauber Costa
  0 siblings, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

irr and base_address are not saved. Bump version to two, and make sure they
are part of the game here.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/ioapic.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/hw/ioapic.c b/hw/ioapic.c
index e37157f..d475654 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -195,12 +195,14 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
 
 static const VMStateDescription vmstate_ioapic = {
     .name = "ioapic",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields      = (VMStateField []) {
         VMSTATE_UINT8(id, IOAPICState),
         VMSTATE_UINT8(ioregsel, IOAPICState),
+        VMSTATE_UINT64_V(base_address, IOAPICState, 2),
+        VMSTATE_UINT32_V(irr, IOAPICState, 2),
         VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
         VMSTATE_END_OF_LIST()
     }
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-07 22:08   ` [Qemu-devel] [PATCH v2 2/9] Save missing fields in VMState Glauber Costa
@ 2009-10-07 22:08     ` Glauber Costa
  2009-10-07 22:08       ` [Qemu-devel] [PATCH v2 4/9] provide in-kernel apic Glauber Costa
                         ` (2 more replies)
  0 siblings, 3 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

This patch provides kvm with an in-kernel ioapic. We are currently not enabling it.
The code is heavily based on what's in qemu-kvm.git.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/ioapic.c |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 kvm-all.c   |   20 ++++++++++++++
 kvm.h       |    4 +++
 3 files changed, 107 insertions(+), 1 deletions(-)

diff --git a/hw/ioapic.c b/hw/ioapic.c
index d475654..d6a9dac 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -24,6 +24,7 @@
 #include "pc.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
+#include "kvm.h"
 
 //#define DEBUG_IOAPIC
 
@@ -193,6 +194,79 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
     }
 }
 
+static int kvm_kernel_ioapic_load_from_user(IOAPICState *s)
+{
+    int r = 0;
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_ioapic_state *kioapic;
+    int i;
+
+    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
+        return 0;
+
+    chip.chip_id = KVM_IRQCHIP_IOAPIC;
+    kioapic = &chip.chip.ioapic;
+
+    kioapic->id = s->id;
+    kioapic->ioregsel = s->ioregsel;
+    kioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+    kioapic->irr = s->irr;
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        kioapic->redirtbl[i].bits = s->ioredtbl[i];
+    }
+
+    r = kvm_set_irqchip(&chip);
+#endif
+    return r;
+}
+
+static void kvm_kernel_ioapic_save_to_user(IOAPICState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_ioapic_state *kioapic;
+    int i;
+
+    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
+        return;
+    chip.chip_id = KVM_IRQCHIP_IOAPIC;
+    kvm_get_irqchip(&chip);
+    kioapic = &chip.chip.ioapic;
+
+    s->id = kioapic->id;
+    s->ioregsel = kioapic->ioregsel;
+    s->irr = kioapic->irr;
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        s->ioredtbl[i] = kioapic->redirtbl[i].bits;
+    }
+#endif
+}
+
+static void ioapic_pre_save(void *opaque)
+{
+    IOAPICState *s = (void *)opaque;
+
+    kvm_kernel_ioapic_save_to_user(s);
+}
+
+static int ioapic_pre_load(void *opaque)
+{
+    IOAPICState *s = opaque;
+
+    /* in case we are doing version 1, we just set these to sane values */
+    s->irr = 0;
+    return 0;
+}
+
+static int ioapic_post_load(void *opaque, int version_id)
+{
+    IOAPICState *s = opaque;
+
+    return kvm_kernel_ioapic_load_from_user(s);
+}
+
+
 static const VMStateDescription vmstate_ioapic = {
     .name = "ioapic",
     .version_id = 2,
@@ -205,7 +279,10 @@ static const VMStateDescription vmstate_ioapic = {
         VMSTATE_UINT32_V(irr, IOAPICState, 2),
         VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
         VMSTATE_END_OF_LIST()
-    }
+    },
+    .pre_load = ioapic_pre_load,
+    .post_load = ioapic_post_load,
+    .pre_save = ioapic_pre_save,
 };
 
 static void ioapic_reset(void *opaque)
@@ -217,6 +294,11 @@ static void ioapic_reset(void *opaque)
     s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
     for(i = 0; i < IOAPIC_NUM_PINS; i++)
         s->ioredtbl[i] = 1 << 16; /* mask LVT */
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+        kvm_kernel_ioapic_load_from_user(s);
+    }
+#endif
 }
 
 static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
diff --git a/kvm-all.c b/kvm-all.c
index 48ae26c..d795285 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -411,6 +411,26 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+#ifdef KVM_CAP_IRQCHIP
+int kvm_set_irqchip(struct kvm_irqchip *chip)
+{
+    if (!kvm_state->irqchip_in_kernel) {
+        return 0;
+    }
+
+    return kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
+}
+
+int kvm_get_irqchip(struct kvm_irqchip *chip)
+{
+    if (!kvm_state->irqchip_in_kernel) {
+        return 0;
+    }
+
+    return kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
+}
+#endif
+
 int kvm_init(int smp_cpus)
 {
     static const char upgrade_note[] =
diff --git a/kvm.h b/kvm.h
index e7d5beb..8d4afa0 100644
--- a/kvm.h
+++ b/kvm.h
@@ -16,6 +16,7 @@
 
 #include "config.h"
 #include "qemu-queue.h"
+#include <linux/kvm.h>
 
 #ifdef CONFIG_KVM
 extern int kvm_allowed;
@@ -63,6 +64,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
 int kvm_pit_in_kernel(void);
 int kvm_irqchip_in_kernel(void);
 
+int kvm_set_irqchip(struct kvm_irqchip *chip);
+int kvm_get_irqchip(struct kvm_irqchip *chip);
+
 /* internal API */
 
 struct KVMState;
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 4/9] provide in-kernel apic
  2009-10-07 22:08     ` [Qemu-devel] [PATCH v2 3/9] provide in-kernel ioapic Glauber Costa
@ 2009-10-07 22:08       ` Glauber Costa
  2009-10-07 22:08         ` [Qemu-devel] [PATCH v2 5/9] provide apic_set_irq_delivered Glauber Costa
  2009-10-08 13:55           ` [Qemu-devel] " Anthony Liguori
  2009-10-08 11:46       ` [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic Jan Kiszka
  2009-10-08 13:49         ` [Qemu-devel] " Anthony Liguori
  2 siblings, 2 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

This patch provides kvm with an in-kernel apic. We are currently not enabling it.
The code is heavily based on what's in qemu-kvm.git.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/apic.c         |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 kvm.h             |    3 +
 target-i386/kvm.c |   18 +++++++
 3 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index c89008e..5635607 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
 #endif
     if (!s)
         return;
-    s->apicbase = (val & 0xfffff000) |
+
+    if (kvm_enabled() && kvm_irqchip_in_kernel())
+        s->apicbase = val;
+    else
+        s->apicbase = (val & 0xfffff000) |
         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
     /* if disabled, cannot be enabled again */
     if (!(val & MSR_IA32_APICBASE_ENABLE)) {
@@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env)
     s->wait_for_sipi = 1;
 
     env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
+
+#ifdef KVM_CAP_MP_STATE
+    if (kvm_enabled() && kvm_irqchip_in_kernel())
+        env->mp_state
+            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
+#endif
+
 }
 
 static void apic_startup(APICState *s, int vector_num)
@@ -903,12 +914,120 @@ static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id)
+{
+    return *((uint32_t *) (kapic->regs + (reg_id << 4)));
+}
+
+static inline void kapic_set_reg(struct kvm_lapic_state *kapic,
+                                 int reg_id, uint32_t val)
+{
+    *((uint32_t *) (kapic->regs + (reg_id << 4))) = val;
+}
+#endif
+
+static int kvm_kernel_lapic_load_from_user(APICState *s)
+{
+    int r = 0;
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_lapic_state apic;
+    struct kvm_lapic_state *klapic = &apic;
+    int i;
+
+    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
+        return 0;
+
+    memset(klapic, 0, sizeof apic);
+    kapic_set_reg(klapic, 0x2, s->id << 24);
+    kapic_set_reg(klapic, 0x8, s->tpr);
+    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
+    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
+    kapic_set_reg(klapic, 0xf, s->spurious_vec);
+    for (i = 0; i < 8; i++) {
+        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
+        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
+        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
+    }
+    kapic_set_reg(klapic, 0x28, s->esr);
+    kapic_set_reg(klapic, 0x30, s->icr[0]);
+    kapic_set_reg(klapic, 0x31, s->icr[1]);
+    for (i = 0; i < APIC_LVT_NB; i++)
+        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
+    kapic_set_reg(klapic, 0x38, s->initial_count);
+    kapic_set_reg(klapic, 0x3e, s->divide_conf);
+
+    r = kvm_set_lapic(s->cpu_env, klapic);
+#endif
+    return r;
+}
+
+static void kvm_kernel_lapic_save_to_user(APICState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_lapic_state apic;
+    struct kvm_lapic_state *kapic = &apic;
+    int i, v;
+
+    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
+        return;
+
+    kvm_get_lapic(s->cpu_env, kapic);
+
+    s->id = kapic_reg(kapic, 0x2) >> 24;
+    s->tpr = kapic_reg(kapic, 0x8);
+    s->arb_id = kapic_reg(kapic, 0x9);
+    s->log_dest = kapic_reg(kapic, 0xd) >> 24;
+    s->dest_mode = kapic_reg(kapic, 0xe) >> 28;
+    s->spurious_vec = kapic_reg(kapic, 0xf);
+    for (i = 0; i < 8; i++) {
+        s->isr[i] = kapic_reg(kapic, 0x10 + i);
+        s->tmr[i] = kapic_reg(kapic, 0x18 + i);
+        s->irr[i] = kapic_reg(kapic, 0x20 + i);
+    }
+    s->esr = kapic_reg(kapic, 0x28);
+    s->icr[0] = kapic_reg(kapic, 0x30);
+    s->icr[1] = kapic_reg(kapic, 0x31);
+    for (i = 0; i < APIC_LVT_NB; i++)
+    s->lvt[i] = kapic_reg(kapic, 0x32 + i);
+    s->initial_count = kapic_reg(kapic, 0x38);
+    s->divide_conf = kapic_reg(kapic, 0x3e);
+
+    v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
+    s->count_shift = (v + 1) & 7;
+
+    s->initial_count_load_time = qemu_get_clock(vm_clock);
+    apic_timer_update(s, s->initial_count_load_time);
+#endif
+}
+
+static void qemu_kvm_load_lapic(CPUState *env)
+{
+    kvm_kernel_lapic_load_from_user(env->apic_state);
+}
+
+static void apic_pre_save(void *opaque)
+{
+    APICState *s = (void *)opaque;
+
+    kvm_kernel_lapic_save_to_user(s);
+}
+
+static int apic_post_load(void *opaque, int version_id)
+{
+    APICState *s = opaque;
+
+    return kvm_kernel_lapic_load_from_user(s);
+}
+
 static const VMStateDescription vmstate_apic = {
     .name = "apic",
     .version_id = 3,
     .minimum_version_id = 3,
     .minimum_version_id_old = 1,
     .load_state_old = apic_load_old,
+    .pre_save = apic_pre_save,
+    .post_load = apic_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_UINT32(apicbase, APICState),
         VMSTATE_UINT8(id, APICState),
@@ -933,9 +1052,8 @@ static const VMStateDescription vmstate_apic = {
     }
 };
 
-static void apic_reset(void *opaque)
+static void apic_do_reset(APICState *s)
 {
-    APICState *s = opaque;
     int bsp;
 
     cpu_synchronize_state(s->cpu_env);
@@ -957,6 +1075,15 @@ static void apic_reset(void *opaque)
     }
 }
 
+static void apic_reset(void *opaque)
+{
+    APICState *s = opaque;
+
+    apic_do_reset(s);
+
+    qemu_kvm_load_lapic(s->cpu_env);
+}
+
 static CPUReadMemoryFunc * const apic_mem_read[3] = {
     apic_mem_readb,
     apic_mem_readw,
@@ -981,7 +1108,7 @@ int apic_init(CPUState *env)
     s->id = env->cpuid_apic_id;
     s->cpu_env = env;
 
-    apic_reset(s);
+    apic_do_reset(s);
     msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
diff --git a/kvm.h b/kvm.h
index 8d4afa0..099b55e 100644
--- a/kvm.h
+++ b/kvm.h
@@ -97,6 +97,9 @@ int kvm_arch_init(KVMState *s, int smp_cpus);
 
 int kvm_arch_init_vcpu(CPUState *env);
 
+int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s);
+int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s);
+
 struct kvm_guest_debug;
 struct kvm_debug_exit_arch;
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 7010999..d485d4c 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -957,3 +957,21 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
     }
 }
 #endif /* KVM_CAP_SET_GUEST_DEBUG */
+
+#ifdef KVM_CAP_IRQCHIP
+int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s)
+{
+    if (!kvm_irqchip_in_kernel())
+        return 0;
+
+    return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s);
+}
+
+int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s)
+{
+    if (!kvm_irqchip_in_kernel())
+        return 0;
+
+    return kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s);
+}
+#endif
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 5/9] provide apic_set_irq_delivered
  2009-10-07 22:08       ` [Qemu-devel] [PATCH v2 4/9] provide in-kernel apic Glauber Costa
@ 2009-10-07 22:08         ` Glauber Costa
  2009-10-07 22:08           ` [Qemu-devel] [PATCH v2 6/9] provide in-kernel i8259 chip Glauber Costa
  2009-10-08 13:55           ` [Qemu-devel] " Anthony Liguori
  1 sibling, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

i8259 chip will use it, so provide it, and export it through pc.h

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/apic.c |    5 +++++
 hw/pc.h   |    1 +
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 5635607..c32e2b5 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -392,6 +392,11 @@ void apic_reset_irq_delivered(void)
     apic_irq_delivered = 0;
 }
 
+void apic_set_irq_delivered(void)
+{
+    apic_irq_delivered = 1;
+}
+
 int apic_get_irq_delivered(void)
 {
     return apic_irq_delivered;
diff --git a/hw/pc.h b/hw/pc.h
index d85f7fd..15d0140 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -46,6 +46,7 @@ int apic_get_interrupt(CPUState *env);
 qemu_irq *ioapic_init(void);
 void ioapic_set_irq(void *opaque, int vector, int level);
 void apic_reset_irq_delivered(void);
+void apic_set_irq_delivered(void);
 int apic_get_irq_delivered(void);
 
 /* i8254.c */
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 6/9] provide in-kernel i8259 chip
  2009-10-07 22:08         ` [Qemu-devel] [PATCH v2 5/9] provide apic_set_irq_delivered Glauber Costa
@ 2009-10-07 22:08           ` Glauber Costa
  2009-10-07 22:08             ` [Qemu-devel] [PATCH v2 7/9] initialize " Glauber Costa
  0 siblings, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

This patch provides kvm with an in-kernel i8259 chip. We are currently not enabling it.
The code is heavily based on what's in qemu-kvm.git.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/i8259.c |  122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pc.h    |    1 +
 kvm-all.c  |   24 ++++++++++++
 kvm.h      |    2 +
 4 files changed, 149 insertions(+), 0 deletions(-)

diff --git a/hw/i8259.c b/hw/i8259.c
index 3de22e3..d07dd1c 100644
--- a/hw/i8259.c
+++ b/hw/i8259.c
@@ -26,6 +26,7 @@
 #include "isa.h"
 #include "monitor.h"
 #include "qemu-timer.h"
+#include "kvm.h"
 
 /* debug PIC */
 //#define DEBUG_PIC
@@ -446,9 +447,96 @@ static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1)
     return s->elcr;
 }
 
+static int kvm_kernel_pic_load_from_user(PicState *s)
+{
+    int r = 0;
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_pic_state *kpic;
+
+    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
+        return 0;
+
+    chip.chip_id = (&s->pics_state->pics[0] == s) ?
+                   KVM_IRQCHIP_PIC_MASTER :
+                   KVM_IRQCHIP_PIC_SLAVE;
+    kpic = &chip.chip.pic;
+
+    kpic->last_irr = s->last_irr;
+    kpic->irr = s->irr;
+    kpic->imr = s->imr;
+    kpic->isr = s->isr;
+    kpic->priority_add = s->priority_add;
+    kpic->irq_base = s->irq_base;
+    kpic->read_reg_select = s->read_reg_select;
+    kpic->poll = s->poll;
+    kpic->special_mask = s->special_mask;
+    kpic->init_state = s->init_state;
+    kpic->auto_eoi = s->auto_eoi;
+    kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi;
+    kpic->special_fully_nested_mode = s->special_fully_nested_mode;
+    kpic->init4 = s->init4;
+    kpic->elcr = s->elcr;
+    kpic->elcr_mask = s->elcr_mask;
+
+    r = kvm_set_irqchip(&chip);
+#endif
+    return r;
+}
+
+static void kvm_kernel_pic_save_to_user(PicState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_pic_state *kpic;
+
+    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
+        return;
+
+    chip.chip_id = (&s->pics_state->pics[0] == s) ?
+                   KVM_IRQCHIP_PIC_MASTER :
+                   KVM_IRQCHIP_PIC_SLAVE;
+    kvm_get_irqchip(&chip);
+    kpic = &chip.chip.pic;
+
+    s->last_irr = kpic->last_irr;
+    s->irr = kpic->irr;
+    s->imr = kpic->imr;
+    s->isr = kpic->isr;
+    s->priority_add = kpic->priority_add;
+    s->irq_base = kpic->irq_base;
+    s->read_reg_select = kpic->read_reg_select;
+    s->poll = kpic->poll;
+    s->special_mask = kpic->special_mask;
+    s->init_state = kpic->init_state;
+    s->auto_eoi = kpic->auto_eoi;
+    s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi;
+    s->special_fully_nested_mode = kpic->special_fully_nested_mode;
+    s->init4 = kpic->init4;
+    s->elcr = kpic->elcr;
+    s->elcr_mask = kpic->elcr_mask;
+#endif
+}
+
+static int i8259_post_load(void *opaque, int version_id)
+{
+    PicState *s = (void *)opaque;
+
+    return kvm_kernel_pic_load_from_user(s);
+}
+
+static void i8259_pre_save(void *opaque)
+{
+    PicState *s = (void *)opaque;
+
+    kvm_kernel_pic_save_to_user(s);
+}
+
 static const VMStateDescription vmstate_pic = {
     .name = "i8259",
     .version_id = 1,
+    .pre_save = i8259_pre_save,
+    .post_load = i8259_post_load,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields      = (VMStateField []) {
@@ -535,3 +623,37 @@ qemu_irq *i8259_init(qemu_irq parent_irq)
     isa_pic = s;
     return qemu_allocate_irqs(i8259_set_irq, s, 16);
 }
+
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+static void kvm_i8259_set_irq(void *opaque, int irq, int level)
+{
+    int pic_ret;
+    if (kvm_set_irq(irq, level, &pic_ret)) {
+        if (pic_ret != 0)
+            apic_set_irq_delivered();
+        return;
+    }
+}
+
+static void kvm_pic_init1(int io_addr, PicState *s)
+{
+    vmstate_register(io_addr, &vmstate_pic, s);
+    qemu_register_reset(pic_reset, s);
+}
+
+qemu_irq *kvm_i8259_init(qemu_irq parent_irq)
+{
+    PicState2 *s;
+
+    s = qemu_mallocz(sizeof(PicState2));
+
+    kvm_pic_init1(0x20, &s->pics[0]);
+    kvm_pic_init1(0xa0, &s->pics[1]);
+    s->parent_irq = parent_irq;
+    s->pics[0].pics_state = s;
+    s->pics[1].pics_state = s;
+    isa_pic = s;
+    return qemu_allocate_irqs(kvm_i8259_set_irq, s, 24);
+}
+#endif
+
diff --git a/hw/pc.h b/hw/pc.h
index 15d0140..5eed584 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -27,6 +27,7 @@ extern PicState2 *isa_pic;
 void pic_set_irq(int irq, int level);
 void pic_set_irq_new(void *opaque, int irq, int level);
 qemu_irq *i8259_init(qemu_irq parent_irq);
+qemu_irq *kvm_i8259_init(qemu_irq parent_irq);
 int pic_read_irq(PicState2 *s);
 void pic_update_irq(PicState2 *s);
 uint32_t pic_intack_read(PicState2 *s);
diff --git a/kvm-all.c b/kvm-all.c
index d795285..e98024a 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -431,6 +431,30 @@ int kvm_get_irqchip(struct kvm_irqchip *chip)
 }
 #endif
 
+int kvm_set_irq(int irq, int level, int *status)
+{
+    struct kvm_irq_level event;
+    int r;
+
+    if (!kvm_state->irqchip_in_kernel) {
+        return 0;
+    }
+
+    event.level = level;
+    event.irq = irq;
+
+    r = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE_STATUS, &event);
+
+    if (r < 0)
+        return 0;
+
+    if (status) {
+        *status = event.status;
+    }
+
+    return 1;
+}
+
 int kvm_init(int smp_cpus)
 {
     static const char upgrade_note[] =
diff --git a/kvm.h b/kvm.h
index 099b55e..f0c9201 100644
--- a/kvm.h
+++ b/kvm.h
@@ -67,6 +67,8 @@ int kvm_irqchip_in_kernel(void);
 int kvm_set_irqchip(struct kvm_irqchip *chip);
 int kvm_get_irqchip(struct kvm_irqchip *chip);
 
+int kvm_set_irq(int irq, int level, int *status);
+
 /* internal API */
 
 struct KVMState;
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 7/9] initialize i8259 chip
  2009-10-07 22:08           ` [Qemu-devel] [PATCH v2 6/9] provide in-kernel i8259 chip Glauber Costa
@ 2009-10-07 22:08             ` Glauber Costa
  2009-10-07 22:08               ` [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip Glauber Costa
  2009-10-08 12:02               ` [Qemu-devel] Re: [PATCH v2 7/9] initialize i8259 chip Jan Kiszka
  0 siblings, 2 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

If we have irqchip in kernel (which we currently do not), intialize
the i8259 chip. This code is heavily inspirated by the one in qemu-kvm.git

Note that we wire isa irqs trough it too.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/pc.c   |   14 +++++++++++---
 kvm-all.c |    2 +-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 2ca15a3..5cfe99f 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -39,6 +39,7 @@
 #include "ide.h"
 #include "loader.h"
 #include "elf.h"
+#include "kvm.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -1171,10 +1172,17 @@ static void pc_init1(ram_addr_t ram_size,
     }
 
     cpu_irq = qemu_allocate_irqs(pic_irq_request, NULL, 1);
-    i8259 = i8259_init(cpu_irq[0]);
     isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state));
-    isa_irq_state->i8259 = i8259;
-    isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
+
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+        isa_irq = i8259 = kvm_i8259_init(cpu_irq[0]);
+    } else
+#endif
+    {
+        isa_irq_state->i8259 = i8259 = i8259_init(cpu_irq[0]);
+        isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
+    }
 
     if (pci_enabled) {
         pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq);
diff --git a/kvm-all.c b/kvm-all.c
index e98024a..418d8bc 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -429,7 +429,6 @@ int kvm_get_irqchip(struct kvm_irqchip *chip)
 
     return kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
 }
-#endif
 
 int kvm_set_irq(int irq, int level, int *status)
 {
@@ -454,6 +453,7 @@ int kvm_set_irq(int irq, int level, int *status)
 
     return 1;
 }
+#endif
 
 int kvm_init(int smp_cpus)
 {
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip
  2009-10-07 22:08             ` [Qemu-devel] [PATCH v2 7/9] initialize " Glauber Costa
@ 2009-10-07 22:08               ` Glauber Costa
  2009-10-07 22:08                 ` [Qemu-devel] [PATCH v2 9/9] Add -kvm option Glauber Costa
  2009-10-08 12:02               ` [Qemu-devel] Re: [PATCH v2 7/9] initialize i8259 chip Jan Kiszka
  1 sibling, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

Now that we have all devices set up, this patch initializes the irqchip.
This is dependant on the io-thread, since we need someone to pull ourselves
out of the halted state.

I believe this should be the default when we are running over the io-thread.
Later on, I plan to post a patch that makes it optional.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 kvm-all.c |   21 +++++++++++++++++++++
 1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 418d8bc..f33354d 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -453,6 +453,22 @@ int kvm_set_irq(int irq, int level, int *status)
 
     return 1;
 }
+
+static int kvm_create_irqchip(KVMState *s)
+{
+    int ret = 0;
+#if defined(CONFIG_IOTHREAD)
+    if (!kvm_check_extension(s, KVM_CAP_IRQCHIP))
+        return -1;
+
+    ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+    if (ret < 0)
+        return ret;
+
+    s->irqchip_in_kernel = 1;
+#endif
+    return ret;
+}
 #endif
 
 int kvm_init(int smp_cpus)
@@ -541,6 +557,11 @@ int kvm_init(int smp_cpus)
     }
 #endif
 
+#if defined(KVM_CAP_IRQCHIP)
+    ret = kvm_create_irqchip(s);
+    if (ret < 0)
+        goto err;
+#endif
     ret = kvm_arch_init(s, smp_cpus);
     if (ret < 0)
         goto err;
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 9/9] Add -kvm option
  2009-10-07 22:08               ` [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip Glauber Costa
@ 2009-10-07 22:08                 ` Glauber Costa
  2009-10-07 23:00                   ` [Qemu-devel] " Anthony Liguori
  0 siblings, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

This option deprecates --enable-kvm. It is a more flexible option,
that makes use of qemu-opts, and allow us to pass on options to enable or
disable kernel irqchip, for example.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 kvm-all.c       |    1 +
 kvm.h           |    1 +
 qemu-config.c   |   16 ++++++++++++++++
 qemu-config.h   |    1 +
 qemu-options.hx |   13 +++++++++----
 vl.c            |   11 +++++++++++
 6 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index f33354d..b31d085 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -51,6 +51,7 @@ typedef struct KVMSlot
 typedef struct kvm_dirty_log KVMDirtyLog;
 
 int kvm_allowed = 0;
+int kvm_use_kernel_chip = 0;
 
 struct KVMState
 {
diff --git a/kvm.h b/kvm.h
index f0c9201..49a2b56 100644
--- a/kvm.h
+++ b/kvm.h
@@ -20,6 +20,7 @@
 
 #ifdef CONFIG_KVM
 extern int kvm_allowed;
+extern int kvm_use_kernel_chip;
 
 #define kvm_enabled() (kvm_allowed)
 #else
diff --git a/qemu-config.c b/qemu-config.c
index bafaea2..9461766 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -184,12 +184,28 @@ QemuOptsList qemu_rtc_opts = {
     },
 };
 
+QemuOptsList qemu_kvm_opts = {
+    .name = "kvm",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_kvm_opts.head),
+    .desc = {
+        {
+            .name = "irqchip-in-kernel",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "enabled",
+            .type = QEMU_OPT_BOOL,
+        },
+        { /* end if list */ }
+    },
+};
+
 static QemuOptsList *lists[] = {
     &qemu_drive_opts,
     &qemu_chardev_opts,
     &qemu_device_opts,
     &qemu_net_opts,
     &qemu_rtc_opts,
+    &qemu_kvm_opts,
     NULL,
 };
 
diff --git a/qemu-config.h b/qemu-config.h
index cdad5ac..58cead2 100644
--- a/qemu-config.h
+++ b/qemu-config.h
@@ -6,6 +6,7 @@ extern QemuOptsList qemu_chardev_opts;
 extern QemuOptsList qemu_device_opts;
 extern QemuOptsList qemu_net_opts;
 extern QemuOptsList qemu_rtc_opts;
+extern QemuOptsList qemu_kvm_opts;
 
 int qemu_set_option(const char *str);
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 3dd76b3..1cb8431 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1431,15 +1431,20 @@ Set the filename for the BIOS.
 ETEXI
 
 #ifdef CONFIG_KVM
-DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \
-    "-enable-kvm     enable KVM full virtualization support\n")
-#endif
+HXCOMM Options deprecated by -kvm
+DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, "")
+
+DEF("kvm", HAS_ARG, QEMU_OPTION_kvm, \
+    "-kvm enable=on|off,irqchip-in-kernel=on|off\n" \
+    "                enable KVM full virtualization support\n")
 STEXI
-@item -enable-kvm
+@item -kvm [enable=on|off][,irqchip-in-kernel=on|off]
 Enable KVM full virtualization support. This option is only available
 if KVM support is enabled when compiling.
 ETEXI
 
+#endif
+
 #ifdef CONFIG_XEN
 DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
     "-xen-domid id   specify xen guest domain id\n")
diff --git a/vl.c b/vl.c
index afe01af..a6f9eb7 100644
--- a/vl.c
+++ b/vl.c
@@ -5353,6 +5353,17 @@ int main(int argc, char **argv, char **envp)
             case QEMU_OPTION_enable_kvm:
                 kvm_allowed = 1;
                 break;
+            case QEMU_OPTION_kvm:
+
+                opts = qemu_opts_parse(&qemu_kvm_opts, optarg, NULL);
+                if (!opts) {
+                    fprintf(stderr, "parse error: %s\n", optarg);
+                    exit(1);
+                }
+
+                kvm_allowed = qemu_opt_get_bool(opts, "enabled", 1);
+                kvm_use_kernel_chip = qemu_opt_get_bool(opts, "irqchip-in-kernel", 1);
+                break;
 #endif
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-07 22:08                 ` [Qemu-devel] [PATCH v2 9/9] Add -kvm option Glauber Costa
@ 2009-10-07 23:00                   ` Anthony Liguori
  2009-10-07 23:14                     ` Glauber Costa
  2009-10-08 14:07                     ` Avi Kivity
  0 siblings, 2 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-07 23:00 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Andre Przywara, qemu-devel, Gerd Hoffmann

Glauber Costa wrote:
> This option deprecates --enable-kvm. It is a more flexible option,
> that makes use of qemu-opts, and allow us to pass on options to enable or
> disable kernel irqchip, for example.
>   

With proper qdev support, you could select kvm device models based on 
-device so I think this option isn't all that useful.

What I'd like to see in the interim is a kvm specific machine type 
that's defaulted to if kvm is enabled.  I think this would be useful not 
only for enabling things like in-kernel apic, but also for selecting a 
default cpu model.

> Signed-off-by: Glauber Costa <glommer@redhat.com>
> ---
>  kvm-all.c       |    1 +
>  kvm.h           |    1 +
>  qemu-config.c   |   16 ++++++++++++++++
>  qemu-config.h   |    1 +
>  qemu-options.hx |   13 +++++++++----
>  vl.c            |   11 +++++++++++
>  6 files changed, 39 insertions(+), 4 deletions(-)
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f33354d..b31d085 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -51,6 +51,7 @@ typedef struct KVMSlot
>  typedef struct kvm_dirty_log KVMDirtyLog;
>
>  int kvm_allowed = 0;
> +int kvm_use_kernel_chip = 0;
>
>  struct KVMState
>  {
> diff --git a/kvm.h b/kvm.h
> index f0c9201..49a2b56 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -20,6 +20,7 @@
>
>  #ifdef CONFIG_KVM
>  extern int kvm_allowed;
> +extern int kvm_use_kernel_chip;
>
>  #define kvm_enabled() (kvm_allowed)
>  #else
> diff --git a/qemu-config.c b/qemu-config.c
> index bafaea2..9461766 100644
> --- a/qemu-config.c
> +++ b/qemu-config.c
> @@ -184,12 +184,28 @@ QemuOptsList qemu_rtc_opts = {
>      },
>  };
>
> +QemuOptsList qemu_kvm_opts = {
> +    .name = "kvm",
> +    .head = QTAILQ_HEAD_INITIALIZER(qemu_kvm_opts.head),
> +    .desc = {
> +        {
> +            .name = "irqchip-in-kernel",
> +            .type = QEMU_OPT_BOOL,
> +        },{
> +            .name = "enabled",
> +            .type = QEMU_OPT_BOOL,
> +        },
> +        { /* end if list */ }
> +    },
> +};
> +
>  static QemuOptsList *lists[] = {
>      &qemu_drive_opts,
>      &qemu_chardev_opts,
>      &qemu_device_opts,
>      &qemu_net_opts,
>      &qemu_rtc_opts,
> +    &qemu_kvm_opts,
>      NULL,
>  };
>
> diff --git a/qemu-config.h b/qemu-config.h
> index cdad5ac..58cead2 100644
> --- a/qemu-config.h
> +++ b/qemu-config.h
> @@ -6,6 +6,7 @@ extern QemuOptsList qemu_chardev_opts;
>  extern QemuOptsList qemu_device_opts;
>  extern QemuOptsList qemu_net_opts;
>  extern QemuOptsList qemu_rtc_opts;
> +extern QemuOptsList qemu_kvm_opts;
>
>  int qemu_set_option(const char *str);
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 3dd76b3..1cb8431 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1431,15 +1431,20 @@ Set the filename for the BIOS.
>  ETEXI
>
>  #ifdef CONFIG_KVM
> -DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \
> -    "-enable-kvm     enable KVM full virtualization support\n")
> -#endif
> +HXCOMM Options deprecated by -kvm
> +DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, "")
> +
> +DEF("kvm", HAS_ARG, QEMU_OPTION_kvm, \
> +    "-kvm enable=on|off,irqchip-in-kernel=on|off\n" \
> +    "                enable KVM full virtualization support\n")
>  STEXI
> -@item -enable-kvm
> +@item -kvm [enable=on|off][,irqchip-in-kernel=on|off]
>  Enable KVM full virtualization support. This option is only available
>  if KVM support is enabled when compiling.
>  ETEXI
>
> +#endif
> +
>  #ifdef CONFIG_XEN
>  DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
>      "-xen-domid id   specify xen guest domain id\n")
> diff --git a/vl.c b/vl.c
> index afe01af..a6f9eb7 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -5353,6 +5353,17 @@ int main(int argc, char **argv, char **envp)
>              case QEMU_OPTION_enable_kvm:
>                  kvm_allowed = 1;
>                  break;
> +            case QEMU_OPTION_kvm:
> +
> +                opts = qemu_opts_parse(&qemu_kvm_opts, optarg, NULL);
> +                if (!opts) {
> +                    fprintf(stderr, "parse error: %s\n", optarg);
> +                    exit(1);
> +                }
> +
> +                kvm_allowed = qemu_opt_get_bool(opts, "enabled", 1);
> +                kvm_use_kernel_chip = qemu_opt_get_bool(opts, "irqchip-in-kernel", 1);
> +                break;
>  #endif
>              case QEMU_OPTION_usb:
>                  usb_enabled = 1;
>   


-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-07 23:00                   ` [Qemu-devel] " Anthony Liguori
@ 2009-10-07 23:14                     ` Glauber Costa
  2009-10-07 23:28                       ` Anthony Liguori
  2009-10-08 14:07                     ` Avi Kivity
  1 sibling, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-07 23:14 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Andre Przywara, qemu-devel, Gerd Hoffmann

On Wed, Oct 07, 2009 at 06:00:34PM -0500, Anthony Liguori wrote:
> Glauber Costa wrote:
>> This option deprecates --enable-kvm. It is a more flexible option,
>> that makes use of qemu-opts, and allow us to pass on options to enable or
>> disable kernel irqchip, for example.
>>   
>
> With proper qdev support, you could select kvm device models based on  
> -device so I think this option isn't all that useful.
>
> What I'd like to see in the interim is a kvm specific machine type  
> that's defaulted to if kvm is enabled.  I think this would be useful not  
> only for enabling things like in-kernel apic, but also for selecting a  
> default cpu model.
I don't really follow.

even if we have qdev on the irq controllers, one could still come up with
situations in which we'd like to force the use of one device over another.

>
>> Signed-off-by: Glauber Costa <glommer@redhat.com>
>> ---
>>  kvm-all.c       |    1 +
>>  kvm.h           |    1 +
>>  qemu-config.c   |   16 ++++++++++++++++
>>  qemu-config.h   |    1 +
>>  qemu-options.hx |   13 +++++++++----
>>  vl.c            |   11 +++++++++++
>>  6 files changed, 39 insertions(+), 4 deletions(-)
>>
>> diff --git a/kvm-all.c b/kvm-all.c
>> index f33354d..b31d085 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -51,6 +51,7 @@ typedef struct KVMSlot
>>  typedef struct kvm_dirty_log KVMDirtyLog;
>>
>>  int kvm_allowed = 0;
>> +int kvm_use_kernel_chip = 0;
>>
>>  struct KVMState
>>  {
>> diff --git a/kvm.h b/kvm.h
>> index f0c9201..49a2b56 100644
>> --- a/kvm.h
>> +++ b/kvm.h
>> @@ -20,6 +20,7 @@
>>
>>  #ifdef CONFIG_KVM
>>  extern int kvm_allowed;
>> +extern int kvm_use_kernel_chip;
>>
>>  #define kvm_enabled() (kvm_allowed)
>>  #else
>> diff --git a/qemu-config.c b/qemu-config.c
>> index bafaea2..9461766 100644
>> --- a/qemu-config.c
>> +++ b/qemu-config.c
>> @@ -184,12 +184,28 @@ QemuOptsList qemu_rtc_opts = {
>>      },
>>  };
>>
>> +QemuOptsList qemu_kvm_opts = {
>> +    .name = "kvm",
>> +    .head = QTAILQ_HEAD_INITIALIZER(qemu_kvm_opts.head),
>> +    .desc = {
>> +        {
>> +            .name = "irqchip-in-kernel",
>> +            .type = QEMU_OPT_BOOL,
>> +        },{
>> +            .name = "enabled",
>> +            .type = QEMU_OPT_BOOL,
>> +        },
>> +        { /* end if list */ }
>> +    },
>> +};
>> +
>>  static QemuOptsList *lists[] = {
>>      &qemu_drive_opts,
>>      &qemu_chardev_opts,
>>      &qemu_device_opts,
>>      &qemu_net_opts,
>>      &qemu_rtc_opts,
>> +    &qemu_kvm_opts,
>>      NULL,
>>  };
>>
>> diff --git a/qemu-config.h b/qemu-config.h
>> index cdad5ac..58cead2 100644
>> --- a/qemu-config.h
>> +++ b/qemu-config.h
>> @@ -6,6 +6,7 @@ extern QemuOptsList qemu_chardev_opts;
>>  extern QemuOptsList qemu_device_opts;
>>  extern QemuOptsList qemu_net_opts;
>>  extern QemuOptsList qemu_rtc_opts;
>> +extern QemuOptsList qemu_kvm_opts;
>>
>>  int qemu_set_option(const char *str);
>>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 3dd76b3..1cb8431 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -1431,15 +1431,20 @@ Set the filename for the BIOS.
>>  ETEXI
>>
>>  #ifdef CONFIG_KVM
>> -DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \
>> -    "-enable-kvm     enable KVM full virtualization support\n")
>> -#endif
>> +HXCOMM Options deprecated by -kvm
>> +DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, "")
>> +
>> +DEF("kvm", HAS_ARG, QEMU_OPTION_kvm, \
>> +    "-kvm enable=on|off,irqchip-in-kernel=on|off\n" \
>> +    "                enable KVM full virtualization support\n")
>>  STEXI
>> -@item -enable-kvm
>> +@item -kvm [enable=on|off][,irqchip-in-kernel=on|off]
>>  Enable KVM full virtualization support. This option is only available
>>  if KVM support is enabled when compiling.
>>  ETEXI
>>
>> +#endif
>> +
>>  #ifdef CONFIG_XEN
>>  DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
>>      "-xen-domid id   specify xen guest domain id\n")
>> diff --git a/vl.c b/vl.c
>> index afe01af..a6f9eb7 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -5353,6 +5353,17 @@ int main(int argc, char **argv, char **envp)
>>              case QEMU_OPTION_enable_kvm:
>>                  kvm_allowed = 1;
>>                  break;
>> +            case QEMU_OPTION_kvm:
>> +
>> +                opts = qemu_opts_parse(&qemu_kvm_opts, optarg, NULL);
>> +                if (!opts) {
>> +                    fprintf(stderr, "parse error: %s\n", optarg);
>> +                    exit(1);
>> +                }
>> +
>> +                kvm_allowed = qemu_opt_get_bool(opts, "enabled", 1);
>> +                kvm_use_kernel_chip = qemu_opt_get_bool(opts, "irqchip-in-kernel", 1);
>> +                break;
>>  #endif
>>              case QEMU_OPTION_usb:
>>                  usb_enabled = 1;
>>   
>
>
> -- 
> Regards,
>
> Anthony Liguori
>

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-07 23:14                     ` Glauber Costa
@ 2009-10-07 23:28                       ` Anthony Liguori
  2009-10-12 11:58                         ` Gerd Hoffmann
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-07 23:28 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Andre Przywara, Anthony Liguori, qemu-devel, Gerd Hoffmann

Glauber Costa wrote:
> On Wed, Oct 07, 2009 at 06:00:34PM -0500, Anthony Liguori wrote:
>   
>> Glauber Costa wrote:
>>     
>>> This option deprecates --enable-kvm. It is a more flexible option,
>>> that makes use of qemu-opts, and allow us to pass on options to enable or
>>> disable kernel irqchip, for example.
>>>   
>>>       
>> With proper qdev support, you could select kvm device models based on  
>> -device so I think this option isn't all that useful.
>>
>> What I'd like to see in the interim is a kvm specific machine type  
>> that's defaulted to if kvm is enabled.  I think this would be useful not  
>> only for enabling things like in-kernel apic, but also for selecting a  
>> default cpu model.
>>     
> I don't really follow.
>
> even if we have qdev on the irq controllers, one could still come up with
> situations in which we'd like to force the use of one device over another.
>   

Right, my assumption is that the devices will have different qdev names 
and therefore a user can select which one gets used.

Right now, -device is just additive.  I'm not sure the best way to 
express something like, replace this existing device with this new 
device.  Maybe some trickery with id=.  Gerd, any thoughts?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-07 22:08     ` [Qemu-devel] [PATCH v2 3/9] provide in-kernel ioapic Glauber Costa
  2009-10-07 22:08       ` [Qemu-devel] [PATCH v2 4/9] provide in-kernel apic Glauber Costa
@ 2009-10-08 11:46       ` Jan Kiszka
  2009-10-08 13:49         ` [Qemu-devel] " Anthony Liguori
  2 siblings, 0 replies; 104+ messages in thread
From: Jan Kiszka @ 2009-10-08 11:46 UTC (permalink / raw)
  To: Glauber Costa; +Cc: aliguori, qemu-devel

Glauber Costa wrote:
> This patch provides kvm with an in-kernel ioapic. We are currently not enabling it.
> The code is heavily based on what's in qemu-kvm.git.
> 
> Signed-off-by: Glauber Costa <glommer@redhat.com>
> ---
>  hw/ioapic.c |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  kvm-all.c   |   20 ++++++++++++++
>  kvm.h       |    4 +++
>  3 files changed, 107 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/ioapic.c b/hw/ioapic.c
> index d475654..d6a9dac 100644
> --- a/hw/ioapic.c
> +++ b/hw/ioapic.c
> @@ -24,6 +24,7 @@
>  #include "pc.h"
>  #include "qemu-timer.h"
>  #include "host-utils.h"
> +#include "kvm.h"
>  
>  //#define DEBUG_IOAPIC
>  
> @@ -193,6 +194,79 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
>      }
>  }
>  
> +static int kvm_kernel_ioapic_load_from_user(IOAPICState *s)
> +{
> +    int r = 0;
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)

Hmm, here we additionally depend on TARGET_I386...

> +    struct kvm_irqchip chip;
> +    struct kvm_ioapic_state *kioapic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return 0;
> +
> +    chip.chip_id = KVM_IRQCHIP_IOAPIC;
> +    kioapic = &chip.chip.ioapic;
> +
> +    kioapic->id = s->id;
> +    kioapic->ioregsel = s->ioregsel;
> +    kioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
> +    kioapic->irr = s->irr;
> +    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> +        kioapic->redirtbl[i].bits = s->ioredtbl[i];
> +    }
> +
> +    r = kvm_set_irqchip(&chip);
> +#endif
> +    return r;
> +}
> +
> +static void kvm_kernel_ioapic_save_to_user(IOAPICState *s)
> +{
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
> +    struct kvm_irqchip chip;
> +    struct kvm_ioapic_state *kioapic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return;
> +    chip.chip_id = KVM_IRQCHIP_IOAPIC;
> +    kvm_get_irqchip(&chip);
> +    kioapic = &chip.chip.ioapic;
> +
> +    s->id = kioapic->id;
> +    s->ioregsel = kioapic->ioregsel;
> +    s->irr = kioapic->irr;
> +    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> +        s->ioredtbl[i] = kioapic->redirtbl[i].bits;
> +    }
> +#endif
> +}
> +
> +static void ioapic_pre_save(void *opaque)
> +{
> +    IOAPICState *s = (void *)opaque;
> +
> +    kvm_kernel_ioapic_save_to_user(s);
> +}
> +
> +static int ioapic_pre_load(void *opaque)
> +{
> +    IOAPICState *s = opaque;
> +
> +    /* in case we are doing version 1, we just set these to sane values */
> +    s->irr = 0;
> +    return 0;
> +}
> +
> +static int ioapic_post_load(void *opaque, int version_id)
> +{
> +    IOAPICState *s = opaque;
> +
> +    return kvm_kernel_ioapic_load_from_user(s);
> +}
> +
> +
>  static const VMStateDescription vmstate_ioapic = {
>      .name = "ioapic",
>      .version_id = 2,
> @@ -205,7 +279,10 @@ static const VMStateDescription vmstate_ioapic = {
>          VMSTATE_UINT32_V(irr, IOAPICState, 2),
>          VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
>          VMSTATE_END_OF_LIST()
> -    }
> +    },
> +    .pre_load = ioapic_pre_load,
> +    .post_load = ioapic_post_load,
> +    .pre_save = ioapic_pre_save,
>  };
>  
>  static void ioapic_reset(void *opaque)
> @@ -217,6 +294,11 @@ static void ioapic_reset(void *opaque)
>      s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
>      for(i = 0; i < IOAPIC_NUM_PINS; i++)
>          s->ioredtbl[i] = 1 << 16; /* mask LVT */
> +#ifdef KVM_CAP_IRQCHIP

... but here only KVM_CAP_IRQCHIP suffices?

> +    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
> +        kvm_kernel_ioapic_load_from_user(s);
> +    }
> +#endif

Independent of this, both kvm_irqchip_in_kernel() and
kvm_kernel_ioapic_load_from_user() should also be defined for the
!KVM_CAP_IRQCHIP case to avoid #ifdefs here and in many other places
this series touches.

>  }
>  
>  static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
> diff --git a/kvm-all.c b/kvm-all.c
> index 48ae26c..d795285 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -411,6 +411,26 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
>      return ret;
>  }
>  
> +#ifdef KVM_CAP_IRQCHIP

Again, only KVM_CAP_IRQCHIP - what is the actually required dependency?

> +int kvm_set_irqchip(struct kvm_irqchip *chip)
> +{
> +    if (!kvm_state->irqchip_in_kernel) {
> +        return 0;
> +    }
> +
> +    return kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
> +}
> +
> +int kvm_get_irqchip(struct kvm_irqchip *chip)
> +{
> +    if (!kvm_state->irqchip_in_kernel) {
> +        return 0;
> +    }
> +
> +    return kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
> +}
> +#endif
> +
>  int kvm_init(int smp_cpus)
>  {
>      static const char upgrade_note[] =
> diff --git a/kvm.h b/kvm.h
> index e7d5beb..8d4afa0 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -16,6 +16,7 @@
>  
>  #include "config.h"
>  #include "qemu-queue.h"
> +#include <linux/kvm.h>
>  
>  #ifdef CONFIG_KVM
>  extern int kvm_allowed;
> @@ -63,6 +64,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
>  int kvm_pit_in_kernel(void);
>  int kvm_irqchip_in_kernel(void);
>  
> +int kvm_set_irqchip(struct kvm_irqchip *chip);
> +int kvm_get_irqchip(struct kvm_irqchip *chip);
> +
>  /* internal API */
>  
>  struct KVMState;

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 7/9] initialize i8259 chip
  2009-10-07 22:08             ` [Qemu-devel] [PATCH v2 7/9] initialize " Glauber Costa
  2009-10-07 22:08               ` [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip Glauber Costa
@ 2009-10-08 12:02               ` Jan Kiszka
  1 sibling, 0 replies; 104+ messages in thread
From: Jan Kiszka @ 2009-10-08 12:02 UTC (permalink / raw)
  To: Glauber Costa; +Cc: aliguori, qemu-devel

Glauber Costa wrote:
> diff --git a/kvm-all.c b/kvm-all.c
> index e98024a..418d8bc 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -429,7 +429,6 @@ int kvm_get_irqchip(struct kvm_irqchip *chip)
>  
>      return kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
>  }
> -#endif
>  
>  int kvm_set_irq(int irq, int level, int *status)
>  {
> @@ -454,6 +453,7 @@ int kvm_set_irq(int irq, int level, int *status)
>  
>      return 1;
>  }
> +#endif

I think these two changes could be avoided by including them in the
previous patch.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-07 22:08     ` [Qemu-devel] [PATCH v2 3/9] provide in-kernel ioapic Glauber Costa
@ 2009-10-08 13:49         ` Anthony Liguori
  2009-10-08 11:46       ` [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic Jan Kiszka
  2009-10-08 13:49         ` [Qemu-devel] " Anthony Liguori
  2 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 13:49 UTC (permalink / raw)
  To: Glauber Costa; +Cc: qemu-devel, kvm-devel, Avi Kivity

Glauber Costa wrote:
> This patch provides kvm with an in-kernel ioapic. We are currently not enabling it.
> The code is heavily based on what's in qemu-kvm.git.
>   

It really ought to be it's own file and own device model.  Having the 
code mixed in with ioapic.c is confusing because it's unclear what code 
is in use when the in-kernel model is used.

> @@ -193,6 +194,79 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
>      }
>  }
>
> +static int kvm_kernel_ioapic_load_from_user(IOAPICState *s)
> +{
> +    int r = 0;
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
>   

No point in checking the KVM_CAP_IRQCHIP.  Just require it during 
build.  Otherwise, !KVM_CAP_IRQCHIP is dead code since I'm sure noone is 
actually testing kernels that old with modern qemu.

There's no point in restricting to I386 either.

> +    struct kvm_irqchip chip;
> +    struct kvm_ioapic_state *kioapic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return 0;
> +
> +    chip.chip_id = KVM_IRQCHIP_IOAPIC;
> +    kioapic = &chip.chip.ioapic;
> +
> +    kioapic->id = s->id;
> +    kioapic->ioregsel = s->ioregsel;
> +    kioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
> +    kioapic->irr = s->irr;
> +    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> +        kioapic->redirtbl[i].bits = s->ioredtbl[i];
> +    }
> +
> +    r = kvm_set_irqchip(&chip);
> +#endif
> +    return r;
> +}
> +
> +static void kvm_kernel_ioapic_save_to_user(IOAPICState *s)
> +{
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
> +    struct kvm_irqchip chip;
> +    struct kvm_ioapic_state *kioapic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return;
> +    chip.chip_id = KVM_IRQCHIP_IOAPIC;
> +    kvm_get_irqchip(&chip);
> +    kioapic = &chip.chip.ioapic;
> +
> +    s->id = kioapic->id;
> +    s->ioregsel = kioapic->ioregsel;
> +    s->irr = kioapic->irr;
> +    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> +        s->ioredtbl[i] = kioapic->redirtbl[i].bits;
> +    }
> +#endif
> +}
> +
> +static void ioapic_pre_save(void *opaque)
> +{
> +    IOAPICState *s = (void *)opaque;
> +
> +    kvm_kernel_ioapic_save_to_user(s);
> +}
> +
> +static int ioapic_pre_load(void *opaque)
> +{
> +    IOAPICState *s = opaque;
> +
> +    /* in case we are doing version 1, we just set these to sane values */
> +    s->irr = 0;
> +    return 0;
> +}
> +
> +static int ioapic_post_load(void *opaque, int version_id)
> +{
> +    IOAPICState *s = opaque;
> +
> +    return kvm_kernel_ioapic_load_from_user(s);
> +}
> +
> +
>  static const VMStateDescription vmstate_ioapic = {
>      .name = "ioapic",
>      .version_id = 2,
> @@ -205,7 +279,10 @@ static const VMStateDescription vmstate_ioapic = {
>          VMSTATE_UINT32_V(irr, IOAPICState, 2),
>          VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
>          VMSTATE_END_OF_LIST()
> -    }
> +    },
> +    .pre_load = ioapic_pre_load,
> +    .post_load = ioapic_post_load,
> +    .pre_save = ioapic_pre_save,
>  };
>   

The in kernel apic should be a separate device model with a separate 
savevm section.  They are different devices and there's no real 
advantage to pretending like they're the same device.

>  static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
> diff --git a/kvm-all.c b/kvm-all.c
> index 48ae26c..d795285 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -411,6 +411,26 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
>      return ret;
>  }
>
> +#ifdef KVM_CAP_IRQCHIP
> +int kvm_set_irqchip(struct kvm_irqchip *chip)
> +{
> +    if (!kvm_state->irqchip_in_kernel) {
> +        return 0;
> +    }
> +
> +    return kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
> +}
>   

irqchip_in_kernel ought to disappear.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 13:49         ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 13:49 UTC (permalink / raw)
  To: Glauber Costa; +Cc: qemu-devel, kvm-devel, Avi Kivity

Glauber Costa wrote:
> This patch provides kvm with an in-kernel ioapic. We are currently not enabling it.
> The code is heavily based on what's in qemu-kvm.git.
>   

It really ought to be it's own file and own device model.  Having the 
code mixed in with ioapic.c is confusing because it's unclear what code 
is in use when the in-kernel model is used.

> @@ -193,6 +194,79 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
>      }
>  }
>
> +static int kvm_kernel_ioapic_load_from_user(IOAPICState *s)
> +{
> +    int r = 0;
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
>   

No point in checking the KVM_CAP_IRQCHIP.  Just require it during 
build.  Otherwise, !KVM_CAP_IRQCHIP is dead code since I'm sure noone is 
actually testing kernels that old with modern qemu.

There's no point in restricting to I386 either.

> +    struct kvm_irqchip chip;
> +    struct kvm_ioapic_state *kioapic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return 0;
> +
> +    chip.chip_id = KVM_IRQCHIP_IOAPIC;
> +    kioapic = &chip.chip.ioapic;
> +
> +    kioapic->id = s->id;
> +    kioapic->ioregsel = s->ioregsel;
> +    kioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
> +    kioapic->irr = s->irr;
> +    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> +        kioapic->redirtbl[i].bits = s->ioredtbl[i];
> +    }
> +
> +    r = kvm_set_irqchip(&chip);
> +#endif
> +    return r;
> +}
> +
> +static void kvm_kernel_ioapic_save_to_user(IOAPICState *s)
> +{
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
> +    struct kvm_irqchip chip;
> +    struct kvm_ioapic_state *kioapic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return;
> +    chip.chip_id = KVM_IRQCHIP_IOAPIC;
> +    kvm_get_irqchip(&chip);
> +    kioapic = &chip.chip.ioapic;
> +
> +    s->id = kioapic->id;
> +    s->ioregsel = kioapic->ioregsel;
> +    s->irr = kioapic->irr;
> +    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> +        s->ioredtbl[i] = kioapic->redirtbl[i].bits;
> +    }
> +#endif
> +}
> +
> +static void ioapic_pre_save(void *opaque)
> +{
> +    IOAPICState *s = (void *)opaque;
> +
> +    kvm_kernel_ioapic_save_to_user(s);
> +}
> +
> +static int ioapic_pre_load(void *opaque)
> +{
> +    IOAPICState *s = opaque;
> +
> +    /* in case we are doing version 1, we just set these to sane values */
> +    s->irr = 0;
> +    return 0;
> +}
> +
> +static int ioapic_post_load(void *opaque, int version_id)
> +{
> +    IOAPICState *s = opaque;
> +
> +    return kvm_kernel_ioapic_load_from_user(s);
> +}
> +
> +
>  static const VMStateDescription vmstate_ioapic = {
>      .name = "ioapic",
>      .version_id = 2,
> @@ -205,7 +279,10 @@ static const VMStateDescription vmstate_ioapic = {
>          VMSTATE_UINT32_V(irr, IOAPICState, 2),
>          VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
>          VMSTATE_END_OF_LIST()
> -    }
> +    },
> +    .pre_load = ioapic_pre_load,
> +    .post_load = ioapic_post_load,
> +    .pre_save = ioapic_pre_save,
>  };
>   

The in kernel apic should be a separate device model with a separate 
savevm section.  They are different devices and there's no real 
advantage to pretending like they're the same device.

>  static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
> diff --git a/kvm-all.c b/kvm-all.c
> index 48ae26c..d795285 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -411,6 +411,26 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
>      return ret;
>  }
>
> +#ifdef KVM_CAP_IRQCHIP
> +int kvm_set_irqchip(struct kvm_irqchip *chip)
> +{
> +    if (!kvm_state->irqchip_in_kernel) {
> +        return 0;
> +    }
> +
> +    return kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
> +}
>   

irqchip_in_kernel ought to disappear.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 13:49         ` [Qemu-devel] " Anthony Liguori
@ 2009-10-08 13:54           ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 13:54 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 03:49 PM, Anthony Liguori wrote:
> Glauber Costa wrote:
>> This patch provides kvm with an in-kernel ioapic. We are currently 
>> not enabling it.
>> The code is heavily based on what's in qemu-kvm.git.
>
> It really ought to be it's own file and own device model.  Having the 
> code mixed in with ioapic.c is confusing because it's unclear what 
> code is in use when the in-kernel model is used.

I disagree.  It's the same device with the same guest-visible interface 
and the same host-visible interface (save/restore, 'info ioapic' if we 
write one).  Splitting it into two files will only result in code 
duplication.

Think of it as an ioapic accelerator.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 13:54           ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 13:54 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 03:49 PM, Anthony Liguori wrote:
> Glauber Costa wrote:
>> This patch provides kvm with an in-kernel ioapic. We are currently 
>> not enabling it.
>> The code is heavily based on what's in qemu-kvm.git.
>
> It really ought to be it's own file and own device model.  Having the 
> code mixed in with ioapic.c is confusing because it's unclear what 
> code is in use when the in-kernel model is used.

I disagree.  It's the same device with the same guest-visible interface 
and the same host-visible interface (save/restore, 'info ioapic' if we 
write one).  Splitting it into two files will only result in code 
duplication.

Think of it as an ioapic accelerator.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-07 22:08       ` [Qemu-devel] [PATCH v2 4/9] provide in-kernel apic Glauber Costa
@ 2009-10-08 13:55           ` Anthony Liguori
  2009-10-08 13:55           ` [Qemu-devel] " Anthony Liguori
  1 sibling, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 13:55 UTC (permalink / raw)
  To: Glauber Costa; +Cc: qemu-devel, kvm-devel, Avi Kivity

Glauber Costa wrote:
> This patch provides kvm with an in-kernel apic. We are currently not enabling it.
> The code is heavily based on what's in qemu-kvm.git.
>
> Signed-off-by: Glauber Costa <glommer@redhat.com>
> ---
>  hw/apic.c         |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  kvm.h             |    3 +
>  target-i386/kvm.c |   18 +++++++
>  3 files changed, 152 insertions(+), 4 deletions(-)
>
> diff --git a/hw/apic.c b/hw/apic.c
> index c89008e..5635607 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
>  #endif
>      if (!s)
>          return;
> -    s->apicbase = (val & 0xfffff000) |
> +
> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
> +        s->apicbase = val;
> +    else
> +        s->apicbase = (val & 0xfffff000) |
>          (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
>      /* if disabled, cannot be enabled again */
>      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env)
>      s->wait_for_sipi = 1;
>
>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
> +
> +#ifdef KVM_CAP_MP_STATE
> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
> +        env->mp_state
> +            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
> +#endif
>   

I don't think CAP_MP_STATE should be treated as an optional feature.

> +static int kvm_kernel_lapic_load_from_user(APICState *s)
> +{
> +    int r = 0;
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
> +    struct kvm_lapic_state apic;
> +    struct kvm_lapic_state *klapic = &apic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return 0;
> +
> +    memset(klapic, 0, sizeof apic);
> +    kapic_set_reg(klapic, 0x2, s->id << 24);
> +    kapic_set_reg(klapic, 0x8, s->tpr);
> +    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
> +    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
> +    kapic_set_reg(klapic, 0xf, s->spurious_vec);
> +    for (i = 0; i < 8; i++) {
> +        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
> +        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
> +        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
> +    }
> +    kapic_set_reg(klapic, 0x28, s->esr);
> +    kapic_set_reg(klapic, 0x30, s->icr[0]);
> +    kapic_set_reg(klapic, 0x31, s->icr[1]);
> +    for (i = 0; i < APIC_LVT_NB; i++)
> +        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
> +    kapic_set_reg(klapic, 0x38, s->initial_count);
> +    kapic_set_reg(klapic, 0x3e, s->divide_conf);
> +
> +    r = kvm_set_lapic(s->cpu_env, klapic);
> +#endif
> +    return r;
> +}
>   

You should probably just setup VMState such that it directly saves 
kvm_lapic_state and then have the pre/post functions call the kernel 
ioctls to sync it.  There's not a whole lot of point switching the state 
between two different structures.

>  static const VMStateDescription vmstate_apic = {
>      .name = "apic",
>      .version_id = 3,
>      .minimum_version_id = 3,
>      .minimum_version_id_old = 1,
>      .load_state_old = apic_load_old,
> +    .pre_save = apic_pre_save,
> +    .post_load = apic_post_load,
>      .fields      = (VMStateField []) {
>          VMSTATE_UINT32(apicbase, APICState),
>          VMSTATE_UINT8(id, APICState),
> @@ -933,9 +1052,8 @@ static const VMStateDescription vmstate_apic = {
>      }
>  };
>   

Same applies here as ioapic.  Should be a separate device.

-- 
Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-08 13:55           ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 13:55 UTC (permalink / raw)
  To: Glauber Costa; +Cc: qemu-devel, kvm-devel, Avi Kivity

Glauber Costa wrote:
> This patch provides kvm with an in-kernel apic. We are currently not enabling it.
> The code is heavily based on what's in qemu-kvm.git.
>
> Signed-off-by: Glauber Costa <glommer@redhat.com>
> ---
>  hw/apic.c         |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  kvm.h             |    3 +
>  target-i386/kvm.c |   18 +++++++
>  3 files changed, 152 insertions(+), 4 deletions(-)
>
> diff --git a/hw/apic.c b/hw/apic.c
> index c89008e..5635607 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
>  #endif
>      if (!s)
>          return;
> -    s->apicbase = (val & 0xfffff000) |
> +
> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
> +        s->apicbase = val;
> +    else
> +        s->apicbase = (val & 0xfffff000) |
>          (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
>      /* if disabled, cannot be enabled again */
>      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env)
>      s->wait_for_sipi = 1;
>
>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
> +
> +#ifdef KVM_CAP_MP_STATE
> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
> +        env->mp_state
> +            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
> +#endif
>   

I don't think CAP_MP_STATE should be treated as an optional feature.

> +static int kvm_kernel_lapic_load_from_user(APICState *s)
> +{
> +    int r = 0;
> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
> +    struct kvm_lapic_state apic;
> +    struct kvm_lapic_state *klapic = &apic;
> +    int i;
> +
> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
> +        return 0;
> +
> +    memset(klapic, 0, sizeof apic);
> +    kapic_set_reg(klapic, 0x2, s->id << 24);
> +    kapic_set_reg(klapic, 0x8, s->tpr);
> +    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
> +    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
> +    kapic_set_reg(klapic, 0xf, s->spurious_vec);
> +    for (i = 0; i < 8; i++) {
> +        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
> +        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
> +        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
> +    }
> +    kapic_set_reg(klapic, 0x28, s->esr);
> +    kapic_set_reg(klapic, 0x30, s->icr[0]);
> +    kapic_set_reg(klapic, 0x31, s->icr[1]);
> +    for (i = 0; i < APIC_LVT_NB; i++)
> +        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
> +    kapic_set_reg(klapic, 0x38, s->initial_count);
> +    kapic_set_reg(klapic, 0x3e, s->divide_conf);
> +
> +    r = kvm_set_lapic(s->cpu_env, klapic);
> +#endif
> +    return r;
> +}
>   

You should probably just setup VMState such that it directly saves 
kvm_lapic_state and then have the pre/post functions call the kernel 
ioctls to sync it.  There's not a whole lot of point switching the state 
between two different structures.

>  static const VMStateDescription vmstate_apic = {
>      .name = "apic",
>      .version_id = 3,
>      .minimum_version_id = 3,
>      .minimum_version_id_old = 1,
>      .load_state_old = apic_load_old,
> +    .pre_save = apic_pre_save,
> +    .post_load = apic_post_load,
>      .fields      = (VMStateField []) {
>          VMSTATE_UINT32(apicbase, APICState),
>          VMSTATE_UINT8(id, APICState),
> @@ -933,9 +1052,8 @@ static const VMStateDescription vmstate_apic = {
>      }
>  };
>   

Same applies here as ioapic.  Should be a separate device.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-07 23:00                   ` [Qemu-devel] " Anthony Liguori
  2009-10-07 23:14                     ` Glauber Costa
@ 2009-10-08 14:07                     ` Avi Kivity
  2009-10-08 14:23                       ` Anthony Liguori
  1 sibling, 1 reply; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:07 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Andre Przywara, Glauber Costa, qemu-devel, Gerd Hoffmann

On 10/08/2009 01:00 AM, Anthony Liguori wrote:
> Glauber Costa wrote:
>> This option deprecates --enable-kvm. It is a more flexible option,
>> that makes use of qemu-opts, and allow us to pass on options to 
>> enable or
>> disable kernel irqchip, for example.
>
> With proper qdev support, you could select kvm device models based on 
> -device so I think this option isn't all that useful.

qdev (mostly?) is about the guest interface; this is more about the host 
implementation, so akin to -drive file= and cache=.

> What I'd like to see in the interim is a kvm specific machine type 
> that's defaulted to if kvm is enabled.  I think this would be useful 
> not only for enabling things like in-kernel apic, but also for 
> selecting a default cpu model.

We should make a distinction between guest-visible changes and 
accelerators like kvm-irqchip and vhost.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 13:55           ` [Qemu-devel] " Anthony Liguori
@ 2009-10-08 14:09             ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:09 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 03:55 PM, Anthony Liguori wrote:
> Glauber Costa wrote:
>> This patch provides kvm with an in-kernel apic. We are currently not 
>> enabling it.
>> The code is heavily based on what's in qemu-kvm.git.
>>
>> Signed-off-by: Glauber Costa <glommer@redhat.com>
>> ---
>>  hw/apic.c         |  135 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++--
>>  kvm.h             |    3 +
>>  target-i386/kvm.c |   18 +++++++
>>  3 files changed, 152 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/apic.c b/hw/apic.c
>> index c89008e..5635607 100644
>> --- a/hw/apic.c
>> +++ b/hw/apic.c
>> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
>>  #endif
>>      if (!s)
>>          return;
>> -    s->apicbase = (val & 0xfffff000) |
>> +
>> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
>> +        s->apicbase = val;
>> +    else
>> +        s->apicbase = (val & 0xfffff000) |
>>          (s->apicbase & (MSR_IA32_APICBASE_BSP | 
>> MSR_IA32_APICBASE_ENABLE));
>>      /* if disabled, cannot be enabled again */
>>      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
>> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env)
>>      s->wait_for_sipi = 1;
>>
>>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
>> +
>> +#ifdef KVM_CAP_MP_STATE
>> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
>> +        env->mp_state
>> +            = env->halted ? KVM_MP_STATE_UNINITIALIZED : 
>> KVM_MP_STATE_RUNNABLE;
>> +#endif
>
> I don't think CAP_MP_STATE should be treated as an optional feature.
>
>> +static int kvm_kernel_lapic_load_from_user(APICState *s)
>> +{
>> +    int r = 0;
>> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
>> +    struct kvm_lapic_state apic;
>> +    struct kvm_lapic_state *klapic = &apic;
>> +    int i;
>> +
>> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
>> +        return 0;
>> +
>> +    memset(klapic, 0, sizeof apic);
>> +    kapic_set_reg(klapic, 0x2, s->id << 24);
>> +    kapic_set_reg(klapic, 0x8, s->tpr);
>> +    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
>> +    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
>> +    kapic_set_reg(klapic, 0xf, s->spurious_vec);
>> +    for (i = 0; i < 8; i++) {
>> +        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
>> +        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
>> +        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
>> +    }
>> +    kapic_set_reg(klapic, 0x28, s->esr);
>> +    kapic_set_reg(klapic, 0x30, s->icr[0]);
>> +    kapic_set_reg(klapic, 0x31, s->icr[1]);
>> +    for (i = 0; i < APIC_LVT_NB; i++)
>> +        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
>> +    kapic_set_reg(klapic, 0x38, s->initial_count);
>> +    kapic_set_reg(klapic, 0x3e, s->divide_conf);
>> +
>> +    r = kvm_set_lapic(s->cpu_env, klapic);
>> +#endif
>> +    return r;
>> +}
>
> You should probably just setup VMState such that it directly saves 
> kvm_lapic_state and then have the pre/post functions call the kernel 
> ioctls to sync it.  There's not a whole lot of point switching the 
> state between two different structures.

It ensures the two models are compatible.  Since they're the same device 
from the point of view of the guest, there's no reason for them to have 
different representations or to be incompatible.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-08 14:09             ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:09 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 03:55 PM, Anthony Liguori wrote:
> Glauber Costa wrote:
>> This patch provides kvm with an in-kernel apic. We are currently not 
>> enabling it.
>> The code is heavily based on what's in qemu-kvm.git.
>>
>> Signed-off-by: Glauber Costa <glommer@redhat.com>
>> ---
>>  hw/apic.c         |  135 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++--
>>  kvm.h             |    3 +
>>  target-i386/kvm.c |   18 +++++++
>>  3 files changed, 152 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/apic.c b/hw/apic.c
>> index c89008e..5635607 100644
>> --- a/hw/apic.c
>> +++ b/hw/apic.c
>> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
>>  #endif
>>      if (!s)
>>          return;
>> -    s->apicbase = (val & 0xfffff000) |
>> +
>> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
>> +        s->apicbase = val;
>> +    else
>> +        s->apicbase = (val & 0xfffff000) |
>>          (s->apicbase & (MSR_IA32_APICBASE_BSP | 
>> MSR_IA32_APICBASE_ENABLE));
>>      /* if disabled, cannot be enabled again */
>>      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
>> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env)
>>      s->wait_for_sipi = 1;
>>
>>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
>> +
>> +#ifdef KVM_CAP_MP_STATE
>> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
>> +        env->mp_state
>> +            = env->halted ? KVM_MP_STATE_UNINITIALIZED : 
>> KVM_MP_STATE_RUNNABLE;
>> +#endif
>
> I don't think CAP_MP_STATE should be treated as an optional feature.
>
>> +static int kvm_kernel_lapic_load_from_user(APICState *s)
>> +{
>> +    int r = 0;
>> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
>> +    struct kvm_lapic_state apic;
>> +    struct kvm_lapic_state *klapic = &apic;
>> +    int i;
>> +
>> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
>> +        return 0;
>> +
>> +    memset(klapic, 0, sizeof apic);
>> +    kapic_set_reg(klapic, 0x2, s->id << 24);
>> +    kapic_set_reg(klapic, 0x8, s->tpr);
>> +    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
>> +    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
>> +    kapic_set_reg(klapic, 0xf, s->spurious_vec);
>> +    for (i = 0; i < 8; i++) {
>> +        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
>> +        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
>> +        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
>> +    }
>> +    kapic_set_reg(klapic, 0x28, s->esr);
>> +    kapic_set_reg(klapic, 0x30, s->icr[0]);
>> +    kapic_set_reg(klapic, 0x31, s->icr[1]);
>> +    for (i = 0; i < APIC_LVT_NB; i++)
>> +        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
>> +    kapic_set_reg(klapic, 0x38, s->initial_count);
>> +    kapic_set_reg(klapic, 0x3e, s->divide_conf);
>> +
>> +    r = kvm_set_lapic(s->cpu_env, klapic);
>> +#endif
>> +    return r;
>> +}
>
> You should probably just setup VMState such that it directly saves 
> kvm_lapic_state and then have the pre/post functions call the kernel 
> ioctls to sync it.  There's not a whole lot of point switching the 
> state between two different structures.

It ensures the two models are compatible.  Since they're the same device 
from the point of view of the guest, there's no reason for them to have 
different representations or to be incompatible.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:09             ` [Qemu-devel] " Avi Kivity
  (?)
@ 2009-10-08 14:22             ` Glauber Costa
  2009-10-09 10:06                 ` [Qemu-devel] " Jamie Lokier
  -1 siblings, 1 reply; 104+ messages in thread
From: Glauber Costa @ 2009-10-08 14:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 04:09:27PM +0200, Avi Kivity wrote:
> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>> Glauber Costa wrote:
>>> This patch provides kvm with an in-kernel apic. We are currently not  
>>> enabling it.
>>> The code is heavily based on what's in qemu-kvm.git.
>>>
>>> Signed-off-by: Glauber Costa <glommer@redhat.com>
>>> ---
>>>  hw/apic.c         |  135  
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++--
>>>  kvm.h             |    3 +
>>>  target-i386/kvm.c |   18 +++++++
>>>  3 files changed, 152 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/hw/apic.c b/hw/apic.c
>>> index c89008e..5635607 100644
>>> --- a/hw/apic.c
>>> +++ b/hw/apic.c
>>> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
>>>  #endif
>>>      if (!s)
>>>          return;
>>> -    s->apicbase = (val & 0xfffff000) |
>>> +
>>> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
>>> +        s->apicbase = val;
>>> +    else
>>> +        s->apicbase = (val & 0xfffff000) |
>>>          (s->apicbase & (MSR_IA32_APICBASE_BSP |  
>>> MSR_IA32_APICBASE_ENABLE));
>>>      /* if disabled, cannot be enabled again */
>>>      if (!(val & MSR_IA32_APICBASE_ENABLE)) {
>>> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env)
>>>      s->wait_for_sipi = 1;
>>>
>>>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
>>> +
>>> +#ifdef KVM_CAP_MP_STATE
>>> +    if (kvm_enabled() && kvm_irqchip_in_kernel())
>>> +        env->mp_state
>>> +            = env->halted ? KVM_MP_STATE_UNINITIALIZED :  
>>> KVM_MP_STATE_RUNNABLE;
>>> +#endif
>>
>> I don't think CAP_MP_STATE should be treated as an optional feature.
>>
>>> +static int kvm_kernel_lapic_load_from_user(APICState *s)
>>> +{
>>> +    int r = 0;
>>> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
>>> +    struct kvm_lapic_state apic;
>>> +    struct kvm_lapic_state *klapic = &apic;
>>> +    int i;
>>> +
>>> +    if (!(kvm_enabled() && kvm_irqchip_in_kernel()))
>>> +        return 0;
>>> +
>>> +    memset(klapic, 0, sizeof apic);
>>> +    kapic_set_reg(klapic, 0x2, s->id << 24);
>>> +    kapic_set_reg(klapic, 0x8, s->tpr);
>>> +    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
>>> +    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
>>> +    kapic_set_reg(klapic, 0xf, s->spurious_vec);
>>> +    for (i = 0; i < 8; i++) {
>>> +        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
>>> +        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
>>> +        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
>>> +    }
>>> +    kapic_set_reg(klapic, 0x28, s->esr);
>>> +    kapic_set_reg(klapic, 0x30, s->icr[0]);
>>> +    kapic_set_reg(klapic, 0x31, s->icr[1]);
>>> +    for (i = 0; i < APIC_LVT_NB; i++)
>>> +        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
>>> +    kapic_set_reg(klapic, 0x38, s->initial_count);
>>> +    kapic_set_reg(klapic, 0x3e, s->divide_conf);
>>> +
>>> +    r = kvm_set_lapic(s->cpu_env, klapic);
>>> +#endif
>>> +    return r;
>>> +}
>>
>> You should probably just setup VMState such that it directly saves  
>> kvm_lapic_state and then have the pre/post functions call the kernel  
>> ioctls to sync it.  There's not a whole lot of point switching the  
>> state between two different structures.
>
> It ensures the two models are compatible.  Since they're the same device  
> from the point of view of the guest, there's no reason for them to have  
> different representations or to be incompatible.

live migration between something that has in-kernel irqchip and something that
has not is likely to be a completely untested thing. And this is the only reason
we might think of it as the same device. I don't see any value in supporting this
combination

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:07                     ` Avi Kivity
@ 2009-10-08 14:23                       ` Anthony Liguori
  2009-10-08 14:30                         ` Avi Kivity
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 14:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Andre Przywara, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

Avi Kivity wrote:
> On 10/08/2009 01:00 AM, Anthony Liguori wrote:
>> Glauber Costa wrote:
>>> This option deprecates --enable-kvm. It is a more flexible option,
>>> that makes use of qemu-opts, and allow us to pass on options to 
>>> enable or
>>> disable kernel irqchip, for example.
>>
>> With proper qdev support, you could select kvm device models based on 
>> -device so I think this option isn't all that useful.
>
> qdev (mostly?) is about the guest interface; this is more about the 
> host implementation, so akin to -drive file= and cache=.

If we make the kvm device a separate device, then it's about choosing 
which device to use.

Regardless, even if it were a single device, it's still a property of 
the device so a qdev property would be the logical way to expose it.

>> What I'd like to see in the interim is a kvm specific machine type 
>> that's defaulted to if kvm is enabled.  I think this would be useful 
>> not only for enabling things like in-kernel apic, but also for 
>> selecting a default cpu model.
>
> We should make a distinction between guest-visible changes and 
> accelerators like kvm-irqchip and vhost.

They are different device models that happen to implement the same 
guest-visible device.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:09             ` [Qemu-devel] " Avi Kivity
@ 2009-10-08 14:26               ` Anthony Liguori
  -1 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 14:26 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

Avi Kivity wrote:
> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>>
>> You should probably just setup VMState such that it directly saves 
>> kvm_lapic_state and then have the pre/post functions call the kernel 
>> ioctls to sync it.  There's not a whole lot of point switching the 
>> state between two different structures.
>
> It ensures the two models are compatible.  Since they're the same 
> device from the point of view of the guest, there's no reason for them 
> to have different representations or to be incompatible.

The problem is, the in-kernel apic is not part of the qemu source tree.  
If we add a field to the qemu apic, then we would break the in-kernel 
apic and vice-versa.  It's far easier to just have the in-kernel apic as 
a separate model.

Most importantly though, the code should reflect how things behave.  
It's extremely difficult to look at the apic code and figure out what 
bits are used and what bits aren't used when the in-kernel apic is enabled.

Regards,

Anthony Liguori



^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-08 14:26               ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 14:26 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

Avi Kivity wrote:
> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>>
>> You should probably just setup VMState such that it directly saves 
>> kvm_lapic_state and then have the pre/post functions call the kernel 
>> ioctls to sync it.  There's not a whole lot of point switching the 
>> state between two different structures.
>
> It ensures the two models are compatible.  Since they're the same 
> device from the point of view of the guest, there's no reason for them 
> to have different representations or to be incompatible.

The problem is, the in-kernel apic is not part of the qemu source tree.  
If we add a field to the qemu apic, then we would break the in-kernel 
apic and vice-versa.  It's far easier to just have the in-kernel apic as 
a separate model.

Most importantly though, the code should reflect how things behave.  
It's extremely difficult to look at the apic code and figure out what 
bits are used and what bits aren't used when the in-kernel apic is enabled.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:23                       ` Anthony Liguori
@ 2009-10-08 14:30                         ` Avi Kivity
  2009-10-08 14:33                           ` Anthony Liguori
  2009-10-08 14:34                           ` Gleb Natapov
  0 siblings, 2 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:30 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Andre Przywara, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

On 10/08/2009 04:23 PM, Anthony Liguori wrote:
>>> What I'd like to see in the interim is a kvm specific machine type 
>>> that's defaulted to if kvm is enabled.  I think this would be useful 
>>> not only for enabling things like in-kernel apic, but also for 
>>> selecting a default cpu model.
>>
>> We should make a distinction between guest-visible changes and 
>> accelerators like kvm-irqchip and vhost.
>
> They are different device models that happen to implement the same 
> guest-visible device. 

I think this separation is artificial.  From both the guest's view and 
the user's view, there's no difference between qemu ioapic and kvm 
ioapic (modulo bugs).  To me this indicates they're the same device.

We'll have the same problem with vhost-net, only there the duplication 
will be much greater if we split the implementation.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:26               ` Anthony Liguori
@ 2009-10-08 14:31                 ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:31 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 04:26 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>>>
>>> You should probably just setup VMState such that it directly saves 
>>> kvm_lapic_state and then have the pre/post functions call the kernel 
>>> ioctls to sync it.  There's not a whole lot of point switching the 
>>> state between two different structures.
>>
>> It ensures the two models are compatible.  Since they're the same 
>> device from the point of view of the guest, there's no reason for 
>> them to have different representations or to be incompatible.
>
> The problem is, the in-kernel apic is not part of the qemu source 
> tree.  If we add a field to the qemu apic, then we would break the 
> in-kernel apic and vice-versa.  It's far easier to just have the 
> in-kernel apic as a separate model.
>

You need to handle it anyway due to save/restore; that is the new field 
and whatever functionality it has must be optional.

> Most importantly though, the code should reflect how things behave.  
> It's extremely difficult to look at the apic code and figure out what 
> bits are used and what bits aren't used when the in-kernel apic is 
> enabled.

That doesn't mean they need to be separate devices.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-08 14:31                 ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:31 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On 10/08/2009 04:26 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>>>
>>> You should probably just setup VMState such that it directly saves 
>>> kvm_lapic_state and then have the pre/post functions call the kernel 
>>> ioctls to sync it.  There's not a whole lot of point switching the 
>>> state between two different structures.
>>
>> It ensures the two models are compatible.  Since they're the same 
>> device from the point of view of the guest, there's no reason for 
>> them to have different representations or to be incompatible.
>
> The problem is, the in-kernel apic is not part of the qemu source 
> tree.  If we add a field to the qemu apic, then we would break the 
> in-kernel apic and vice-versa.  It's far easier to just have the 
> in-kernel apic as a separate model.
>

You need to handle it anyway due to save/restore; that is the new field 
and whatever functionality it has must be optional.

> Most importantly though, the code should reflect how things behave.  
> It's extremely difficult to look at the apic code and figure out what 
> bits are used and what bits aren't used when the in-kernel apic is 
> enabled.

That doesn't mean they need to be separate devices.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:30                         ` Avi Kivity
@ 2009-10-08 14:33                           ` Anthony Liguori
  2009-10-08 14:41                             ` Avi Kivity
  2009-10-08 14:34                           ` Gleb Natapov
  1 sibling, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 14:33 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

Avi Kivity wrote:
> On 10/08/2009 04:23 PM, Anthony Liguori wrote:
>>>> What I'd like to see in the interim is a kvm specific machine type 
>>>> that's defaulted to if kvm is enabled.  I think this would be 
>>>> useful not only for enabling things like in-kernel apic, but also 
>>>> for selecting a default cpu model.
>>>
>>> We should make a distinction between guest-visible changes and 
>>> accelerators like kvm-irqchip and vhost.
>>
>> They are different device models that happen to implement the same 
>> guest-visible device. 
>
> I think this separation is artificial.  From both the guest's view and 
> the user's view, there's no difference between qemu ioapic and kvm 
> ioapic (modulo bugs).  To me this indicates they're the same device.

That's not always been true historically.  For instance, the in-kernel 
pit does interrupt catchup whereas the userspace pit does not.

> We'll have the same problem with vhost-net, only there the duplication 
> will be much greater if we split the implementation.

I'd rather not treat vhost-net like a device model and instead treat it 
like a -net backend.  If we can bounce requests through userspace (and 
we can), we should be able to use it for any device model.  In the case 
of virtio-net, we should be able to short-circuit things such that we 
don't have to go through userspace.  It's admittedly very hairy without 
a point-to-point net abstraction.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:30                         ` Avi Kivity
  2009-10-08 14:33                           ` Anthony Liguori
@ 2009-10-08 14:34                           ` Gleb Natapov
  1 sibling, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 14:34 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Andre Przywara, Anthony Liguori, Glauber Costa, qemu-devel,
	Gerd Hoffmann

On Thu, Oct 08, 2009 at 04:30:17PM +0200, Avi Kivity wrote:
> On 10/08/2009 04:23 PM, Anthony Liguori wrote:
> >>>What I'd like to see in the interim is a kvm specific machine
> >>>type that's defaulted to if kvm is enabled.  I think this
> >>>would be useful not only for enabling things like in-kernel
> >>>apic, but also for selecting a default cpu model.
> >>
> >>We should make a distinction between guest-visible changes and
> >>accelerators like kvm-irqchip and vhost.
> >
> >They are different device models that happen to implement the same
> >guest-visible device.
> 
> I think this separation is artificial.  From both the guest's view
> and the user's view, there's no difference between qemu ioapic and
> kvm ioapic (modulo bugs).  To me this indicates they're the same
> device.
> 
Same device but different implementation. The only two things that are
used from qemu apic implementation with in-kernel apic are migration and
reset. Both depends on implementation. Reusing code will require us to
keep implementation in sync. This reuse is a constant source of bugs
BTW.

> We'll have the same problem with vhost-net, only there the
> duplication will be much greater if we split the implementation.
> 
> -- 
> error compiling committee.c: too many arguments to function
> 
> 

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:31                 ` Avi Kivity
  (?)
@ 2009-10-08 14:39                 ` Anthony Liguori
  2009-10-08 14:46                     ` Glauber Costa
  -1 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 14:39 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Glauber Costa, qemu-devel, kvm-devel

Avi Kivity wrote:
>> The problem is, the in-kernel apic is not part of the qemu source 
>> tree.  If we add a field to the qemu apic, then we would break the 
>> in-kernel apic and vice-versa.  It's far easier to just have the 
>> in-kernel apic as a separate model.
>>
>
> You need to handle it anyway due to save/restore; that is the new 
> field and whatever functionality it has must be optional.

Not necessarily.  If it's a bug fix, we may disable the older versions 
of savevm (which we have to do from time to time).

> That doesn't mean they need to be separate devices.

But they are.  The in-kernel device models are not mere accelerations.  
There are features that are only enabled with in-kernel device models 
(like PCI passthrough).  In fact, in-kernel PIT is not at all an 
acceleration, it's there because it's functionally different.

The sync stuff is really ugly too.  It would be much cleaner to have a 
separate state for the in-kernel device models that saved the structures 
from the kernel directly instead of having to translate between 
formats.  More straight forward code means it's all easier to 
understand, easier to debug, etc.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:33                           ` Anthony Liguori
@ 2009-10-08 14:41                             ` Avi Kivity
  2009-10-08 14:56                               ` Anthony Liguori
  0 siblings, 1 reply; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 14:41 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

On 10/08/2009 04:33 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 10/08/2009 04:23 PM, Anthony Liguori wrote:
>>>>> What I'd like to see in the interim is a kvm specific machine type 
>>>>> that's defaulted to if kvm is enabled.  I think this would be 
>>>>> useful not only for enabling things like in-kernel apic, but also 
>>>>> for selecting a default cpu model.
>>>>
>>>> We should make a distinction between guest-visible changes and 
>>>> accelerators like kvm-irqchip and vhost.
>>>
>>> They are different device models that happen to implement the same 
>>> guest-visible device. 
>>
>> I think this separation is artificial.  From both the guest's view 
>> and the user's view, there's no difference between qemu ioapic and 
>> kvm ioapic (modulo bugs).  To me this indicates they're the same device.
>
> That's not always been true historically.  For instance, the in-kernel 
> pit does interrupt catchup whereas the userspace pit does not.

That's a bug in the userspace pit :)

I'm worried about things like 'info pit' needing dual implementations.

>> We'll have the same problem with vhost-net, only there the 
>> duplication will be much greater if we split the implementation.
>
> I'd rather not treat vhost-net like a device model and instead treat 
> it like a -net backend.  If we can bounce requests through userspace 
> (and we can), we should be able to use it for any device model.  In 
> the case of virtio-net, we should be able to short-circuit things such 
> that we don't have to go through userspace.  It's admittedly very 
> hairy without a point-to-point net abstraction.
>

Interesting idea.  I think it'll be hairy even with a point-to-point net 
abstraction, but it's worth trying.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:31                 ` Avi Kivity
@ 2009-10-08 14:44                   ` Glauber Costa
  -1 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-08 14:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Anthony Liguori, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 04:31:57PM +0200, Avi Kivity wrote:
> On 10/08/2009 04:26 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>>>>
>>>> You should probably just setup VMState such that it directly saves  
>>>> kvm_lapic_state and then have the pre/post functions call the 
>>>> kernel ioctls to sync it.  There's not a whole lot of point 
>>>> switching the state between two different structures.
>>>
>>> It ensures the two models are compatible.  Since they're the same  
>>> device from the point of view of the guest, there's no reason for  
>>> them to have different representations or to be incompatible.
>>
>> The problem is, the in-kernel apic is not part of the qemu source  
>> tree.  If we add a field to the qemu apic, then we would break the  
>> in-kernel apic and vice-versa.  It's far easier to just have the  
>> in-kernel apic as a separate model.
>>
>
> You need to handle it anyway due to save/restore; that is the new field  
> and whatever functionality it has must be optional.
Not necessarily. You can grab the structures directly from the kernel definition
, copy that over, issue the ioctl, and just make sure the source and destination
have compatible kernels.



^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-08 14:44                   ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-08 14:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 04:31:57PM +0200, Avi Kivity wrote:
> On 10/08/2009 04:26 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> On 10/08/2009 03:55 PM, Anthony Liguori wrote:
>>>>
>>>> You should probably just setup VMState such that it directly saves  
>>>> kvm_lapic_state and then have the pre/post functions call the 
>>>> kernel ioctls to sync it.  There's not a whole lot of point 
>>>> switching the state between two different structures.
>>>
>>> It ensures the two models are compatible.  Since they're the same  
>>> device from the point of view of the guest, there's no reason for  
>>> them to have different representations or to be incompatible.
>>
>> The problem is, the in-kernel apic is not part of the qemu source  
>> tree.  If we add a field to the qemu apic, then we would break the  
>> in-kernel apic and vice-versa.  It's far easier to just have the  
>> in-kernel apic as a separate model.
>>
>
> You need to handle it anyway due to save/restore; that is the new field  
> and whatever functionality it has must be optional.
Not necessarily. You can grab the structures directly from the kernel definition
, copy that over, issue the ioctl, and just make sure the source and destination
have compatible kernels.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:39                 ` Anthony Liguori
@ 2009-10-08 14:46                     ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-08 14:46 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Avi Kivity, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 09:39:13AM -0500, Anthony Liguori wrote:
> The sync stuff is really ugly too.  It would be much cleaner to have a  
> separate state for the in-kernel device models that saved the structures  
> from the kernel directly instead of having to translate between formats.  
> More straight forward code means it's all easier to understand, easier to 
> debug, etc.
>
One may arguee that I am stupid, but I have caught myself reading code that
was totally unused in the quest for irqchip related bugs...


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-08 14:46                     ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-08 14:46 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Avi Kivity, kvm-devel, qemu-devel

On Thu, Oct 08, 2009 at 09:39:13AM -0500, Anthony Liguori wrote:
> The sync stuff is really ugly too.  It would be much cleaner to have a  
> separate state for the in-kernel device models that saved the structures  
> from the kernel directly instead of having to translate between formats.  
> More straight forward code means it's all easier to understand, easier to 
> debug, etc.
>
One may arguee that I am stupid, but I have caught myself reading code that
was totally unused in the quest for irqchip related bugs...

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:41                             ` Avi Kivity
@ 2009-10-08 14:56                               ` Anthony Liguori
  2009-10-08 15:05                                 ` Avi Kivity
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 14:56 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

Avi Kivity wrote:
> That's a bug in the userspace pit :)
>
> I'm worried about things like 'info pit' needing dual implementations.

You mean info qdm?

It'll Just Work.

>>> We'll have the same problem with vhost-net, only there the 
>>> duplication will be much greater if we split the implementation.
>>
>> I'd rather not treat vhost-net like a device model and instead treat 
>> it like a -net backend.  If we can bounce requests through userspace 
>> (and we can), we should be able to use it for any device model.  In 
>> the case of virtio-net, we should be able to short-circuit things 
>> such that we don't have to go through userspace.  It's admittedly 
>> very hairy without a point-to-point net abstraction.
>>
>
> Interesting idea.  I think it'll be hairy even with a point-to-point 
> net abstraction, but it's worth trying.

Could be interesting trying to tie the e1000 to vhost-net performance 
wise...

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 14:56                               ` Anthony Liguori
@ 2009-10-08 15:05                                 ` Avi Kivity
  2009-10-08 15:14                                   ` Anthony Liguori
  0 siblings, 1 reply; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 15:05 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

On 10/08/2009 04:56 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> That's a bug in the userspace pit :)
>>
>> I'm worried about things like 'info pit' needing dual implementations.
>
> You mean info qdm?
>
> It'll Just Work.
>

I mean

    (qemu) info pit
    pit: counter 0x1234 period 0x3453

or something.

>> Interesting idea.  I think it'll be hairy even with a point-to-point 
>> net abstraction, but it's worth trying.
>
> Could be interesting trying to tie the e1000 to vhost-net performance 
> wise...

I don't think there's much chance of that - e1000 places important 
registers (like interrupt disable) in BARs whereas virtio puts them in 
the ring.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 15:05                                 ` Avi Kivity
@ 2009-10-08 15:14                                   ` Anthony Liguori
  2009-10-12 11:15                                     ` Gerd Hoffmann
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 15:14 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Gerd Hoffmann, Glauber Costa,
	qemu-devel

Avi Kivity wrote:
> On 10/08/2009 04:56 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> That's a bug in the userspace pit :)
>>>
>>> I'm worried about things like 'info pit' needing dual implementations.
>>
>> You mean info qdm?
>>
>> It'll Just Work.
>>
>
> I mean
>
>    (qemu) info pit
>    pit: counter 0x1234 period 0x3453
>
> or something.

When the pit is qdev converted, it will have properties.  counter and 
period could be two of those properties in which case, 'info qtree 
i8254' would show the above output or 'info qtree i8254-kvm' would show it.

In fact, a very good reason to have two device models is that there are 
certain properties that make sense for i8254-kvm that don't make sense 
for i8254.  For instance, you could expose information about interrupt 
catch up.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 13:54           ` [Qemu-devel] " Avi Kivity
@ 2009-10-08 15:53             ` Jan Kiszka
  -1 siblings, 0 replies; 104+ messages in thread
From: Jan Kiszka @ 2009-10-08 15:53 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

Avi Kivity wrote:
> On 10/08/2009 03:49 PM, Anthony Liguori wrote:
>> Glauber Costa wrote:
>>> This patch provides kvm with an in-kernel ioapic. We are currently
>>> not enabling it.
>>> The code is heavily based on what's in qemu-kvm.git.
>>
>> It really ought to be it's own file and own device model.  Having the
>> code mixed in with ioapic.c is confusing because it's unclear what
>> code is in use when the in-kernel model is used.
> 
> I disagree.  It's the same device with the same guest-visible interface
> and the same host-visible interface (save/restore, 'info ioapic' if we
> write one).  Splitting it into two files will only result in code
> duplication.

Shared functions can be pushed into a commonly used module
(ioapic-common.c). And everyone touching it should realize then that it
could affect more than one user.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 15:53             ` Jan Kiszka
  0 siblings, 0 replies; 104+ messages in thread
From: Jan Kiszka @ 2009-10-08 15:53 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

Avi Kivity wrote:
> On 10/08/2009 03:49 PM, Anthony Liguori wrote:
>> Glauber Costa wrote:
>>> This patch provides kvm with an in-kernel ioapic. We are currently
>>> not enabling it.
>>> The code is heavily based on what's in qemu-kvm.git.
>>
>> It really ought to be it's own file and own device model.  Having the
>> code mixed in with ioapic.c is confusing because it's unclear what
>> code is in use when the in-kernel model is used.
> 
> I disagree.  It's the same device with the same guest-visible interface
> and the same host-visible interface (save/restore, 'info ioapic' if we
> write one).  Splitting it into two files will only result in code
> duplication.

Shared functions can be pushed into a commonly used module
(ioapic-common.c). And everyone touching it should realize then that it
could affect more than one user.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 13:54           ` [Qemu-devel] " Avi Kivity
@ 2009-10-08 16:07             ` Jamie Lokier
  -1 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-08 16:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

Avi Kivity wrote:
> On 10/08/2009 03:49 PM, Anthony Liguori wrote:
> >Glauber Costa wrote:
> >>This patch provides kvm with an in-kernel ioapic. We are currently 
> >>not enabling it.
> >>The code is heavily based on what's in qemu-kvm.git.
> >
> >It really ought to be it's own file and own device model.  Having the 
> >code mixed in with ioapic.c is confusing because it's unclear what 
> >code is in use when the in-kernel model is used.
> 
> I disagree.  It's the same device with the same guest-visible interface 
> and the same host-visible interface (save/restore, 'info ioapic' if we 
> write one).  Splitting it into two files will only result in code 
> duplication.
> 
> Think of it as an ioapic accelerator.

Haven't we already confirmed that it *isn't* just an ioapic accelerator
because you can't migrate between in-kernel iopic and qemu's ioapic?

Imho, if they cannot be swapped transparently, they are different
device emulations.

OF course there's nothing wrong with sharing lots of code.

Maybe ioapic.c and ioapic-kvm.c, with shared code in ioapic-common.c?

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:07             ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-08 16:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

Avi Kivity wrote:
> On 10/08/2009 03:49 PM, Anthony Liguori wrote:
> >Glauber Costa wrote:
> >>This patch provides kvm with an in-kernel ioapic. We are currently 
> >>not enabling it.
> >>The code is heavily based on what's in qemu-kvm.git.
> >
> >It really ought to be it's own file and own device model.  Having the 
> >code mixed in with ioapic.c is confusing because it's unclear what 
> >code is in use when the in-kernel model is used.
> 
> I disagree.  It's the same device with the same guest-visible interface 
> and the same host-visible interface (save/restore, 'info ioapic' if we 
> write one).  Splitting it into two files will only result in code 
> duplication.
> 
> Think of it as an ioapic accelerator.

Haven't we already confirmed that it *isn't* just an ioapic accelerator
because you can't migrate between in-kernel iopic and qemu's ioapic?

Imho, if they cannot be swapped transparently, they are different
device emulations.

OF course there's nothing wrong with sharing lots of code.

Maybe ioapic.c and ioapic-kvm.c, with shared code in ioapic-common.c?

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:07             ` Jamie Lokier
@ 2009-10-08 16:12               ` Anthony Liguori
  -1 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 16:12 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Avi Kivity, Glauber Costa, qemu-devel, kvm-devel

Jamie Lokier wrote:
> Avi Kivity wrote:
>   
>> On 10/08/2009 03:49 PM, Anthony Liguori wrote:
>>     
>>> Glauber Costa wrote:
>>>       
>>>> This patch provides kvm with an in-kernel ioapic. We are currently 
>>>> not enabling it.
>>>> The code is heavily based on what's in qemu-kvm.git.
>>>>         
>>> It really ought to be it's own file and own device model.  Having the 
>>> code mixed in with ioapic.c is confusing because it's unclear what 
>>> code is in use when the in-kernel model is used.
>>>       
>> I disagree.  It's the same device with the same guest-visible interface 
>> and the same host-visible interface (save/restore, 'info ioapic' if we 
>> write one).  Splitting it into two files will only result in code 
>> duplication.
>>
>> Think of it as an ioapic accelerator.
>>     
>
> Haven't we already confirmed that it *isn't* just an ioapic accelerator
> because you can't migrate between in-kernel iopic and qemu's ioapic?
>
> Imho, if they cannot be swapped transparently, they are different
> device emulations.
>
> OF course there's nothing wrong with sharing lots of code.
>   

If you avoid having a common save format, you get an overall reduction 
in code size and there's virtually no code to share.

-- 
Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:12               ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-08 16:12 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Glauber Costa, Avi Kivity, kvm-devel, qemu-devel

Jamie Lokier wrote:
> Avi Kivity wrote:
>   
>> On 10/08/2009 03:49 PM, Anthony Liguori wrote:
>>     
>>> Glauber Costa wrote:
>>>       
>>>> This patch provides kvm with an in-kernel ioapic. We are currently 
>>>> not enabling it.
>>>> The code is heavily based on what's in qemu-kvm.git.
>>>>         
>>> It really ought to be it's own file and own device model.  Having the 
>>> code mixed in with ioapic.c is confusing because it's unclear what 
>>> code is in use when the in-kernel model is used.
>>>       
>> I disagree.  It's the same device with the same guest-visible interface 
>> and the same host-visible interface (save/restore, 'info ioapic' if we 
>> write one).  Splitting it into two files will only result in code 
>> duplication.
>>
>> Think of it as an ioapic accelerator.
>>     
>
> Haven't we already confirmed that it *isn't* just an ioapic accelerator
> because you can't migrate between in-kernel iopic and qemu's ioapic?
>
> Imho, if they cannot be swapped transparently, they are different
> device emulations.
>
> OF course there's nothing wrong with sharing lots of code.
>   

If you avoid having a common save format, you get an overall reduction 
in code size and there's virtually no code to share.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:07             ` Jamie Lokier
@ 2009-10-08 16:17               ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 16:17 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> Haven't we already confirmed that it *isn't* just an ioapic accelerator
> because you can't migrate between in-kernel iopic and qemu's ioapic?
>    

We haven't confirmed it.  Both implement the same spec, and if you can't 
migrate between them, one of them has a bug (for example, qemu ioapic 
doesn't implement polarity - but it's still just a bug).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:17               ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 16:17 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> Haven't we already confirmed that it *isn't* just an ioapic accelerator
> because you can't migrate between in-kernel iopic and qemu's ioapic?
>    

We haven't confirmed it.  Both implement the same spec, and if you can't 
migrate between them, one of them has a bug (for example, qemu ioapic 
doesn't implement polarity - but it's still just a bug).

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:17               ` Avi Kivity
@ 2009-10-08 16:22                 ` Gleb Natapov
  -1 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 16:22 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Jamie Lokier, Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> >because you can't migrate between in-kernel iopic and qemu's ioapic?
> 
> We haven't confirmed it.  Both implement the same spec, and if you
> can't migrate between them, one of them has a bug (for example, qemu
> ioapic doesn't implement polarity - but it's still just a bug).
> 
Are you saying that HW spec (that only describes software visible behavior)
completely defines implementation? No other internal state is needed
that may be done differently by different implementations?

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:22                 ` Gleb Natapov
  0 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 16:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> >because you can't migrate between in-kernel iopic and qemu's ioapic?
> 
> We haven't confirmed it.  Both implement the same spec, and if you
> can't migrate between them, one of them has a bug (for example, qemu
> ioapic doesn't implement polarity - but it's still just a bug).
> 
Are you saying that HW spec (that only describes software visible behavior)
completely defines implementation? No other internal state is needed
that may be done differently by different implementations?

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:22                 ` Gleb Natapov
@ 2009-10-08 16:29                   ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 16:29 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Jamie Lokier, Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 06:22 PM, Gleb Natapov wrote:
> On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
>    
>> On 10/08/2009 06:07 PM, Jamie Lokier wrote:
>>      
>>> Haven't we already confirmed that it *isn't* just an ioapic accelerator
>>> because you can't migrate between in-kernel iopic and qemu's ioapic?
>>>        
>> We haven't confirmed it.  Both implement the same spec, and if you
>> can't migrate between them, one of them has a bug (for example, qemu
>> ioapic doesn't implement polarity - but it's still just a bug).
>>
>>      
> Are you saying that HW spec (that only describes software visible behavior)
> completely defines implementation? No other internal state is needed
> that may be done differently by different implementations?
>    

It may be done differently (for example, selecting the cpu to deliver 
the interrupt to), but as the guest cannot rely on the differences, 
there's no need to save the state that can cause these differences.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:29                   ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 16:29 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On 10/08/2009 06:22 PM, Gleb Natapov wrote:
> On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
>    
>> On 10/08/2009 06:07 PM, Jamie Lokier wrote:
>>      
>>> Haven't we already confirmed that it *isn't* just an ioapic accelerator
>>> because you can't migrate between in-kernel iopic and qemu's ioapic?
>>>        
>> We haven't confirmed it.  Both implement the same spec, and if you
>> can't migrate between them, one of them has a bug (for example, qemu
>> ioapic doesn't implement polarity - but it's still just a bug).
>>
>>      
> Are you saying that HW spec (that only describes software visible behavior)
> completely defines implementation? No other internal state is needed
> that may be done differently by different implementations?
>    

It may be done differently (for example, selecting the cpu to deliver 
the interrupt to), but as the guest cannot rely on the differences, 
there's no need to save the state that can cause these differences.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:29                   ` Avi Kivity
@ 2009-10-08 16:34                     ` Gleb Natapov
  -1 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 16:34 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Jamie Lokier, Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 06:29:53PM +0200, Avi Kivity wrote:
> On 10/08/2009 06:22 PM, Gleb Natapov wrote:
> >On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> >>On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> >>>Haven't we already confirmed that it *isn't* just an ioapic accelerator
> >>>because you can't migrate between in-kernel iopic and qemu's ioapic?
> >>We haven't confirmed it.  Both implement the same spec, and if you
> >>can't migrate between them, one of them has a bug (for example, qemu
> >>ioapic doesn't implement polarity - but it's still just a bug).
> >>
> >Are you saying that HW spec (that only describes software visible behavior)
> >completely defines implementation? No other internal state is needed
> >that may be done differently by different implementations?
> 
> It may be done differently (for example, selecting the cpu to
> deliver the interrupt to), but as the guest cannot rely on the
> differences, there's no need to save the state that can cause these
> differences.
> 
So suppose I have simple watchdog device that required to be poked every
second, otherwise it resets a computer. On migration we have to migrate
time elapsed since last poke, but if device doesn't expose it to
software in any way you are saying we can recreate is some other way?

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:34                     ` Gleb Natapov
  0 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 16:34 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On Thu, Oct 08, 2009 at 06:29:53PM +0200, Avi Kivity wrote:
> On 10/08/2009 06:22 PM, Gleb Natapov wrote:
> >On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> >>On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> >>>Haven't we already confirmed that it *isn't* just an ioapic accelerator
> >>>because you can't migrate between in-kernel iopic and qemu's ioapic?
> >>We haven't confirmed it.  Both implement the same spec, and if you
> >>can't migrate between them, one of them has a bug (for example, qemu
> >>ioapic doesn't implement polarity - but it's still just a bug).
> >>
> >Are you saying that HW spec (that only describes software visible behavior)
> >completely defines implementation? No other internal state is needed
> >that may be done differently by different implementations?
> 
> It may be done differently (for example, selecting the cpu to
> deliver the interrupt to), but as the guest cannot rely on the
> differences, there's no need to save the state that can cause these
> differences.
> 
So suppose I have simple watchdog device that required to be poked every
second, otherwise it resets a computer. On migration we have to migrate
time elapsed since last poke, but if device doesn't expose it to
software in any way you are saying we can recreate is some other way?

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:34                     ` Gleb Natapov
@ 2009-10-08 16:42                       ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 16:42 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Jamie Lokier, Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> So suppose I have simple watchdog device that required to be poked every
> second, otherwise it resets a computer. On migration we have to migrate
> time elapsed since last poke, but if device doesn't expose it to
> software in any way you are saying we can recreate is some other way?
>    

The time is exposed (you can measure it by poking the device and 
measuring the time till reset) and will be described in the spec 
(otherwise users will be surprised when their machine resets).

You don't have to migrate all exposed state, just exposed state that the 
guest may rely on.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 16:42                       ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-08 16:42 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> So suppose I have simple watchdog device that required to be poked every
> second, otherwise it resets a computer. On migration we have to migrate
> time elapsed since last poke, but if device doesn't expose it to
> software in any way you are saying we can recreate is some other way?
>    

The time is exposed (you can measure it by poking the device and 
measuring the time till reset) and will be described in the spec 
(otherwise users will be surprised when their machine resets).

You don't have to migrate all exposed state, just exposed state that the 
guest may rely on.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:42                       ` Avi Kivity
@ 2009-10-08 17:11                         ` Gleb Natapov
  -1 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 17:11 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Jamie Lokier, Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On Thu, Oct 08, 2009 at 06:42:07PM +0200, Avi Kivity wrote:
> On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> >So suppose I have simple watchdog device that required to be poked every
> >second, otherwise it resets a computer. On migration we have to migrate
> >time elapsed since last poke, but if device doesn't expose it to
> >software in any way you are saying we can recreate is some other way?
> 
> The time is exposed (you can measure it by poking the device and
The time yes, not its internal representation. What if one implementation 
stores how much time passed and another how much time left. One counts
in wall clack another only when guest runs. etc... and this is a device
with only one write only register.

> measuring the time till reset) and will be described in the spec
> (otherwise users will be surprised when their machine resets).
> 
> You don't have to migrate all exposed state, just exposed state that
> the guest may rely on.

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-08 17:11                         ` Gleb Natapov
  0 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-08 17:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, Glauber Costa, kvm-devel, qemu-devel

On Thu, Oct 08, 2009 at 06:42:07PM +0200, Avi Kivity wrote:
> On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> >So suppose I have simple watchdog device that required to be poked every
> >second, otherwise it resets a computer. On migration we have to migrate
> >time elapsed since last poke, but if device doesn't expose it to
> >software in any way you are saying we can recreate is some other way?
> 
> The time is exposed (you can measure it by poking the device and
The time yes, not its internal representation. What if one implementation 
stores how much time passed and another how much time left. One counts
in wall clack another only when guest runs. etc... and this is a device
with only one write only register.

> measuring the time till reset) and will be described in the spec
> (otherwise users will be surprised when their machine resets).
> 
> You don't have to migrate all exposed state, just exposed state that
> the guest may rely on.

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 17:11                         ` Gleb Natapov
@ 2009-10-09 10:02                           ` Jamie Lokier
  -1 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 10:02 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Anthony Liguori, Glauber Costa, Avi Kivity, kvm-devel, qemu-devel

Gleb Natapov wrote:
> On Thu, Oct 08, 2009 at 06:42:07PM +0200, Avi Kivity wrote:
> > On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> > >So suppose I have simple watchdog device that required to be poked every
> > >second, otherwise it resets a computer. On migration we have to migrate
> > >time elapsed since last poke, but if device doesn't expose it to
> > >software in any way you are saying we can recreate is some other way?
> > 
> > The time is exposed (you can measure it by poking the device and
> The time yes, not its internal representation. What if one implementation 
> stores how much time passed and another how much time left.
> One counts in wall clack another only when guest runs. etc... and
> this is a device with only one write only register.

In that case you can decide between calling it two different devices
(which have the same guest-visible behaviour but are not
interchangable), or calling them different implementations of one
device - by adding a little more code to save state in a common format.

(Although they may have to be different devices for qemu
configuration, it's ok for them to have the same PCI IDs and for the
guest not to know the difference)

For your watchdog example, it's not hard to pick a saved state which
works for both.

ioapic will be harder to find a useful common saved state, and there
might need to be an *optional hint* section (e.g. for selecting the
next CPU to get an interrupt), but I think it would be worth it in
this case.  Being able to load a KVM image into TCG and vice versa is
quite useful sometimes.  E.g. I've had to do some OS installs using
TCG at first, then switch to KVM later for performance.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-09 10:02                           ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 10:02 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Anthony Liguori, Glauber Costa, Avi Kivity, kvm-devel, qemu-devel

Gleb Natapov wrote:
> On Thu, Oct 08, 2009 at 06:42:07PM +0200, Avi Kivity wrote:
> > On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> > >So suppose I have simple watchdog device that required to be poked every
> > >second, otherwise it resets a computer. On migration we have to migrate
> > >time elapsed since last poke, but if device doesn't expose it to
> > >software in any way you are saying we can recreate is some other way?
> > 
> > The time is exposed (you can measure it by poking the device and
> The time yes, not its internal representation. What if one implementation 
> stores how much time passed and another how much time left.
> One counts in wall clack another only when guest runs. etc... and
> this is a device with only one write only register.

In that case you can decide between calling it two different devices
(which have the same guest-visible behaviour but are not
interchangable), or calling them different implementations of one
device - by adding a little more code to save state in a common format.

(Although they may have to be different devices for qemu
configuration, it's ok for them to have the same PCI IDs and for the
guest not to know the difference)

For your watchdog example, it's not hard to pick a saved state which
works for both.

ioapic will be harder to find a useful common saved state, and there
might need to be an *optional hint* section (e.g. for selecting the
next CPU to get an interrupt), but I think it would be worth it in
this case.  Being able to load a KVM image into TCG and vice versa is
quite useful sometimes.  E.g. I've had to do some OS installs using
TCG at first, then switch to KVM later for performance.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-08 14:22             ` Glauber Costa
@ 2009-10-09 10:06                 ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 10:06 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

Glauber Costa wrote:
> > It ensures the two models are compatible.  Since they're the same device  
> > from the point of view of the guest, there's no reason for them to have  
> > different representations or to be incompatible.
> 
> live migration between something that has in-kernel irqchip and
> something that has not is likely to be a completely untested
> thing. And this is the only reason we might think of it as the same
> device. I don't see any value in supporting this combination

Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
for example.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-09 10:06                 ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 10:06 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

Glauber Costa wrote:
> > It ensures the two models are compatible.  Since they're the same device  
> > from the point of view of the guest, there's no reason for them to have  
> > different representations or to be incompatible.
> 
> live migration between something that has in-kernel irqchip and
> something that has not is likely to be a completely untested
> thing. And this is the only reason we might think of it as the same
> device. I don't see any value in supporting this combination

Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
for example.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-09 10:02                           ` [Qemu-devel] " Jamie Lokier
@ 2009-10-09 12:02                             ` Gleb Natapov
  -1 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-09 12:02 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Avi Kivity, Anthony Liguori, Glauber Costa, qemu-devel, kvm-devel

On Fri, Oct 09, 2009 at 11:02:31AM +0100, Jamie Lokier wrote:
> Gleb Natapov wrote:
> > On Thu, Oct 08, 2009 at 06:42:07PM +0200, Avi Kivity wrote:
> > > On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> > > >So suppose I have simple watchdog device that required to be poked every
> > > >second, otherwise it resets a computer. On migration we have to migrate
> > > >time elapsed since last poke, but if device doesn't expose it to
> > > >software in any way you are saying we can recreate is some other way?
> > > 
> > > The time is exposed (you can measure it by poking the device and
> > The time yes, not its internal representation. What if one implementation 
> > stores how much time passed and another how much time left.
> > One counts in wall clack another only when guest runs. etc... and
> > this is a device with only one write only register.
> 
> In that case you can decide between calling it two different devices
> (which have the same guest-visible behaviour but are not
> interchangable), or calling them different implementations of one
> device - by adding a little more code to save state in a common format.
> 
That is what currency done for in-kernel/out-of-kernel irq chips. Save
state transformation. The problem begins if one of the devices has more
state (not just the same state but in a different format). You need to
drop info on migration.

> (Although they may have to be different devices for qemu
> configuration, it's ok for them to have the same PCI IDs and for the
> guest not to know the difference)
> 
> For your watchdog example, it's not hard to pick a saved state which
> works for both.
If you can't migrate from one to the other why even bother? In my
example if one device counts wallclock time and another guest cpu time
you can't migrate from one implementation to another.

> 
> ioapic will be harder to find a useful common saved state, and there
> might need to be an *optional hint* section (e.g. for selecting the
> next CPU to get an interrupt), but I think it would be worth it in
> this case.  Being able to load a KVM image into TCG and vice versa is
> quite useful sometimes.  E.g. I've had to do some OS installs using
> TCG at first, then switch to KVM later for performance.
> 
Reboot :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-09 12:02                             ` Gleb Natapov
  0 siblings, 0 replies; 104+ messages in thread
From: Gleb Natapov @ 2009-10-09 12:02 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Anthony Liguori, Glauber Costa, Avi Kivity, kvm-devel, qemu-devel

On Fri, Oct 09, 2009 at 11:02:31AM +0100, Jamie Lokier wrote:
> Gleb Natapov wrote:
> > On Thu, Oct 08, 2009 at 06:42:07PM +0200, Avi Kivity wrote:
> > > On 10/08/2009 06:34 PM, Gleb Natapov wrote:
> > > >So suppose I have simple watchdog device that required to be poked every
> > > >second, otherwise it resets a computer. On migration we have to migrate
> > > >time elapsed since last poke, but if device doesn't expose it to
> > > >software in any way you are saying we can recreate is some other way?
> > > 
> > > The time is exposed (you can measure it by poking the device and
> > The time yes, not its internal representation. What if one implementation 
> > stores how much time passed and another how much time left.
> > One counts in wall clack another only when guest runs. etc... and
> > this is a device with only one write only register.
> 
> In that case you can decide between calling it two different devices
> (which have the same guest-visible behaviour but are not
> interchangable), or calling them different implementations of one
> device - by adding a little more code to save state in a common format.
> 
That is what currency done for in-kernel/out-of-kernel irq chips. Save
state transformation. The problem begins if one of the devices has more
state (not just the same state but in a different format). You need to
drop info on migration.

> (Although they may have to be different devices for qemu
> configuration, it's ok for them to have the same PCI IDs and for the
> guest not to know the difference)
> 
> For your watchdog example, it's not hard to pick a saved state which
> works for both.
If you can't migrate from one to the other why even bother? In my
example if one device counts wallclock time and another guest cpu time
you can't migrate from one implementation to another.

> 
> ioapic will be harder to find a useful common saved state, and there
> might need to be an *optional hint* section (e.g. for selecting the
> next CPU to get an interrupt), but I think it would be worth it in
> this case.  Being able to load a KVM image into TCG and vice versa is
> quite useful sometimes.  E.g. I've had to do some OS installs using
> TCG at first, then switch to KVM later for performance.
> 
Reboot :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-09 10:06                 ` [Qemu-devel] " Jamie Lokier
@ 2009-10-09 14:30                   ` Glauber Costa
  -1 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 14:30 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
> Glauber Costa wrote:
> > > It ensures the two models are compatible.  Since they're the same device  
> > > from the point of view of the guest, there's no reason for them to have  
> > > different representations or to be incompatible.
> > 
> > live migration between something that has in-kernel irqchip and
> > something that has not is likely to be a completely untested
> > thing. And this is the only reason we might think of it as the same
> > device. I don't see any value in supporting this combination
> 
> Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
> for example.
Yes, but in this case too, I'd expect the irqchipness of qemu not to change.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-09 14:30                   ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 14:30 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
> Glauber Costa wrote:
> > > It ensures the two models are compatible.  Since they're the same device  
> > > from the point of view of the guest, there's no reason for them to have  
> > > different representations or to be incompatible.
> > 
> > live migration between something that has in-kernel irqchip and
> > something that has not is likely to be a completely untested
> > thing. And this is the only reason we might think of it as the same
> > device. I don't see any value in supporting this combination
> 
> Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
> for example.
Yes, but in this case too, I'd expect the irqchipness of qemu not to change.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-08 16:22                 ` Gleb Natapov
@ 2009-10-09 14:32                   ` Glauber Costa
  -1 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 14:32 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
> On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> > >because you can't migrate between in-kernel iopic and qemu's ioapic?
> > 
> > We haven't confirmed it.  Both implement the same spec, and if you
> > can't migrate between them, one of them has a bug (for example, qemu
> > ioapic doesn't implement polarity - but it's still just a bug).
> > 
> Are you saying that HW spec (that only describes software visible behavior)
> completely defines implementation? No other internal state is needed
> that may be done differently by different implementations?
Most specifications leaves a lot as implementation specific.

It's not hard to imagine a case in which both devices will follow the spec correctly,
(no bugs involved), and yet differ in the implementation.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-09 14:32                   ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 14:32 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
> On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> > >because you can't migrate between in-kernel iopic and qemu's ioapic?
> > 
> > We haven't confirmed it.  Both implement the same spec, and if you
> > can't migrate between them, one of them has a bug (for example, qemu
> > ioapic doesn't implement polarity - but it's still just a bug).
> > 
> Are you saying that HW spec (that only describes software visible behavior)
> completely defines implementation? No other internal state is needed
> that may be done differently by different implementations?
Most specifications leaves a lot as implementation specific.

It's not hard to imagine a case in which both devices will follow the spec correctly,
(no bugs involved), and yet differ in the implementation.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-09 14:30                   ` [Qemu-devel] " Glauber Costa
@ 2009-10-09 16:48                     ` Jamie Lokier
  -1 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 16:48 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Avi Kivity, Anthony Liguori, qemu-devel, kvm-devel

Glauber Costa wrote:
> On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
> > Glauber Costa wrote:
> > > > It ensures the two models are compatible.  Since they're the same device  
> > > > from the point of view of the guest, there's no reason for them to have  
> > > > different representations or to be incompatible.
> > > 
> > > live migration between something that has in-kernel irqchip and
> > > something that has not is likely to be a completely untested
> > > thing. And this is the only reason we might think of it as the same
> > > device. I don't see any value in supporting this combination
> > 
> > Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
> > for example.
> Yes, but in this case too, I'd expect the irqchipness of qemu not to change.

If I've just been sent an image produced by someone running KVM, and
my machine is not KVM-capable, or I cannot upgrade the KVM kernel
module because it's in use by other VMs (had this problem a few
times), there's no choice but to change the irqchipness.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-09 16:48                     ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 16:48 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

Glauber Costa wrote:
> On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
> > Glauber Costa wrote:
> > > > It ensures the two models are compatible.  Since they're the same device  
> > > > from the point of view of the guest, there's no reason for them to have  
> > > > different representations or to be incompatible.
> > > 
> > > live migration between something that has in-kernel irqchip and
> > > something that has not is likely to be a completely untested
> > > thing. And this is the only reason we might think of it as the same
> > > device. I don't see any value in supporting this combination
> > 
> > Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
> > for example.
> Yes, but in this case too, I'd expect the irqchipness of qemu not to change.

If I've just been sent an image produced by someone running KVM, and
my machine is not KVM-capable, or I cannot upgrade the KVM kernel
module because it's in use by other VMs (had this problem a few
times), there's no choice but to change the irqchipness.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-09 14:32                   ` [Qemu-devel] " Glauber Costa
@ 2009-10-09 16:49                     ` Jamie Lokier
  -1 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 16:49 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Gleb Natapov, Avi Kivity, Anthony Liguori, qemu-devel, kvm-devel

Glauber Costa wrote:
> On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
> > On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> > > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> > > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> > > >because you can't migrate between in-kernel iopic and qemu's ioapic?
> > > 
> > > We haven't confirmed it.  Both implement the same spec, and if you
> > > can't migrate between them, one of them has a bug (for example, qemu
> > > ioapic doesn't implement polarity - but it's still just a bug).
> > > 
> > Are you saying that HW spec (that only describes software visible behavior)
> > completely defines implementation? No other internal state is needed
> > that may be done differently by different implementations?
> Most specifications leaves a lot as implementation specific.
> 
> It's not hard to imagine a case in which both devices will follow
> the spec correctly, (no bugs involved), and yet differ in the
> implementation.

Avi's not saying the implementations won't differ.  I believe he's
saying that implementation-specific states don't need to be saved if
they have no effect on guest visible behaviour.

-- Jamie


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-09 16:49                     ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-09 16:49 UTC (permalink / raw)
  To: Glauber Costa
  Cc: kvm-devel, Anthony Liguori, Avi Kivity, Gleb Natapov, qemu-devel

Glauber Costa wrote:
> On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
> > On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> > > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> > > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> > > >because you can't migrate between in-kernel iopic and qemu's ioapic?
> > > 
> > > We haven't confirmed it.  Both implement the same spec, and if you
> > > can't migrate between them, one of them has a bug (for example, qemu
> > > ioapic doesn't implement polarity - but it's still just a bug).
> > > 
> > Are you saying that HW spec (that only describes software visible behavior)
> > completely defines implementation? No other internal state is needed
> > that may be done differently by different implementations?
> Most specifications leaves a lot as implementation specific.
> 
> It's not hard to imagine a case in which both devices will follow
> the spec correctly, (no bugs involved), and yet differ in the
> implementation.

Avi's not saying the implementations won't differ.  I believe he's
saying that implementation-specific states don't need to be saved if
they have no effect on guest visible behaviour.

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-09 16:48                     ` Jamie Lokier
@ 2009-10-09 18:06                       ` Glauber Costa
  -1 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 18:06 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

On Fri, Oct 09, 2009 at 05:48:31PM +0100, Jamie Lokier wrote:
> Glauber Costa wrote:
> > On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
> > > Glauber Costa wrote:
> > > > > It ensures the two models are compatible.  Since they're the same device  
> > > > > from the point of view of the guest, there's no reason for them to have  
> > > > > different representations or to be incompatible.
> > > > 
> > > > live migration between something that has in-kernel irqchip and
> > > > something that has not is likely to be a completely untested
> > > > thing. And this is the only reason we might think of it as the same
> > > > device. I don't see any value in supporting this combination
> > > 
> > > Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
> > > for example.
> > Yes, but in this case too, I'd expect the irqchipness of qemu not to change.
> 
> If I've just been sent an image produced by someone running KVM, and
> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
> module because it's in use by other VMs (had this problem a few
> times), there's no choice but to change the irqchipness.
As gleb mentioned, requiring such a change to happen offline (across a reboot)
is not that much of a pain.

There are thousands of scenarios in which it will have to happen anyway,
including major bumps in qemu version. 

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-09 18:06                       ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 18:06 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Anthony Liguori, Avi Kivity, kvm-devel, qemu-devel

On Fri, Oct 09, 2009 at 05:48:31PM +0100, Jamie Lokier wrote:
> Glauber Costa wrote:
> > On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
> > > Glauber Costa wrote:
> > > > > It ensures the two models are compatible.  Since they're the same device  
> > > > > from the point of view of the guest, there's no reason for them to have  
> > > > > different representations or to be incompatible.
> > > > 
> > > > live migration between something that has in-kernel irqchip and
> > > > something that has not is likely to be a completely untested
> > > > thing. And this is the only reason we might think of it as the same
> > > > device. I don't see any value in supporting this combination
> > > 
> > > Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
> > > for example.
> > Yes, but in this case too, I'd expect the irqchipness of qemu not to change.
> 
> If I've just been sent an image produced by someone running KVM, and
> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
> module because it's in use by other VMs (had this problem a few
> times), there's no choice but to change the irqchipness.
As gleb mentioned, requiring such a change to happen offline (across a reboot)
is not that much of a pain.

There are thousands of scenarios in which it will have to happen anyway,
including major bumps in qemu version. 

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-09 16:48                     ` Jamie Lokier
@ 2009-10-09 19:49                       ` Anthony Liguori
  -1 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-09 19:49 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Glauber Costa, Avi Kivity, qemu-devel, kvm-devel

Jamie Lokier wrote:
> Glauber Costa wrote:
>   
>> On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
>>     
>>> Glauber Costa wrote:
>>>       
>>>>> It ensures the two models are compatible.  Since they're the same device  
>>>>> from the point of view of the guest, there's no reason for them to have  
>>>>> different representations or to be incompatible.
>>>>>           
>>>> live migration between something that has in-kernel irqchip and
>>>> something that has not is likely to be a completely untested
>>>> thing. And this is the only reason we might think of it as the same
>>>> device. I don't see any value in supporting this combination
>>>>         
>>> Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
>>> for example.
>>>       
>> Yes, but in this case too, I'd expect the irqchipness of qemu not to change.
>>     
>
> If I've just been sent an image produced by someone running KVM, and
> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
> module because it's in use by other VMs (had this problem a few
> times), there's no choice but to change the irqchipness.
>   

You cannot migrate from KVM to TCG so this use-case is irrelevant.


-- 
Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-09 19:49                       ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-09 19:49 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Glauber Costa, Avi Kivity, kvm-devel, qemu-devel

Jamie Lokier wrote:
> Glauber Costa wrote:
>   
>> On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote:
>>     
>>> Glauber Costa wrote:
>>>       
>>>>> It ensures the two models are compatible.  Since they're the same device  
>>>>> from the point of view of the guest, there's no reason for them to have  
>>>>> different representations or to be incompatible.
>>>>>           
>>>> live migration between something that has in-kernel irqchip and
>>>> something that has not is likely to be a completely untested
>>>> thing. And this is the only reason we might think of it as the same
>>>> device. I don't see any value in supporting this combination
>>>>         
>>> Not just live migration.  ACPI sleep + savevm + loadvm + ACPI resume,
>>> for example.
>>>       
>> Yes, but in this case too, I'd expect the irqchipness of qemu not to change.
>>     
>
> If I've just been sent an image produced by someone running KVM, and
> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
> module because it's in use by other VMs (had this problem a few
> times), there's no choice but to change the irqchipness.
>   

You cannot migrate from KVM to TCG so this use-case is irrelevant.


-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-09 16:49                     ` Jamie Lokier
  (?)
@ 2009-10-09 19:55                     ` Juan Quintela
  2009-10-09 21:34                         ` [Qemu-devel] " Glauber Costa
  2009-10-12 13:20                         ` [Qemu-devel] " Anthony Liguori
  -1 siblings, 2 replies; 104+ messages in thread
From: Juan Quintela @ 2009-10-09 19:55 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Glauber Costa, kvm-devel, Anthony Liguori, Avi Kivity,
	Gleb Natapov, qemu-devel

Jamie Lokier <jamie@shareable.org> wrote:
> Glauber Costa wrote:
>> On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
>> > On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
>> > > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
>> > > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
>> > > >because you can't migrate between in-kernel iopic and qemu's ioapic?
>> > > 
>> > > We haven't confirmed it.  Both implement the same spec, and if you
>> > > can't migrate between them, one of them has a bug (for example, qemu
>> > > ioapic doesn't implement polarity - but it's still just a bug).
>> > > 
>> > Are you saying that HW spec (that only describes software visible behavior)
>> > completely defines implementation? No other internal state is needed
>> > that may be done differently by different implementations?
>> Most specifications leaves a lot as implementation specific.
>> 
>> It's not hard to imagine a case in which both devices will follow
>> the spec correctly, (no bugs involved), and yet differ in the
>> implementation.
>
> Avi's not saying the implementations won't differ.  I believe he's
> saying that implementation-specific states don't need to be saved if
> they have no effect on guest visible behaviour.

Just to re-state.  I would also prefer to have a single device.  Reasons
(majority already told in the thread):
- We can switch between devices more easily
- They are emulating the same device.
- At the moment that you have two different devices, one of them will
  rot :(
- Adding state to the save/load format that is used only from one device
  is not a problem.

I notice that discussion is going nowhere, basically we are in the
state:
- people that want one device
  * they emulate the same hardware
  * lots of code is shared
  * they should be interchageable
  * if they are not interchageable, it is a bug
  * once that they are split, it is basically imposible to join then
    again.
- people that want 2 devices:
  * The devices can more easily diverge if they are two devices
  * They are not interchageable now
  * It allows you more freedom in changing any of them if they are
    separate.

As you can see, none of the proposals is a clear winner.  And what is
worse, we have the two maintainers (avi and anthony), the two with more
experience having to deal with this kind of situation disagreeing.

How to fix the impass?

Later, Juan.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-09 19:55                     ` Juan Quintela
@ 2009-10-09 21:34                         ` Glauber Costa
  2009-10-12 13:20                         ` [Qemu-devel] " Anthony Liguori
  1 sibling, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 21:34 UTC (permalink / raw)
  To: Juan Quintela
  Cc: Anthony Liguori, Gleb Natapov, kvm-devel, qemu-devel, Avi Kivity

On Fri, Oct 09, 2009 at 09:55:13PM +0200, Juan Quintela wrote:
> Jamie Lokier <jamie@shareable.org> wrote:
> > Glauber Costa wrote:
> >> On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
> >> > On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> >> > > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> >> > > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> >> > > >because you can't migrate between in-kernel iopic and qemu's ioapic?
> >> > > 
> >> > > We haven't confirmed it.  Both implement the same spec, and if you
> >> > > can't migrate between them, one of them has a bug (for example, qemu
> >> > > ioapic doesn't implement polarity - but it's still just a bug).
> >> > > 
> >> > Are you saying that HW spec (that only describes software visible behavior)
> >> > completely defines implementation? No other internal state is needed
> >> > that may be done differently by different implementations?
> >> Most specifications leaves a lot as implementation specific.
> >> 
> >> It's not hard to imagine a case in which both devices will follow
> >> the spec correctly, (no bugs involved), and yet differ in the
> >> implementation.
> >
> > Avi's not saying the implementations won't differ.  I believe he's
> > saying that implementation-specific states don't need to be saved if
> > they have no effect on guest visible behaviour.
> 
> Just to re-state.  I would also prefer to have a single device.  Reasons
> (majority already told in the thread):
> - We can switch between devices more easily
> - They are emulating the same device.
> - At the moment that you have two different devices, one of them will
>   rot :(
> - Adding state to the save/load format that is used only from one device
>   is not a problem.
> 
> I notice that discussion is going nowhere, basically we are in the
> state:
> - people that want one device
>   * they emulate the same hardware
>   * lots of code is shared
>   * they should be interchageable
>   * if they are not interchageable, it is a bug
>   * once that they are split, it is basically imposible to join then
>     again.
> - people that want 2 devices:
>   * The devices can more easily diverge if they are two devices
>   * They are not interchageable now
>   * It allows you more freedom in changing any of them if they are
>     separate.
> 
> As you can see, none of the proposals is a clear winner.  And what is
> worse, we have the two maintainers (avi and anthony), the two with more
> experience having to deal with this kind of situation disagreeing.
> 
> How to fix the impass?
a deathmatch?

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-09 21:34                         ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-10-09 21:34 UTC (permalink / raw)
  To: Juan Quintela
  Cc: Anthony Liguori, Gleb Natapov, kvm-devel, qemu-devel, Avi Kivity

On Fri, Oct 09, 2009 at 09:55:13PM +0200, Juan Quintela wrote:
> Jamie Lokier <jamie@shareable.org> wrote:
> > Glauber Costa wrote:
> >> On Thu, Oct 08, 2009 at 06:22:48PM +0200, Gleb Natapov wrote:
> >> > On Thu, Oct 08, 2009 at 06:17:57PM +0200, Avi Kivity wrote:
> >> > > On 10/08/2009 06:07 PM, Jamie Lokier wrote:
> >> > > >Haven't we already confirmed that it *isn't* just an ioapic accelerator
> >> > > >because you can't migrate between in-kernel iopic and qemu's ioapic?
> >> > > 
> >> > > We haven't confirmed it.  Both implement the same spec, and if you
> >> > > can't migrate between them, one of them has a bug (for example, qemu
> >> > > ioapic doesn't implement polarity - but it's still just a bug).
> >> > > 
> >> > Are you saying that HW spec (that only describes software visible behavior)
> >> > completely defines implementation? No other internal state is needed
> >> > that may be done differently by different implementations?
> >> Most specifications leaves a lot as implementation specific.
> >> 
> >> It's not hard to imagine a case in which both devices will follow
> >> the spec correctly, (no bugs involved), and yet differ in the
> >> implementation.
> >
> > Avi's not saying the implementations won't differ.  I believe he's
> > saying that implementation-specific states don't need to be saved if
> > they have no effect on guest visible behaviour.
> 
> Just to re-state.  I would also prefer to have a single device.  Reasons
> (majority already told in the thread):
> - We can switch between devices more easily
> - They are emulating the same device.
> - At the moment that you have two different devices, one of them will
>   rot :(
> - Adding state to the save/load format that is used only from one device
>   is not a problem.
> 
> I notice that discussion is going nowhere, basically we are in the
> state:
> - people that want one device
>   * they emulate the same hardware
>   * lots of code is shared
>   * they should be interchageable
>   * if they are not interchageable, it is a bug
>   * once that they are split, it is basically imposible to join then
>     again.
> - people that want 2 devices:
>   * The devices can more easily diverge if they are two devices
>   * They are not interchageable now
>   * It allows you more freedom in changing any of them if they are
>     separate.
> 
> As you can see, none of the proposals is a clear winner.  And what is
> worse, we have the two maintainers (avi and anthony), the two with more
> experience having to deal with this kind of situation disagreeing.
> 
> How to fix the impass?
a deathmatch?

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-09 19:49                       ` Anthony Liguori
@ 2009-10-11  9:10                         ` Avi Kivity
  -1 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-11  9:10 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel, kvm-devel

On 10/09/2009 09:49 PM, Anthony Liguori wrote:
>> If I've just been sent an image produced by someone running KVM, and
>> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
>> module because it's in use by other VMs (had this problem a few
>> times), there's no choice but to change the irqchipness.
>
>
> You cannot migrate from KVM to TCG so this use-case is irrelevant.

I agree it's mostly irrelevant, however nothing in principle prevents 
such a migration, as long as the cpuid feature bits are implemented on 
both sides.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-11  9:10                         ` Avi Kivity
  0 siblings, 0 replies; 104+ messages in thread
From: Avi Kivity @ 2009-10-11  9:10 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel, kvm-devel

On 10/09/2009 09:49 PM, Anthony Liguori wrote:
>> If I've just been sent an image produced by someone running KVM, and
>> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
>> module because it's in use by other VMs (had this problem a few
>> times), there's no choice but to change the irqchipness.
>
>
> You cannot migrate from KVM to TCG so this use-case is irrelevant.

I agree it's mostly irrelevant, however nothing in principle prevents 
such a migration, as long as the cpuid feature bits are implemented on 
both sides.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-08 15:14                                   ` Anthony Liguori
@ 2009-10-12 11:15                                     ` Gerd Hoffmann
  0 siblings, 0 replies; 104+ messages in thread
From: Gerd Hoffmann @ 2009-10-12 11:15 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Anthony Liguori, Glauber Costa, Avi Kivity, qemu-devel

>> (qemu) info pit
>> pit: counter 0x1234 period 0x3453
>>
>> or something.
>
> When the pit is qdev converted, it will have properties. counter and
> period could be two of those properties in which case, 'info qtree
> i8254' would show the above output or 'info qtree i8254-kvm' would show it.

Device state belongs more into vmstate than into properties.  Ideally 
each qdev device has the vmstate entry filled as well, so looking up the 
device in the tree then dump the state can be done.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-07 23:28                       ` Anthony Liguori
@ 2009-10-12 11:58                         ` Gerd Hoffmann
  2009-10-12 14:04                           ` Anthony Liguori
  0 siblings, 1 reply; 104+ messages in thread
From: Gerd Hoffmann @ 2009-10-12 11:58 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Andre Przywara, Glauber Costa, Anthony Liguori, qemu-devel

On 10/08/09 01:28, Anthony Liguori wrote:
> Glauber Costa wrote:
>> even if we have qdev on the irq controllers, one could still come up with
>> situations in which we'd like to force the use of one device over
>> another.
>
> Right, my assumption is that the devices will have different qdev names
> and therefore a user can select which one gets used.
>
> Right now, -device is just additive. I'm not sure the best way to
> express something like, replace this existing device with this new
> device. Maybe some trickery with id=. Gerd, any thoughts?

-M $machine gives you a barebone machine with all core devices which 
belong to it. You can't easily remove and/or replace devices. 
Especially not something central as the IRQ controller.  But also no 
other core components, i.e. you wouldn't stick a piix4 ide controller 
into a Q35 machine.  Just say 'no'.

That leaves two options:

   (1) create two devices, create new machines which use the kvm
       versions (aka -M pc-kvm).
   (2) make using the in-kernel kvm code a device property.

I think (2) is the way to go.  Especially with multiple devices having 
kvm support (apic, pit, more?) (1) becomes unmanageable.

Right now there is no way to set properties for devices *not* added via 
-device[1].  We'll need support for that anyway though (to set rtc 
properties for example), so we can have a apic.kvm property and switch 
between user/kernel implementions using it.

cheers,
   Gerd

[1] my idea for implementing that is extending the compat property
     mechanism.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-09 19:55                     ` Juan Quintela
@ 2009-10-12 13:20                         ` Anthony Liguori
  2009-10-12 13:20                         ` [Qemu-devel] " Anthony Liguori
  1 sibling, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 13:20 UTC (permalink / raw)
  To: Juan Quintela
  Cc: Jamie Lokier, Glauber Costa, kvm-devel, Avi Kivity, Gleb Natapov,
	qemu-devel

Juan Quintela wrote:
> I notice that discussion is going nowhere, basically we are in the
> state:
> - people that want one device
>   * they emulate the same hardware
>   * lots of code is shared
>   * they should be interchageable
>   * if they are not interchageable, it is a bug
>   * once that they are split, it is basically imposible to join then
>     again.
> - people that want 2 devices:
>   * The devices can more easily diverge if they are two devices
>   * They are not interchageable now
>   * It allows you more freedom in changing any of them if they are
>     separate.
>
> As you can see, none of the proposals is a clear winner.  And what is
> worse, we have the two maintainers (avi and anthony), the two with more
> experience having to deal with this kind of situation disagreeing.
>
> How to fix the impass?
>   

We already have the single device model implementation and the 
limitations are well known.  The best way to move forward is for someone 
to send out patches implementing separate device models.

At that point, it becomes a discussion of two concrete pieces of code 
verses hand waving.

> Later, Juan.
>   


-- 
Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-12 13:20                         ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 13:20 UTC (permalink / raw)
  To: Juan Quintela
  Cc: Gleb Natapov, kvm-devel, Glauber Costa, qemu-devel, Avi Kivity

Juan Quintela wrote:
> I notice that discussion is going nowhere, basically we are in the
> state:
> - people that want one device
>   * they emulate the same hardware
>   * lots of code is shared
>   * they should be interchageable
>   * if they are not interchageable, it is a bug
>   * once that they are split, it is basically imposible to join then
>     again.
> - people that want 2 devices:
>   * The devices can more easily diverge if they are two devices
>   * They are not interchageable now
>   * It allows you more freedom in changing any of them if they are
>     separate.
>
> As you can see, none of the proposals is a clear winner.  And what is
> worse, we have the two maintainers (avi and anthony), the two with more
> experience having to deal with this kind of situation disagreeing.
>
> How to fix the impass?
>   

We already have the single device model implementation and the 
limitations are well known.  The best way to move forward is for someone 
to send out patches implementing separate device models.

At that point, it becomes a discussion of two concrete pieces of code 
verses hand waving.

> Later, Juan.
>   


-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
  2009-10-11  9:10                         ` [Qemu-devel] " Avi Kivity
@ 2009-10-12 13:41                           ` Anthony Liguori
  -1 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 13:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jamie Lokier, Glauber Costa, qemu-devel, kvm-devel

Avi Kivity wrote:
> On 10/09/2009 09:49 PM, Anthony Liguori wrote:
>>> If I've just been sent an image produced by someone running KVM, and
>>> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
>>> module because it's in use by other VMs (had this problem a few
>>> times), there's no choice but to change the irqchipness.
>>
>>
>> You cannot migrate from KVM to TCG so this use-case is irrelevant.
>
> I agree it's mostly irrelevant, however nothing in principle prevents 
> such a migration, as long as the cpuid feature bits are implemented on 
> both sides.

Sure, in principle it's certainly possible but in practice, it isn't 
today and I don't see anything that's likely to happen in the near term 
future that would make it.

-- 
Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 4/9] provide in-kernel apic
@ 2009-10-12 13:41                           ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 13:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Glauber Costa, qemu-devel, kvm-devel

Avi Kivity wrote:
> On 10/09/2009 09:49 PM, Anthony Liguori wrote:
>>> If I've just been sent an image produced by someone running KVM, and
>>> my machine is not KVM-capable, or I cannot upgrade the KVM kernel
>>> module because it's in use by other VMs (had this problem a few
>>> times), there's no choice but to change the irqchipness.
>>
>>
>> You cannot migrate from KVM to TCG so this use-case is irrelevant.
>
> I agree it's mostly irrelevant, however nothing in principle prevents 
> such a migration, as long as the cpuid feature bits are implemented on 
> both sides.

Sure, in principle it's certainly possible but in practice, it isn't 
today and I don't see anything that's likely to happen in the near term 
future that would make it.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-12 11:58                         ` Gerd Hoffmann
@ 2009-10-12 14:04                           ` Anthony Liguori
  2009-10-12 15:34                             ` Gerd Hoffmann
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 14:04 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Andre Przywara, Glauber Costa, qemu-devel

Gerd Hoffmann wrote:
> On 10/08/09 01:28, Anthony Liguori wrote:
>> Glauber Costa wrote:
>>> even if we have qdev on the irq controllers, one could still come up 
>>> with
>>> situations in which we'd like to force the use of one device over
>>> another.
>>
>> Right, my assumption is that the devices will have different qdev names
>> and therefore a user can select which one gets used.
>>
>> Right now, -device is just additive. I'm not sure the best way to
>> express something like, replace this existing device with this new
>> device. Maybe some trickery with id=. Gerd, any thoughts?
>
> -M $machine gives you a barebone machine with all core devices which 
> belong to it. You can't easily remove and/or replace devices. 
> Especially not something central as the IRQ controller.  But also no 
> other core components, i.e. you wouldn't stick a piix4 ide controller 
> into a Q35 machine.  Just say 'no'.

This seems fundamentally flawed to me.  If you cannot remove a device 
from a machine using command line options, then how do we support 
something like -net none?  Do we make the default machine not contain a nic?

The value of a machine type to a user is that it presents a useful 
machine--not a barebones machine.  A user should not have to think about 
which type of nic they need or whether they want to enable usb.

>
> That leaves two options:
>
>   (1) create two devices, create new machines which use the kvm
>       versions (aka -M pc-kvm).
>   (2) make using the in-kernel kvm code a device property.

(3) Add the ability to remove device from a machine type.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-12 13:20                         ` [Qemu-devel] " Anthony Liguori
@ 2009-10-12 14:18                           ` Jamie Lokier
  -1 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-12 14:18 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Juan Quintela, Glauber Costa, kvm-devel, Avi Kivity,
	Gleb Natapov, qemu-devel

Anthony Liguori wrote:
> We already have the single device model implementation and the 
> limitations are well known.  The best way to move forward is for someone 
> to send out patches implementing separate device models.
> 
> At that point, it becomes a discussion of two concrete pieces of code 
> verses hand waving.

Out of curiosity now, what _are_ the behavioural differences between
the in-kernel irqchip and the qemu one?

Are the differences significant to guests, such that it might be
necessary to disable the in-kernel irqchip for some guests, or
conversely, necessary to use KVM for some guests?

Thanks,
-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-12 14:18                           ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-12 14:18 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: kvm-devel, Gleb Natapov, Glauber Costa, qemu-devel,
	Juan Quintela, Avi Kivity

Anthony Liguori wrote:
> We already have the single device model implementation and the 
> limitations are well known.  The best way to move forward is for someone 
> to send out patches implementing separate device models.
> 
> At that point, it becomes a discussion of two concrete pieces of code 
> verses hand waving.

Out of curiosity now, what _are_ the behavioural differences between
the in-kernel irqchip and the qemu one?

Are the differences significant to guests, such that it might be
necessary to disable the in-kernel irqchip for some guests, or
conversely, necessary to use KVM for some guests?

Thanks,
-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 3/9] provide in-kernel ioapic
  2009-10-12 14:18                           ` [Qemu-devel] " Jamie Lokier
@ 2009-10-12 14:49                             ` Anthony Liguori
  -1 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 14:49 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Anthony Liguori, Juan Quintela, Glauber Costa, kvm-devel,
	Avi Kivity, Gleb Natapov, qemu-devel

Jamie Lokier wrote:
> Anthony Liguori wrote:
>   
>> We already have the single device model implementation and the 
>> limitations are well known.  The best way to move forward is for someone 
>> to send out patches implementing separate device models.
>>
>> At that point, it becomes a discussion of two concrete pieces of code 
>> verses hand waving.
>>     
>
> Out of curiosity now, what _are_ the behavioural differences between
> the in-kernel irqchip and the qemu one?
>
> Are the differences significant to guests, such that it might be
> necessary to disable the in-kernel irqchip for some guests, or
> conversely, necessary to use KVM for some guests?
>   

No, the behavior differences are not terribly significant for the apic.  
Disabling it is really most useful for debugging purposes.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic
@ 2009-10-12 14:49                             ` Anthony Liguori
  0 siblings, 0 replies; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 14:49 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Anthony Liguori, kvm-devel, Gleb Natapov, Glauber Costa,
	qemu-devel, Juan Quintela, Avi Kivity

Jamie Lokier wrote:
> Anthony Liguori wrote:
>   
>> We already have the single device model implementation and the 
>> limitations are well known.  The best way to move forward is for someone 
>> to send out patches implementing separate device models.
>>
>> At that point, it becomes a discussion of two concrete pieces of code 
>> verses hand waving.
>>     
>
> Out of curiosity now, what _are_ the behavioural differences between
> the in-kernel irqchip and the qemu one?
>
> Are the differences significant to guests, such that it might be
> necessary to disable the in-kernel irqchip for some guests, or
> conversely, necessary to use KVM for some guests?
>   

No, the behavior differences are not terribly significant for the apic.  
Disabling it is really most useful for debugging purposes.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-12 14:04                           ` Anthony Liguori
@ 2009-10-12 15:34                             ` Gerd Hoffmann
  2009-10-12 16:31                               ` Anthony Liguori
  0 siblings, 1 reply; 104+ messages in thread
From: Gerd Hoffmann @ 2009-10-12 15:34 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Andre Przywara, Glauber Costa, qemu-devel

On 10/12/09 16:04, Anthony Liguori wrote:
> Gerd Hoffmann wrote:
>> -M $machine gives you a barebone machine with all core devices which
>> belong to it. You can't easily remove and/or replace devices.
>> Especially not something central as the IRQ controller. But also no
>> other core components, i.e. you wouldn't stick a piix4 ide controller
>> into a Q35 machine. Just say 'no'.
>
> This seems fundamentally flawed to me. If you cannot remove a device
> from a machine using command line options, then how do we support
> something like -net none?

'-net none' does not remove a nic.  It makes qemu not add the default nic.

> Do we make the default machine not contain a nic?
 >
> The value of a machine type to a user is that it presents a useful
> machine--not a barebones machine. A user should not have to think about
> which type of nic they need or whether they want to enable usb.

There are a bunch of places where qemu does something like

    if (user-did-not-specify-a-$foo-device) {
        add_$foo_device_with_defaults()
    }

That applies (for pc) to:

   * nic.
   * vga.
   * serial port.
   * parallel port.
   * cdrom drive.

For all of these (except cdrom) you can disable the creation of the 
default device via "-$dev none".  I don't consider them core devices.
Core devices are the ones which a machine can't live without, i.e. rtc, 
pic, ...

You can't add/remove core stuff like rtc, interrupt controller.  The 
virtual machine will simply not work then ...

>> (1) create two devices, create new machines which use the kvm
>> versions (aka -M pc-kvm).
>> (2) make using the in-kernel kvm code a device property.
>
> (3) Add the ability to remove device from a machine type.

-rtc none ?  Have fun booting your machine.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-12 15:34                             ` Gerd Hoffmann
@ 2009-10-12 16:31                               ` Anthony Liguori
  2009-10-13  8:05                                 ` Gerd Hoffmann
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-12 16:31 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Glauber Costa, qemu-devel

Gerd Hoffmann wrote:
> On 10/12/09 16:04, Anthony Liguori wrote:
>> Gerd Hoffmann wrote:
>>> -M $machine gives you a barebone machine with all core devices which
>>> belong to it. You can't easily remove and/or replace devices.
>>> Especially not something central as the IRQ controller. But also no
>>> other core components, i.e. you wouldn't stick a piix4 ide controller
>>> into a Q35 machine. Just say 'no'.
>>
>> This seems fundamentally flawed to me. If you cannot remove a device
>> from a machine using command line options, then how do we support
>> something like -net none?
>
> '-net none' does not remove a nic.  It makes qemu not add the default 
> nic.

qemu should not have the concept of a default nic.   Instead, if you 
choose the pc machine type, by default you get an e1000.  I think it's 
also reasonable to get a USB controller too along with the balloon 
driver and anything else we think a user could benefit from.

If you give devices well known ids, then changing a default device is 
not really that big of a deal.  It involves removing the device and 
adding a new device with that same device id.

For platform devices, like the interrupt controller/pic, the same 
principle could be applied to switch out a userspace irqchip/pit with 
the kvm kernel implementations.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-12 16:31                               ` Anthony Liguori
@ 2009-10-13  8:05                                 ` Gerd Hoffmann
  2009-10-13 15:33                                   ` Anthony Liguori
  2009-10-13 15:36                                   ` Anthony Liguori
  0 siblings, 2 replies; 104+ messages in thread
From: Gerd Hoffmann @ 2009-10-13  8:05 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, qemu-devel

On 10/12/09 18:31, Anthony Liguori wrote:
> Gerd Hoffmann wrote:
>> On 10/12/09 16:04, Anthony Liguori wrote:
>>> Gerd Hoffmann wrote:
>>>> -M $machine gives you a barebone machine with all core devices which
>>>> belong to it. You can't easily remove and/or replace devices.
>>>> Especially not something central as the IRQ controller. But also no
>>>> other core components, i.e. you wouldn't stick a piix4 ide controller
>>>> into a Q35 machine. Just say 'no'.
>>>
>>> This seems fundamentally flawed to me. If you cannot remove a device
>>> from a machine using command line options, then how do we support
>>> something like -net none?
>>
>> '-net none' does not remove a nic. It makes qemu not add the default nic.
>
> qemu should not have the concept of a default nic.

Well, right now it has (likewise for serial, vga, ...).  And it causes 
problems with the qdev way of managing devices, so we have to find a way 
to deal with it.

Right now the default devices are tied to a command line switch, i.e. in 
case there isn't a -serial switch specified qemu will add a default 
serial device, even in case one was added via -device isa-serial.

> Instead, if you
> choose the pc machine type, by default you get an e1000. I think it's
> also reasonable to get a USB controller too along with the balloon
> driver and anything else we think a user could benefit from.

Hmm.  It makes sense to have a usable default or example configuration.

I don't think the machine type should be that though.  IMO the machine 
type should be more like a chipset, i.e. the current pc type should 
include all core stuff (pit, pic, apic, rtc, ...) and the piix devices 
(host bridge, isa bridge, apci controller, ide controller, usb 
controller) but nothing else.

> If you give devices well known ids, then changing a default device is
> not really that big of a deal. It involves removing the device and
> adding a new device with that same device id.

Might be workable for anything added via -device, because you could do 
the operations in QemuOpts space, before actually creating qdev device 
instances.  And the user could switch the nic type via
'-set device.defaultnic.driver=rtl8139' then.

I still don't like the concept though.  Configuring a second nic would 
be different from configuring the first nic, because for the first 
you'll modify the default device, the second is added instead.  libvirt 
folks will hate us for doing this.

> For platform devices, like the interrupt controller/pic, the same
> principle could be applied to switch out a userspace irqchip/pit with
> the kvm kernel implementations.

Doesn't fly.  You can't simply add interrupt controllers via -device. 
They are tied way to much with the other core devices.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-13  8:05                                 ` Gerd Hoffmann
@ 2009-10-13 15:33                                   ` Anthony Liguori
  2009-10-13 19:26                                     ` Markus Armbruster
  2009-10-13 15:36                                   ` Anthony Liguori
  1 sibling, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-13 15:33 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Glauber Costa, qemu-devel

Gerd Hoffmann wrote:
>> qemu should not have the concept of a default nic.
>
> Well, right now it has (likewise for serial, vga, ...).  And it causes 
> problems with the qdev way of managing devices, so we have to find a 
> way to deal with it.

There are a few separate issues that should be treated differently.  For 
vga, it should just be a default device in the machine description.  A 
special id should be used for the default vga card and -vga std should 
have the effect of modifying the a vga device of that id to be std.  
IOW, it should be -device-remove id=default_vga -device 
stdvga,id=default_vga.

For serial, we default to having one port.  Again, it should have a well 
known id and -serial none should be -device-remove id=default_serial.  
For nics, it's a bit complicated by the fact that -net nic doesn't 
require a model and by default, each machine may have a default model.  
We will likely have to preserve this as a config option which is 
unfortunate.

That said, without -net none, we get a single network device.  Again, it 
should have a well known id and -net none should remove that device.
> Right now the default devices are tied to a command line switch, i.e. 
> in case there isn't a -serial switch specified qemu will add a default 
> serial device, even in case one was added via -device isa-serial.

I don't think -device has to have the same property of -serial.  That 
is, -device isa-serial doesn't need to replace the default serial device 
which is connected to a vc.  Rather, we should allow a user to modify 
the char device associated with that default serial device via -set.
 
We shouldn't think of it like we add a default -serial device if no 
-serial switch was used.  Instead, there has always been a default 
isa-serial device.  You can modify it via -set or you can remove it.  
-serial none removes it.  For the first occurrence of -serial, it 
behaves like -set.  Future occurrences of -set behave like -device 
isa-serial.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-13  8:05                                 ` Gerd Hoffmann
  2009-10-13 15:33                                   ` Anthony Liguori
@ 2009-10-13 15:36                                   ` Anthony Liguori
  2009-10-13 22:57                                     ` Jamie Lokier
  1 sibling, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-13 15:36 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Glauber Costa, qemu-devel

Gerd Hoffmann wrote:
>> For platform devices, like the interrupt controller/pic, the same
>> principle could be applied to switch out a userspace irqchip/pit with
>> the kvm kernel implementations.
>
> Doesn't fly.  You can't simply add interrupt controllers via -device. 
> They are tied way to much with the other core devices.

We definitely want a pc machine type and a kvm-pc machine type.  The 
later would change the default cpu to kvm64 and would use the in-kernel 
apic.

If we want to support non in-kernel apic for debugging, I'd suggest that 
we support it by introducing a third machine type: kvm-pc-debug.

When we complete the qdev conversion of pc, different machine types are 
just config files so it seems like a reasonable thing to do.

> cheers,
>   Gerd


-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-13 15:33                                   ` Anthony Liguori
@ 2009-10-13 19:26                                     ` Markus Armbruster
  2009-10-13 20:43                                       ` Anthony Liguori
  0 siblings, 1 reply; 104+ messages in thread
From: Markus Armbruster @ 2009-10-13 19:26 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, Gerd Hoffmann, qemu-devel

Anthony Liguori <aliguori@us.ibm.com> writes:

> Gerd Hoffmann wrote:
>>> qemu should not have the concept of a default nic.
>>
>> Well, right now it has (likewise for serial, vga, ...).  And it
>> causes problems with the qdev way of managing devices, so we have to
>> find a way to deal with it.
>
> There are a few separate issues that should be treated differently.
> For vga, it should just be a default device in the machine
> description.  A special id should be used for the default vga card and
> -vga std should have the effect of modifying the a vga device of that
> id to be std.  IOW, it should be -device-remove id=default_vga -device
> stdvga,id=default_vga.
>
> For serial, we default to having one port.  Again, it should have a
> well known id and -serial none should be -device-remove
> id=default_serial.  For nics, it's a bit complicated by the fact that
> -net nic doesn't require a model and by default, each machine may have
> a default model.  We will likely have to preserve this as a config
> option which is unfortunate.
>
> That said, without -net none, we get a single network device.  Again,
> it should have a well known id and -net none should remove that
> device.
>> Right now the default devices are tied to a command line switch,
>> i.e. in case there isn't a -serial switch specified qemu will add a
>> default serial device, even in case one was added via -device
>> isa-serial.
>
> I don't think -device has to have the same property of -serial.  That
> is, -device isa-serial doesn't need to replace the default serial
> device which is connected to a vc.  Rather, we should allow a user to
> modify the char device associated with that default serial device via
> -set.
>
> We shouldn't think of it like we add a default -serial device if no
> -serial switch was used.  Instead, there has always been a default
> isa-serial device.  You can modify it via -set or you can remove it.
> -serial none removes it.  For the first occurrence of -serial, it
> behaves like -set.  Future occurrences of -set behave like -device
> isa-serial.

The problem with that approach is in a part of Gerd's reply you snipped:

>> I still don't like the concept though.  Configuring a second nic would
>> be different from configuring the first nic, because for the first
>> you'll modify the default device, the second is added instead.
>> libvirt folks will hate us for doing this.

Having to use -set for configuring the first device of a kind, and
-device for the second is a bad user interface.  It's made worse by the
fact that you need -set only for some kinds of devices, namely the kinds
where QEMU provides a default.

I agree with Gerd that we should distinguish between required and
optional devices.  It's fine to require -set for modifying required
devices like RTC.  But when I configure my first and second NIC, I don't
really want to know that I'm actually modifying the first NIC and adding
the second NIC.

What about this: give the user a default FOO (for FOO in serial, NIC,
...) if he didn't configure one (no matter how, be it -device or some
legacy option to ask for FOOs).  This way, you ask for your first FOO
exactly like any other: -device.

Extra points for providing a way to say "do not give me any optional
devices I didn't explicitely ask for".

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-13 19:26                                     ` Markus Armbruster
@ 2009-10-13 20:43                                       ` Anthony Liguori
  2009-10-14  8:08                                         ` Gerd Hoffmann
  0 siblings, 1 reply; 104+ messages in thread
From: Anthony Liguori @ 2009-10-13 20:43 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: qemu-devel, Anthony Liguori, Glauber Costa, Gerd Hoffmann

>
>>> I still don't like the concept though.  Configuring a second nic would
>>> be different from configuring the first nic, because for the first
>>> you'll modify the default device, the second is added instead.
>>> libvirt folks will hate us for doing this.
>>>       
>
> Having to use -set for configuring the first device of a kind, and
> -device for the second is a bad user interface.  It's made worse by the
> fact that you need -set only for some kinds of devices, namely the kinds
> where QEMU provides a default.
>   

The guest already has a nic, if you want to modify an existing nic, then 
you use -set.  If you want to add a second nic, you use -device.  This 
isn't two difference behaviors.

> I agree with Gerd that we should distinguish between required and
> optional devices.  It's fine to require -set for modifying required
> devices like RTC.  But when I configure my first and second NIC, I don't
> really want to know that I'm actually modifying the first NIC and adding
> the second NIC.
>   

I think what you're arguing for is a truly bare bones machine type where 
there are no assumptions about having a first nic.

But let's look at this from a much higher level.  What is a user 
typically going to want to do?  They're probably going to want to change 
the networking settings globally.  They'll either want to always use tap 
and use the default network device or they're going to want to always 
have two nics because they have to physical devices with separate networks.

A user would either create a new machine type to use for all of their 
machines, or they would modify a global host config to change from slirp 
to tap.  It shouldn't be the common case that they are manually 
specifying -device command line options.  -device is pretty obtuse and 
really is an expert option for things like libvirt and as a placeholder 
for a proper config.

> What about this: give the user a default FOO (for FOO in serial, NIC,
> ...) if he didn't configure one (no matter how, be it -device or some
> legacy option to ask for FOOs).  This way, you ask for your first FOO
> exactly like any other: -device.
>   

You cannot express the concept "give a user FOO if they didn't ask for 
one" in a machine config file.  That forces machine information to be 
baked into qemu which would be unfortunate.  It suggests that you need 
another mechanism to configure what the type of these defaults are.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-13 15:36                                   ` Anthony Liguori
@ 2009-10-13 22:57                                     ` Jamie Lokier
  0 siblings, 0 replies; 104+ messages in thread
From: Jamie Lokier @ 2009-10-13 22:57 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Glauber Costa, Gerd Hoffmann, qemu-devel

Anthony Liguori wrote:
> We definitely want a pc machine type and a kvm-pc machine type.  The 
> later would change the default cpu to kvm64 and would use the in-kernel 
> apic.

kvm32-pc for 32-bit hosts?

-- Jamie

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [Qemu-devel] Re: [PATCH v2 9/9] Add -kvm option
  2009-10-13 20:43                                       ` Anthony Liguori
@ 2009-10-14  8:08                                         ` Gerd Hoffmann
  0 siblings, 0 replies; 104+ messages in thread
From: Gerd Hoffmann @ 2009-10-14  8:08 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Anthony Liguori, Glauber Costa, Markus Armbruster, qemu-devel

   Hi,

> I think what you're arguing for is a truly bare bones machine type where
> there are no assumptions about having a first nic.
>
> But let's look at this from a much higher level. What is a user
> typically going to want to do? They're probably going to want to change
> the networking settings globally. They'll either want to always use tap
> and use the default network device or they're going to want to always
> have two nics because they have to physical devices with separate networks.
>
> A user would either create a new machine type to use for all of their
> machines, or they would modify a global host config to change from slirp
> to tap. It shouldn't be the common case that they are manually
> specifying -device command line options. -device is pretty obtuse and
> really is an expert option for things like libvirt and as a placeholder
> for a proper config.

Time to polish and post my config file patches for discussion ...

I expect we'll have *two* kinds of config files:

#1 describes a machine type, i.e. -M $machine with only the really 
essential hardware.  Users should not have to deal with this at all. 
Something like the dtc bits from pbrook.

#2 describes the configuration of a virtual machine, i.e. what 
additional devices are plugged in, virtual disks, network setup, ...

I have patches for #2 which essentially read/write QemuOpts to a 
git-style config file.

We could simply ship a type #2 default config file with nic, vga, serial 
etc.  Or two, with the second being the -nographic variant.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip
  2009-11-16 17:12             ` [Qemu-devel] [PATCH v2 7/9] provide apic-kvm Glauber Costa
@ 2009-11-16 17:12               ` Glauber Costa
  0 siblings, 0 replies; 104+ messages in thread
From: Glauber Costa @ 2009-11-16 17:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

Now that we have all devices set up, this patch initializes the irqchip.
This is dependant on the io-thread, since we need someone to pull ourselves
out of the halted state.

I believe this should be the default when we are running over the io-thread.
Later on, I plan to post a patch that makes it optional.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 kvm-all.c |   20 ++++++++++++++++++++
 1 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index c46a411..7a3671f 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -433,6 +433,22 @@ int kvm_set_irq(int irq, int level, int *status)
     return 1;
 }
 
+static int kvm_create_irqchip(KVMState *s)
+{
+    int ret = 0;
+#if defined(CONFIG_IOTHREAD)
+    if (!kvm_check_extension(s, KVM_CAP_IRQCHIP))
+        return -1;
+
+    ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+    if (ret < 0)
+        return ret;
+
+    s->irqchip_in_kernel = 1;
+#endif
+    return ret;
+}
+
 int kvm_init(int smp_cpus)
 {
     static const char upgrade_note[] =
@@ -519,6 +535,10 @@ int kvm_init(int smp_cpus)
     }
 #endif
 
+    ret = kvm_create_irqchip(s);
+    if (ret < 0)
+        goto err;
+
     ret = kvm_arch_init(s, smp_cpus);
     if (ret < 0)
         goto err;
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 104+ messages in thread

end of thread, other threads:[~2009-11-16 17:12 UTC | newest]

Thread overview: 104+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-07 22:08 [Qemu-devel] [PATCH v2] Add in-kernel irqchip Glauber Costa
2009-10-07 22:08 ` [Qemu-devel] [PATCH v2 1/9] add base-addr field to io apic state Glauber Costa
2009-10-07 22:08   ` [Qemu-devel] [PATCH v2 2/9] Save missing fields in VMState Glauber Costa
2009-10-07 22:08     ` [Qemu-devel] [PATCH v2 3/9] provide in-kernel ioapic Glauber Costa
2009-10-07 22:08       ` [Qemu-devel] [PATCH v2 4/9] provide in-kernel apic Glauber Costa
2009-10-07 22:08         ` [Qemu-devel] [PATCH v2 5/9] provide apic_set_irq_delivered Glauber Costa
2009-10-07 22:08           ` [Qemu-devel] [PATCH v2 6/9] provide in-kernel i8259 chip Glauber Costa
2009-10-07 22:08             ` [Qemu-devel] [PATCH v2 7/9] initialize " Glauber Costa
2009-10-07 22:08               ` [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip Glauber Costa
2009-10-07 22:08                 ` [Qemu-devel] [PATCH v2 9/9] Add -kvm option Glauber Costa
2009-10-07 23:00                   ` [Qemu-devel] " Anthony Liguori
2009-10-07 23:14                     ` Glauber Costa
2009-10-07 23:28                       ` Anthony Liguori
2009-10-12 11:58                         ` Gerd Hoffmann
2009-10-12 14:04                           ` Anthony Liguori
2009-10-12 15:34                             ` Gerd Hoffmann
2009-10-12 16:31                               ` Anthony Liguori
2009-10-13  8:05                                 ` Gerd Hoffmann
2009-10-13 15:33                                   ` Anthony Liguori
2009-10-13 19:26                                     ` Markus Armbruster
2009-10-13 20:43                                       ` Anthony Liguori
2009-10-14  8:08                                         ` Gerd Hoffmann
2009-10-13 15:36                                   ` Anthony Liguori
2009-10-13 22:57                                     ` Jamie Lokier
2009-10-08 14:07                     ` Avi Kivity
2009-10-08 14:23                       ` Anthony Liguori
2009-10-08 14:30                         ` Avi Kivity
2009-10-08 14:33                           ` Anthony Liguori
2009-10-08 14:41                             ` Avi Kivity
2009-10-08 14:56                               ` Anthony Liguori
2009-10-08 15:05                                 ` Avi Kivity
2009-10-08 15:14                                   ` Anthony Liguori
2009-10-12 11:15                                     ` Gerd Hoffmann
2009-10-08 14:34                           ` Gleb Natapov
2009-10-08 12:02               ` [Qemu-devel] Re: [PATCH v2 7/9] initialize i8259 chip Jan Kiszka
2009-10-08 13:55         ` [PATCH v2 4/9] provide in-kernel apic Anthony Liguori
2009-10-08 13:55           ` [Qemu-devel] " Anthony Liguori
2009-10-08 14:09           ` Avi Kivity
2009-10-08 14:09             ` [Qemu-devel] " Avi Kivity
2009-10-08 14:22             ` Glauber Costa
2009-10-09 10:06               ` Jamie Lokier
2009-10-09 10:06                 ` [Qemu-devel] " Jamie Lokier
2009-10-09 14:30                 ` Glauber Costa
2009-10-09 14:30                   ` [Qemu-devel] " Glauber Costa
2009-10-09 16:48                   ` Jamie Lokier
2009-10-09 16:48                     ` Jamie Lokier
2009-10-09 18:06                     ` Glauber Costa
2009-10-09 18:06                       ` [Qemu-devel] " Glauber Costa
2009-10-09 19:49                     ` Anthony Liguori
2009-10-09 19:49                       ` Anthony Liguori
2009-10-11  9:10                       ` Avi Kivity
2009-10-11  9:10                         ` [Qemu-devel] " Avi Kivity
2009-10-12 13:41                         ` Anthony Liguori
2009-10-12 13:41                           ` Anthony Liguori
2009-10-08 14:26             ` Anthony Liguori
2009-10-08 14:26               ` Anthony Liguori
2009-10-08 14:31               ` Avi Kivity
2009-10-08 14:31                 ` Avi Kivity
2009-10-08 14:39                 ` Anthony Liguori
2009-10-08 14:46                   ` Glauber Costa
2009-10-08 14:46                     ` Glauber Costa
2009-10-08 14:44                 ` Glauber Costa
2009-10-08 14:44                   ` Glauber Costa
2009-10-08 11:46       ` [Qemu-devel] Re: [PATCH v2 3/9] provide in-kernel ioapic Jan Kiszka
2009-10-08 13:49       ` Anthony Liguori
2009-10-08 13:49         ` [Qemu-devel] " Anthony Liguori
2009-10-08 13:54         ` Avi Kivity
2009-10-08 13:54           ` [Qemu-devel] " Avi Kivity
2009-10-08 15:53           ` Jan Kiszka
2009-10-08 15:53             ` [Qemu-devel] " Jan Kiszka
2009-10-08 16:07           ` Jamie Lokier
2009-10-08 16:07             ` Jamie Lokier
2009-10-08 16:12             ` Anthony Liguori
2009-10-08 16:12               ` Anthony Liguori
2009-10-08 16:17             ` Avi Kivity
2009-10-08 16:17               ` Avi Kivity
2009-10-08 16:22               ` Gleb Natapov
2009-10-08 16:22                 ` Gleb Natapov
2009-10-08 16:29                 ` Avi Kivity
2009-10-08 16:29                   ` Avi Kivity
2009-10-08 16:34                   ` Gleb Natapov
2009-10-08 16:34                     ` Gleb Natapov
2009-10-08 16:42                     ` Avi Kivity
2009-10-08 16:42                       ` Avi Kivity
2009-10-08 17:11                       ` Gleb Natapov
2009-10-08 17:11                         ` Gleb Natapov
2009-10-09 10:02                         ` Jamie Lokier
2009-10-09 10:02                           ` [Qemu-devel] " Jamie Lokier
2009-10-09 12:02                           ` Gleb Natapov
2009-10-09 12:02                             ` Gleb Natapov
2009-10-09 14:32                 ` Glauber Costa
2009-10-09 14:32                   ` [Qemu-devel] " Glauber Costa
2009-10-09 16:49                   ` Jamie Lokier
2009-10-09 16:49                     ` Jamie Lokier
2009-10-09 19:55                     ` Juan Quintela
2009-10-09 21:34                       ` Glauber Costa
2009-10-09 21:34                         ` [Qemu-devel] " Glauber Costa
2009-10-12 13:20                       ` Anthony Liguori
2009-10-12 13:20                         ` [Qemu-devel] " Anthony Liguori
2009-10-12 14:18                         ` Jamie Lokier
2009-10-12 14:18                           ` [Qemu-devel] " Jamie Lokier
2009-10-12 14:49                           ` Anthony Liguori
2009-10-12 14:49                             ` [Qemu-devel] " Anthony Liguori
2009-11-16 17:11 [Qemu-devel] [PATCH v2 0/9] in-kernel irqchip Glauber Costa
2009-11-16 17:11 ` [Qemu-devel] [PATCH v2 1/9] introduce VMSTATE_U64 Glauber Costa
2009-11-16 17:12   ` [Qemu-devel] [PATCH v2 2/9] Provide ioapic-kvm Glauber Costa
2009-11-16 17:12     ` [Qemu-devel] [PATCH v2 3/9] provide apic_set_irq_delivered Glauber Costa
2009-11-16 17:12       ` [Qemu-devel] [PATCH v2 4/9] provide i8259-kvm Glauber Costa
2009-11-16 17:12         ` [Qemu-devel] [PATCH v2 5/9] Don't call apic functions directly from kvm code Glauber Costa
2009-11-16 17:12           ` [Qemu-devel] [PATCH v2 6/9] export kvm_put_mp_state Glauber Costa
2009-11-16 17:12             ` [Qemu-devel] [PATCH v2 7/9] provide apic-kvm Glauber Costa
2009-11-16 17:12               ` [Qemu-devel] [PATCH v2 8/9] Initialize in-kernel irqchip Glauber Costa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.