All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv3 01/13] qemu: make default_write_config use mask table
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:22   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:22 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and read-only bits unset.
Detect changes by comparing the original and new register values.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0ab5b94..235191d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -239,6 +239,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_mask(PCIDevice *dev)
+{
+    int i;
+    dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                              | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->wmask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -261,6 +272,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_mask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -334,6 +346,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t wmask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -349,12 +362,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        wmask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -463,118 +481,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t wmask = d->wmask[addr];
+        d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -847,16 +768,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 0405837..d0db402 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -100,16 +100,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -139,10 +147,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 01/13] qemu: make default_write_config use mask table
@ 2009-06-05 10:22   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:22 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and read-only bits unset.
Detect changes by comparing the original and new register values.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0ab5b94..235191d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -239,6 +239,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_mask(PCIDevice *dev)
+{
+    int i;
+    dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                              | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->wmask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -261,6 +272,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_mask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -334,6 +346,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t wmask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -349,12 +362,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        wmask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -463,118 +481,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t wmask = d->wmask[addr];
+        d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -847,16 +768,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 0405837..d0db402 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -100,16 +100,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -139,10 +147,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 01/13] qemu: make default_write_config use mask table
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:22 ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:22 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and read-only bits unset.
Detect changes by comparing the original and new register values.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0ab5b94..235191d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -239,6 +239,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_mask(PCIDevice *dev)
+{
+    int i;
+    dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                              | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->wmask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -261,6 +272,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_mask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -334,6 +346,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t wmask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -349,12 +362,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        wmask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -463,118 +481,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t wmask = d->wmask[addr];
+        d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -847,16 +768,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 0405837..d0db402 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -100,16 +100,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -139,10 +147,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 02/13] qemu: capability bits in pci save/restore
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add support for capability bits in save/restore for pci.
These will be used for MSI, where the capability might
be present or not as requested by the user, which does not
map well into a single version number.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   14 ++++++++++++--
 hw/pci.h |    4 ++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 235191d..361d741 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -127,12 +127,15 @@ int pci_bus_num(PCIBus *s)
 
 void pci_device_save(PCIDevice *s, QEMUFile *f)
 {
+    int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, 2); /* PCI device version */
+    qemu_put_be32(f, version); /* PCI device version */
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -141,7 +144,7 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     int i;
 
     version_id = qemu_get_be32(f);
-    if (version_id > 2)
+    if (version_id > 3)
         return -EINVAL;
     qemu_get_buffer(f, s->config, 256);
     pci_update_mappings(s);
@@ -149,6 +152,13 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
+    if (version_id >= 3)
+        s->cap_present = qemu_get_be32(f);
+    else
+        s->cap_present = 0;
+
+    if (s->cap_present & ~s->cap_supported)
+        return -EINVAL;
 
     return 0;
 }
diff --git a/hw/pci.h b/hw/pci.h
index d0db402..6f0803f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,6 +178,10 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits for save/load */
+    uint32_t cap_supported;
+    uint32_t cap_present;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 02/13] qemu: capability bits in pci save/restore
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add support for capability bits in save/restore for pci.
These will be used for MSI, where the capability might
be present or not as requested by the user, which does not
map well into a single version number.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   14 ++++++++++++--
 hw/pci.h |    4 ++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 235191d..361d741 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -127,12 +127,15 @@ int pci_bus_num(PCIBus *s)
 
 void pci_device_save(PCIDevice *s, QEMUFile *f)
 {
+    int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, 2); /* PCI device version */
+    qemu_put_be32(f, version); /* PCI device version */
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -141,7 +144,7 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     int i;
 
     version_id = qemu_get_be32(f);
-    if (version_id > 2)
+    if (version_id > 3)
         return -EINVAL;
     qemu_get_buffer(f, s->config, 256);
     pci_update_mappings(s);
@@ -149,6 +152,13 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
+    if (version_id >= 3)
+        s->cap_present = qemu_get_be32(f);
+    else
+        s->cap_present = 0;
+
+    if (s->cap_present & ~s->cap_supported)
+        return -EINVAL;
 
     return 0;
 }
diff --git a/hw/pci.h b/hw/pci.h
index d0db402..6f0803f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,6 +178,10 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits for save/load */
+    uint32_t cap_supported;
+    uint32_t cap_present;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 02/13] qemu: capability bits in pci save/restore
       [not found] <cover.1244192535.git.mst@redhat.com>
  2009-06-05 10:22 ` [PATCHv3 01/13] qemu: make default_write_config use mask table Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (22 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add support for capability bits in save/restore for pci.
These will be used for MSI, where the capability might
be present or not as requested by the user, which does not
map well into a single version number.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   14 ++++++++++++--
 hw/pci.h |    4 ++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 235191d..361d741 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -127,12 +127,15 @@ int pci_bus_num(PCIBus *s)
 
 void pci_device_save(PCIDevice *s, QEMUFile *f)
 {
+    int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, 2); /* PCI device version */
+    qemu_put_be32(f, version); /* PCI device version */
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -141,7 +144,7 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     int i;
 
     version_id = qemu_get_be32(f);
-    if (version_id > 2)
+    if (version_id > 3)
         return -EINVAL;
     qemu_get_buffer(f, s->config, 256);
     pci_update_mappings(s);
@@ -149,6 +152,13 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
+    if (version_id >= 3)
+        s->cap_present = qemu_get_be32(f);
+    else
+        s->cap_present = 0;
+
+    if (s->cap_present & ~s->cap_supported)
+        return -EINVAL;
 
     return 0;
 }
diff --git a/hw/pci.h b/hw/pci.h
index d0db402..6f0803f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,6 +178,10 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits for save/load */
+    uint32_t cap_supported;
+    uint32_t cap_present;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 hw/pci.h |   18 +++++++++++-
 2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 361d741..ed011b5 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
     int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, version); /* PCI device version */
+    /* PCI device version and capabilities */
+    qemu_put_be32(f, version);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
-    if (version >= 3)
-        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     version_id = qemu_get_be32(f);
     if (version_id > 3)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
-    pci_update_mappings(s);
-
-    if (version_id >= 2)
-        for (i = 0; i < 4; i ++)
-            s->irq_state[i] = qemu_get_be32(f);
     if (version_id >= 3)
         s->cap_present = qemu_get_be32(f);
     else
@@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (s->cap_present & ~s->cap_supported)
         return -EINVAL;
 
+    qemu_get_buffer(f, s->config, 256);
+    pci_update_mappings(s);
+
+    if (version_id >= 2)
+        for (i = 0; i < 4; i ++)
+            s->irq_state[i] = qemu_get_be32(f);
+    /* Clear wmask and used bits for capabilities.
+       Must be restored separately, since capabilities can
+       be placed anywhere in config space. */
+    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        s->wmask[i] = 0xff;
     return 0;
 }
 
@@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+            break;
+
+    *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->wmask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->wmask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    uint8_t prev;
+    return pci_find_capability_list(pdev, cap_id, &prev);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 6f0803f..4838c59 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -123,6 +123,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -130,7 +134,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -160,6 +164,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 hw/pci.h |   18 +++++++++++-
 2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 361d741..ed011b5 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
     int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, version); /* PCI device version */
+    /* PCI device version and capabilities */
+    qemu_put_be32(f, version);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
-    if (version >= 3)
-        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     version_id = qemu_get_be32(f);
     if (version_id > 3)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
-    pci_update_mappings(s);
-
-    if (version_id >= 2)
-        for (i = 0; i < 4; i ++)
-            s->irq_state[i] = qemu_get_be32(f);
     if (version_id >= 3)
         s->cap_present = qemu_get_be32(f);
     else
@@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (s->cap_present & ~s->cap_supported)
         return -EINVAL;
 
+    qemu_get_buffer(f, s->config, 256);
+    pci_update_mappings(s);
+
+    if (version_id >= 2)
+        for (i = 0; i < 4; i ++)
+            s->irq_state[i] = qemu_get_be32(f);
+    /* Clear wmask and used bits for capabilities.
+       Must be restored separately, since capabilities can
+       be placed anywhere in config space. */
+    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        s->wmask[i] = 0xff;
     return 0;
 }
 
@@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+            break;
+
+    *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->wmask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->wmask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    uint8_t prev;
+    return pci_find_capability_list(pdev, cap_id, &prev);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 6f0803f..4838c59 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -123,6 +123,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -130,7 +134,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -160,6 +164,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (4 preceding siblings ...)
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (19 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 hw/pci.h |   18 +++++++++++-
 2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 361d741..ed011b5 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
     int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, version); /* PCI device version */
+    /* PCI device version and capabilities */
+    qemu_put_be32(f, version);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
-    if (version >= 3)
-        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     version_id = qemu_get_be32(f);
     if (version_id > 3)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
-    pci_update_mappings(s);
-
-    if (version_id >= 2)
-        for (i = 0; i < 4; i ++)
-            s->irq_state[i] = qemu_get_be32(f);
     if (version_id >= 3)
         s->cap_present = qemu_get_be32(f);
     else
@@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (s->cap_present & ~s->cap_supported)
         return -EINVAL;
 
+    qemu_get_buffer(f, s->config, 256);
+    pci_update_mappings(s);
+
+    if (version_id >= 2)
+        for (i = 0; i < 4; i ++)
+            s->irq_state[i] = qemu_get_be32(f);
+    /* Clear wmask and used bits for capabilities.
+       Must be restored separately, since capabilities can
+       be placed anywhere in config space. */
+    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        s->wmask[i] = 0xff;
     return 0;
 }
 
@@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+            break;
+
+    *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->wmask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->wmask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    uint8_t prev;
+    return pci_find_capability_list(pdev, cap_id, &prev);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 6f0803f..4838c59 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -123,6 +123,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -130,7 +134,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -160,6 +164,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 04/13] qemu: helper routines for pci access.
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 4838c59..477aa64 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -240,21 +240,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 04/13] qemu: helper routines for pci access.
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 4838c59..477aa64 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -240,21 +240,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 04/13] qemu: helper routines for pci access.
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (6 preceding siblings ...)
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (17 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 4838c59..477aa64 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -240,21 +240,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 05/13] qemu: MSI-X support functions
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   35 +++++
 hw/pci.h        |   20 +++
 4 files changed, 479 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 664a1e3..87b2859 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..1b5aec8
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,423 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag to globally disable MSI-X support */
+int msix_disable;
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * New bar size is stored in pdev->msix_bar_size; returns 0 or -errno. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits on top of that */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make flags bit writeable. */
+    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
+ * modified, it should be retrieved with msix_bar_size. Returns 0 or -errno. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret = -ENOMEM;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTTY;
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+    if (!dev->msix_entry_used)
+        goto err_used;
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+    if (!dev->msix_table_page)
+        goto err_page;
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    dev->cap_supported |= QEMU_PCI_CAP_MSIX;
+    /* If disabled, stop here. User can later load configuration with MSI-X
+     * enabled. */
+    if (msix_disable)
+        return 0;
+
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX; /* keep msix_uninit() a no-op */
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+err_page:
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+err_used:
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_supported & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+/* Save the MSI-X table and pending bits, sized from the capability flags. */
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned n = (pci_get_word(dev->config + dev->msix_cap + PCI_MSIX_FLAGS) &
+                  PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Restore state written by msix_save(). Should be called after restoring the
+ * config space. Returns 0 on success or -EINVAL on inconsistent state. */
+int msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    uint8_t offset = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+    unsigned nentries;
+
+    /* The capability flag and the restored config space must agree. */
+    if (!!(dev->cap_present & QEMU_PCI_CAP_MSIX) != !!offset) {
+        fprintf(stderr, "msix_load: MSI-X flag/capability mismatch\n");
+        return -EINVAL;
+    }
+
+    msix_free_irq_entries(dev);
+
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+
+    /* Sanity check: we probably could add more of these. */
+    nentries = (pci_get_word(dev->config + offset + PCI_MSIX_FLAGS) &
+                PCI_MSIX_FLAGS_QSIZE) + 1;
+    if (nentries > MSIX_MAX_ENTRIES) {
+        fprintf(stderr, "msix_load: nentries mismatch: %u > %d\n",
+                nentries, MSIX_MAX_ENTRIES);
+        return -EINVAL;
+    }
+
+    /* Make flags bit writeable. */
+    dev->wmask[offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    /* Reserve space used by this capability */
+    pci_reserve_capability(dev, offset, MSIX_CAP_LENGTH);
+    /* Store the new offset */
+    dev->msix_cap = offset;
+    dev->msix_entries_nr = nentries;
+
+    qemu_get_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                   (nentries + 7) / 8);
+    return 0;
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return;
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+/* Reset MSI-X: forget used vectors, clear the enable bit, zero the table. */
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+    msix_free_irq_entries(dev);
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= ~MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * fewer vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..79e84a3
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,35 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+int msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_disable;
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 477aa64..98a34ee 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -156,6 +156,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_supported/cap_present fields. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -189,6 +194,21 @@ struct PCIDevice {
     /* Capability bits for save/load */
     uint32_t cap_supported;
     uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   35 +++++
 hw/pci.h        |   20 +++
 4 files changed, 479 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 664a1e3..87b2859 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..1b5aec8
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,423 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag to globally disable MSI-X support */
+int msix_disable;
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * New bar size is stored in pdev->msix_bar_size; returns 0 or -errno. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits on top of that */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make flags bit writeable. */
+    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
+ * modified, it should be retrieved with msix_bar_size. Returns 0 or -errno. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret = -ENOMEM;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTTY;
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+    if (!dev->msix_entry_used)
+        goto err_used;
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+    if (!dev->msix_table_page)
+        goto err_page;
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    dev->cap_supported |= QEMU_PCI_CAP_MSIX;
+    /* If disabled, stop here. User can later load configuration with MSI-X
+     * enabled. */
+    if (msix_disable)
+        return 0;
+
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX; /* keep msix_uninit() a no-op */
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+err_page:
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+err_used:
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_supported & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+/* Save the MSI-X table and pending bits, sized from the capability flags. */
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned n = (pci_get_word(dev->config + dev->msix_cap + PCI_MSIX_FLAGS) &
+                  PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Restore state written by msix_save(). Should be called after restoring the
+ * config space. Returns 0 on success or -EINVAL on inconsistent state. */
+int msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    uint8_t offset = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+    unsigned nentries;
+
+    /* The capability flag and the restored config space must agree. */
+    if (!!(dev->cap_present & QEMU_PCI_CAP_MSIX) != !!offset) {
+        fprintf(stderr, "msix_load: MSI-X flag/capability mismatch\n");
+        return -EINVAL;
+    }
+
+    msix_free_irq_entries(dev);
+
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+
+    /* Sanity check: we probably could add more of these. */
+    nentries = (pci_get_word(dev->config + offset + PCI_MSIX_FLAGS) &
+                PCI_MSIX_FLAGS_QSIZE) + 1;
+    if (nentries > MSIX_MAX_ENTRIES) {
+        fprintf(stderr, "msix_load: nentries mismatch: %u > %d\n",
+                nentries, MSIX_MAX_ENTRIES);
+        return -EINVAL;
+    }
+
+    /* Make flags bit writeable. */
+    dev->wmask[offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    /* Reserve space used by this capability */
+    pci_reserve_capability(dev, offset, MSIX_CAP_LENGTH);
+    /* Store the new offset */
+    dev->msix_cap = offset;
+    dev->msix_entries_nr = nentries;
+
+    qemu_get_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                   (nentries + 7) / 8);
+    return 0;
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return;
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+/* Reset MSI-X: forget used vectors, clear the enable bit, zero the table. */
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+    msix_free_irq_entries(dev);
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= ~MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * fewer vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..79e84a3
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,35 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+int msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_disable;
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 477aa64..98a34ee 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -156,6 +156,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_supported/cap_present fields. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -189,6 +194,21 @@ struct PCIDevice {
     /* Capability bits for save/load */
     uint32_t cap_supported;
     uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 05/13] qemu: MSI-X support functions
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (8 preceding siblings ...)
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (15 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   35 +++++
 hw/pci.h        |   20 +++
 4 files changed, 479 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 664a1e3..87b2859 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..1b5aec8
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,423 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI-X enable bit is in byte 1 of the FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag to globally disable MSI-X support */
+int msix_disable;
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * Returns 0 or negative errno; new bar size is stored in msix_bar_size. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures: at least one page, else double the BAR */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); /* N-1 encoded */
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits in the second half of the table page */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make the enable bit in FLAGS writeable. */
+    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures; returns 0 or a negative errno. If the
+ * capability is added, BAR size is modified: retrieve it with msix_bar_size. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret = -ENOMEM;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTTY;
+
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+    if (!dev->msix_entry_used)
+        goto err_used;
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+    if (!dev->msix_table_page)
+        goto err_page;
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    dev->cap_supported |= QEMU_PCI_CAP_MSIX;
+    /* If disabled, stop here. User can later load configuration with MSI-X
+     * enabled. */
+    if (msix_disable)
+        return 0;
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX; /* keep msix_uninit a no-op */
+    dev->msix_entries_nr = 0; /* msix_entry_used is freed below */
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+err_page:
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+err_used:
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_supported & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned n = dev->msix_entries_nr;
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return; /* no capability, no table: nothing to write */
+    qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Restore MSI-X state from f. Call after restoring the config space. */
+int msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    uint8_t offset = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+    unsigned nentries;
+    /* The cap_present flag and the capability list must agree. */
+    if (!!(dev->cap_present & QEMU_PCI_CAP_MSIX) != !!offset) {
+        fprintf(stderr, "MSI-X bit set but no capability is present\n");
+        return -EINVAL;
+    }
+
+    msix_free_irq_entries(dev);
+
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+
+    /* Sanity check: we probably could add more of these. */
+    nentries = (pci_get_word(dev->config + offset + PCI_MSIX_FLAGS) &
+                PCI_MSIX_FLAGS_QSIZE) + 1;
+    if (nentries > MSIX_MAX_ENTRIES) {
+        fprintf(stderr, "msix_load: nentries mismatch: %u > %d\n",
+                nentries, MSIX_MAX_ENTRIES);
+        return -EINVAL;
+    }
+
+    /* Make flags bit writeable. */
+    dev->wmask[offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    /* Reserve space used by this capability */
+    pci_reserve_capability(dev, offset, MSIX_CAP_LENGTH);
+    /* Store the new offset */
+    dev->msix_cap = offset;
+
+    dev->msix_entries_nr = nentries;
+    /* Table entries first, then the pending bits, as msix_save wrote them. */
+    qemu_get_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                   (nentries + 7) / 8);
+
+    return 0;
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send the MSI-X message for vector; if it is masked, mark it pending. */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return; /* out of range or not declared used: drop silently */
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return; /* MSI-X not exposed: nothing to reset */
+    msix_free_irq_entries(dev); /* forget all vector use counts */
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= ~MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); /* table + pending bits */
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * fewer vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..79e84a3
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,35 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+int msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_disable;
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 477aa64..98a34ee 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -156,6 +156,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_supported/cap_present fields. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -189,6 +194,21 @@ struct PCIDevice {
     /* Capability bits for save/load */
     uint32_t cap_supported;
     uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 06/13] qemu: add flag to disable MSI-X by default
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add global flag to disable MSI-X by default.  This is useful primarily
to make images loadable by older qemu (without msix).  Even when MSI-X
is disabled by flag, you can still load images that have MSI-X enabled.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/msix.c       |    3 +++
 qemu-options.hx |    2 ++
 vl.c            |    3 +++
 3 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 1b5aec8..ada81d8 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -122,6 +122,9 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
                        uint32_t val, int len)
 {
     unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+
     if (addr + len <= enable_pos || addr > enable_pos)
         return;
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 87af798..fd041a4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1575,3 +1575,5 @@ DEF("semihosting", 0, QEMU_OPTION_semihosting,
 DEF("old-param", 0, QEMU_OPTION_old_param,
     "-old-param      old param mode\n")
 #endif
+DEF("disable-msix", 0, QEMU_OPTION_disable_msix,
+    "-disable-msix disable msix support for PCI devices (enabled by default)\n")
diff --git a/vl.c b/vl.c
index 2c1f0e0..2757d4f 100644
--- a/vl.c
+++ b/vl.c
@@ -134,6 +134,7 @@ int main(int argc, char **argv)
 #include "hw/usb.h"
 #include "hw/pcmcia.h"
 #include "hw/pc.h"
+#include "hw/msix.h"
 #include "hw/audiodev.h"
 #include "hw/isa.h"
 #include "hw/baum.h"
@@ -5557,6 +5558,8 @@ int main(int argc, char **argv, char **envp)
                 xen_mode = XEN_ATTACH;
                 break;
 #endif
+            case QEMU_OPTION_disable_msix:
+                msix_disable = 1;
             }
         }
     }
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 06/13] qemu: add flag to disable MSI-X by default
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add global flag to disable MSI-X by default.  This is useful primarily
to make images loadable by older qemu (without msix).  Even when MSI-X
is disabled by flag, you can still load images that have MSI-X enabled.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/msix.c       |    3 +++
 qemu-options.hx |    2 ++
 vl.c            |    3 +++
 3 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 1b5aec8..ada81d8 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -122,6 +122,9 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
                        uint32_t val, int len)
 {
     unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+
     if (addr + len <= enable_pos || addr > enable_pos)
         return;
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 87af798..fd041a4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1575,3 +1575,5 @@ DEF("semihosting", 0, QEMU_OPTION_semihosting,
 DEF("old-param", 0, QEMU_OPTION_old_param,
     "-old-param      old param mode\n")
 #endif
+DEF("disable-msix", 0, QEMU_OPTION_disable_msix,
+    "-disable-msix disable msix support for PCI devices (enabled by default)\n")
diff --git a/vl.c b/vl.c
index 2c1f0e0..2757d4f 100644
--- a/vl.c
+++ b/vl.c
@@ -134,6 +134,7 @@ int main(int argc, char **argv)
 #include "hw/usb.h"
 #include "hw/pcmcia.h"
 #include "hw/pc.h"
+#include "hw/msix.h"
 #include "hw/audiodev.h"
 #include "hw/isa.h"
 #include "hw/baum.h"
@@ -5557,6 +5558,8 @@ int main(int argc, char **argv, char **envp)
                 xen_mode = XEN_ATTACH;
                 break;
 #endif
+            case QEMU_OPTION_disable_msix:
+                msix_disable = 1;
             }
         }
     }
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 06/13] qemu: add flag to disable MSI-X by default
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (10 preceding siblings ...)
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (13 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add global flag to disable MSI-X by default.  This is useful primarily
to make images loadable by older qemu (without msix).  Even when MSI-X
is disabled by flag, you can still load images that have MSI-X enabled.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/msix.c       |    3 +++
 qemu-options.hx |    2 ++
 vl.c            |    3 +++
 3 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 1b5aec8..ada81d8 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -122,6 +122,9 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
                        uint32_t val, int len)
 {
     unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+
     if (addr + len <= enable_pos || addr > enable_pos)
         return;
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 87af798..fd041a4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1575,3 +1575,5 @@ DEF("semihosting", 0, QEMU_OPTION_semihosting,
 DEF("old-param", 0, QEMU_OPTION_old_param,
     "-old-param      old param mode\n")
 #endif
+DEF("disable-msix", 0, QEMU_OPTION_disable_msix,
+    "-disable-msix disable msix support for PCI devices (enabled by default)\n")
diff --git a/vl.c b/vl.c
index 2c1f0e0..2757d4f 100644
--- a/vl.c
+++ b/vl.c
@@ -134,6 +134,7 @@ int main(int argc, char **argv)
 #include "hw/usb.h"
 #include "hw/pcmcia.h"
 #include "hw/pc.h"
+#include "hw/msix.h"
 #include "hw/audiodev.h"
 #include "hw/isa.h"
 #include "hw/baum.h"
@@ -5557,6 +5558,8 @@ int main(int argc, char **argv, char **envp)
                 xen_mode = XEN_ATTACH;
                 break;
 #endif
+            case QEMU_OPTION_disable_msix:
+                msix_disable = 1;
             }
         }
     }
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   43 +++++++++++++++++++++++++++++++++++++++----
 1 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 8c8b2de..ed03a36 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -712,11 +727,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
+{ /* Decode an MSI address/data pair and hand it to apic_deliver_irq. */
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -911,6 +945,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -918,7 +953,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -929,4 +965,3 @@ int apic_init(CPUState *env)
     local_apics[s->id] = s;
     return 0;
 }
-
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   43 +++++++++++++++++++++++++++++++++++++++----
 1 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 8c8b2de..ed03a36 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -712,11 +727,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on the PCI bus, while
+         * the APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -911,6 +945,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -918,7 +953,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -929,4 +965,3 @@ int apic_init(CPUState *env)
     local_apics[s->id] = s;
     return 0;
 }
-
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (12 preceding siblings ...)
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23 ` [PATCHv3 08/13] qemu: add support for resizing regions Michael S. Tsirkin
                   ` (11 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   43 +++++++++++++++++++++++++++++++++++++++----
 1 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 8c8b2de..ed03a36 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -712,11 +727,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on the PCI bus, while
+         * the APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -911,6 +945,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -918,7 +953,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -929,4 +965,3 @@ int apic_init(CPUState *env)
     local_apics[s->id] = s;
     return 0;
 }
-
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 08/13] qemu: add support for resizing regions
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Make it possible to resize PCI regions.  This will be used by virtio
with MSI-X, where the region size depends on whether MSI-X is enabled,
and can change across load/save.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
 hw/pci.h |    3 +++
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index ed011b5..042a216 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
+static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
+{
+    if (r->addr == -1)
+        return;
+    if (r->type & PCI_ADDRESS_SPACE_IO) {
+        int class;
+        /* NOTE: specific hack for IDE in PC case:
+           only one byte must be mapped. */
+        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
+        if (class == 0x0101 && r->size == 4) {
+            isa_unassign_ioport(r->addr + 2, 1);
+        } else {
+            isa_unassign_ioport(r->addr, r->size);
+        }
+    } else {
+        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
+                                     r->size,
+                                     IO_MEM_UNASSIGNED);
+        qemu_unregister_coalesced_mmio(r->addr, r->size);
+    }
+}
+
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size)
+{
+    /* Tear down the mapping with the old size before recording the new one. */
+    PCIIORegion *r = &pci_dev->io_regions[region_num];
+    if (r->size == size)
+        return;
+    pci_unmap_region(pci_dev, r);
+    r->size = size;
+    r->addr = -1;    /* mark unmapped so the update below remaps it */
+    pci_update_mappings(pci_dev);
+}
+
 static void pci_update_mappings(PCIDevice *d)
 {
     PCIIORegion *r;
@@ -445,24 +480,7 @@ static void pci_update_mappings(PCIDevice *d)
             }
             /* now do the real mapping */
             if (new_addr != r->addr) {
-                if (r->addr != -1) {
-                    if (r->type & PCI_ADDRESS_SPACE_IO) {
-                        int class;
-                        /* NOTE: specific hack for IDE in PC case:
-                           only one byte must be mapped. */
-                        class = d->config[0x0a] | (d->config[0x0b] << 8);
-                        if (class == 0x0101 && r->size == 4) {
-                            isa_unassign_ioport(r->addr + 2, 1);
-                        } else {
-                            isa_unassign_ioport(r->addr, r->size);
-                        }
-                    } else {
-                        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
-                                                     r->size,
-                                                     IO_MEM_UNASSIGNED);
-                        qemu_unregister_coalesced_mmio(r->addr, r->size);
-                    }
-                }
+                pci_unmap_region(d, r);
                 r->addr = new_addr;
                 if (r->addr != -1) {
                     r->map_func(d, i, r->addr, r->size, r->type);
diff --git a/hw/pci.h b/hw/pci.h
index 98a34ee..8e74033 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -221,6 +221,9 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size);
+
 int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
 
 void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
@ 2009-06-05 10:23   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Make it possible to resize PCI regions.  This will be used by virtio
with MSI-X, where the region size depends on whether MSI-X is enabled,
and can change across load/save.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
 hw/pci.h |    3 +++
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index ed011b5..042a216 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
+static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
+{
+    if (r->addr == -1)
+        return;
+    if (r->type & PCI_ADDRESS_SPACE_IO) {
+        int class;
+        /* NOTE: specific hack for IDE in PC case:
+           only one byte must be mapped. */
+        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
+        if (class == 0x0101 && r->size == 4) {
+            isa_unassign_ioport(r->addr + 2, 1);
+        } else {
+            isa_unassign_ioport(r->addr, r->size);
+        }
+    } else {
+        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
+                                     r->size,
+                                     IO_MEM_UNASSIGNED);
+        qemu_unregister_coalesced_mmio(r->addr, r->size);
+    }
+}
+
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size)
+{
+    /* Tear down the mapping with the old size before recording the new one. */
+    PCIIORegion *r = &pci_dev->io_regions[region_num];
+    if (r->size == size)
+        return;
+    pci_unmap_region(pci_dev, r);
+    r->size = size;
+    r->addr = -1;    /* mark unmapped so the update below remaps it */
+    pci_update_mappings(pci_dev);
+}
+
 static void pci_update_mappings(PCIDevice *d)
 {
     PCIIORegion *r;
@@ -445,24 +480,7 @@ static void pci_update_mappings(PCIDevice *d)
             }
             /* now do the real mapping */
             if (new_addr != r->addr) {
-                if (r->addr != -1) {
-                    if (r->type & PCI_ADDRESS_SPACE_IO) {
-                        int class;
-                        /* NOTE: specific hack for IDE in PC case:
-                           only one byte must be mapped. */
-                        class = d->config[0x0a] | (d->config[0x0b] << 8);
-                        if (class == 0x0101 && r->size == 4) {
-                            isa_unassign_ioport(r->addr + 2, 1);
-                        } else {
-                            isa_unassign_ioport(r->addr, r->size);
-                        }
-                    } else {
-                        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
-                                                     r->size,
-                                                     IO_MEM_UNASSIGNED);
-                        qemu_unregister_coalesced_mmio(r->addr, r->size);
-                    }
-                }
+                pci_unmap_region(d, r);
                 r->addr = new_addr;
                 if (r->addr != -1) {
                     r->map_func(d, i, r->addr, r->size, r->type);
diff --git a/hw/pci.h b/hw/pci.h
index 98a34ee..8e74033 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -221,6 +221,9 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size);
+
 int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
 
 void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 08/13] qemu: add support for resizing regions
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (13 preceding siblings ...)
  2009-06-05 10:23 ` Michael S. Tsirkin
@ 2009-06-05 10:23 ` Michael S. Tsirkin
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (10 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:23 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Make it possible to resize PCI regions.  This will be used by virtio
with MSI-X, where the region size depends on whether MSI-X is enabled,
and can change across load/save.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
 hw/pci.h |    3 +++
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index ed011b5..042a216 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
+static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
+{
+    if (r->addr == -1)
+        return;
+    if (r->type & PCI_ADDRESS_SPACE_IO) {
+        int class;
+        /* NOTE: specific hack for IDE in PC case:
+           only one byte must be mapped. */
+        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
+        if (class == 0x0101 && r->size == 4) {
+            isa_unassign_ioport(r->addr + 2, 1);
+        } else {
+            isa_unassign_ioport(r->addr, r->size);
+        }
+    } else {
+        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
+                                     r->size,
+                                     IO_MEM_UNASSIGNED);
+        qemu_unregister_coalesced_mmio(r->addr, r->size);
+    }
+}
+
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size)
+{
+    /* Tear down the mapping with the old size before recording the new one. */
+    PCIIORegion *r = &pci_dev->io_regions[region_num];
+    if (r->size == size)
+        return;
+    pci_unmap_region(pci_dev, r);
+    r->size = size;
+    r->addr = -1;    /* mark unmapped so the update below remaps it */
+    pci_update_mappings(pci_dev);
+}
+
 static void pci_update_mappings(PCIDevice *d)
 {
     PCIIORegion *r;
@@ -445,24 +480,7 @@ static void pci_update_mappings(PCIDevice *d)
             }
             /* now do the real mapping */
             if (new_addr != r->addr) {
-                if (r->addr != -1) {
-                    if (r->type & PCI_ADDRESS_SPACE_IO) {
-                        int class;
-                        /* NOTE: specific hack for IDE in PC case:
-                           only one byte must be mapped. */
-                        class = d->config[0x0a] | (d->config[0x0b] << 8);
-                        if (class == 0x0101 && r->size == 4) {
-                            isa_unassign_ioport(r->addr + 2, 1);
-                        } else {
-                            isa_unassign_ioport(r->addr, r->size);
-                        }
-                    } else {
-                        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
-                                                     r->size,
-                                                     IO_MEM_UNASSIGNED);
-                        qemu_unregister_coalesced_mmio(r->addr, r->size);
-                    }
-                }
+                pci_unmap_region(d, r);
                 r->addr = new_addr;
                 if (r->addr != -1) {
                     r->map_func(d, i, r->addr, r->size, r->type);
diff --git a/hw/pci.h b/hw/pci.h
index 98a34ee..8e74033 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -221,6 +221,9 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size);
+
 int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
 
 void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 09/13] qemu: virtio support for many interrupt vectors
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 +++++++++++++++----
 hw/virtio.c        |   63 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 ++++++-
 4 files changed, 84 insertions(+), 26 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 37c219c..d8c978a 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(0, syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c072423..7dfdd80 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_io_region(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..63ffcff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,15 +596,19 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
     int num, i;
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
+    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +617,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +631,14 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->vq[i].vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +659,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +669,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 09/13] qemu: virtio support for many interrupt vectors
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 +++++++++++++++----
 hw/virtio.c        |   63 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 ++++++-
 4 files changed, 84 insertions(+), 26 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 37c219c..d8c978a 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(0, syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c072423..7dfdd80 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_io_region(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..63ffcff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,15 +596,19 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
     int num, i;
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
+    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +617,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +631,14 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +659,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +669,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 09/13] qemu: virtio support for many interrupt vectors
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (15 preceding siblings ...)
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (8 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 +++++++++++++++----
 hw/virtio.c        |   63 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 ++++++-
 4 files changed, 84 insertions(+), 26 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 37c219c..d8c978a 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(0, syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c072423..7dfdd80 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_io_region(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..63ffcff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,15 +596,19 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
     int num, i;
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
+    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +617,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +631,14 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +659,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +669,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 10/13] qemu: MSI-X support in virtio PCI
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

This enables actual support for MSI-X in virtio PCI.
First user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 112 insertions(+), 40 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 7dfdd80..294f4c7 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_io_region(&proxy->pci_dev, 1,
+                               msix_bar_size(&proxy->pci_dev),
+                               PCI_ADDRESS_SPACE_MEM,
+                               msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 10/13] qemu: MSI-X support in virtio PCI
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

This enables actual support for MSI-X in virtio PCI.
The first user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 112 insertions(+), 40 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 7dfdd80..294f4c7 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_io_region(&proxy->pci_dev, 1,
+                               msix_bar_size(&proxy->pci_dev),
+                               PCI_ADDRESS_SPACE_MEM,
+                               msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 10/13] qemu: MSI-X support in virtio PCI
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (18 preceding siblings ...)
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (5 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

This enables actual support for MSI-X in virtio PCI.
The first user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 112 insertions(+), 40 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 7dfdd80..294f4c7 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_io_region(&proxy->pci_dev, 1,
+                               msix_bar_size(&proxy->pci_dev),
+                               PCI_ADDRESS_SPACE_MEM,
+                               msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 11/13] qemu: request 3 vectors in virtio-net
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Request up to 3 vectors in virtio-net. The actual bindings might supply
fewer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 60aa6da..6118fe3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -621,6 +621,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 11/13] qemu: request 3 vectors in virtio-net
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Request up to 3 vectors in virtio-net. The actual bindings might supply
fewer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 60aa6da..6118fe3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -621,6 +621,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 11/13] qemu: request 3 vectors in virtio-net
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (20 preceding siblings ...)
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
  2009-06-05 10:24 ` [PATCHv3 12/13] qemu: virtio save/load bindings Michael S. Tsirkin
                   ` (3 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Request up to 3 vectors in virtio-net. The actual bindings might supply
fewer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 60aa6da..6118fe3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -621,6 +621,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 12/13] qemu: virtio save/load bindings
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Implement bindings for virtio save/load. Use them in virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio.c     |   31 ++++++++++++++-----------------
 hw/virtio.h     |    4 ++++
 3 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 294f4c7..589fbb1 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,48 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    pci_device_save(&proxy->pci_dev, f);
+    msix_save(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    int ret;
+    ret = pci_device_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    ret = msix_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    if (msix_present(&proxy->pci_dev))
+        qemu_get_be16s(f, &proxy->vdev->config_vector);
+    return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint16_t vector;
+    if (!msix_present(&proxy->pci_dev))
+        return 0;
+    qemu_get_be16s(f, &vector);
+    virtio_queue_set_vector(proxy->vdev, n, vector);
+    return 0;
+}
+
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +359,12 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .notify = virtio_pci_notify
+    .notify = virtio_pci_notify,
+    .save_config = virtio_pci_save_config,
+    .load_config = virtio_pci_load_config,
+    .save_config = virtio_pci_save_config,
+    .save_queue = virtio_pci_save_queue,
+    .load_queue = virtio_pci_load_queue,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index 63ffcff..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 {
     int i;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_save(&vdev->pci_dev, f);
-    //msix_save(&vdev->pci_dev, f);
+    if (vdev->binding->save_config)
+        vdev->binding->save_config(vdev->binding_opaque, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -596,19 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
-        if (vdev->nvectors)
-            qemu_put_be16s(f, &vdev->vq[i].vector);
+        if (vdev->binding->save_queue)
+            vdev->binding->save_queue(vdev->binding_opaque, i, f);
     }
 }
 
 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
-    int num, i;
+    int num, i, ret;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_load(&vdev->pci_dev, f);
-    //r = msix_load(&vdev->pci_dev, f);
-    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
+    if (vdev->binding->load_config) {
+        ret = vdev->binding->load_config(vdev->binding_opaque, f);
+        if (ret)
+            return ret;
+    }
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -617,10 +617,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
-    if (vdev->nvectors) {
-        qemu_get_be16s(f, &vdev->config_vector);
-        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
-    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -631,9 +627,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
-        if (vdev->nvectors) {
-            qemu_get_be16s(f, &vdev->vq[i].vector);
-            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        if (vdev->binding->load_queue) {
+            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+            if (ret)
+                return ret;
         }
     }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
 
 typedef struct {
     void (*notify)(void * opaque, uint16_t vector);
+    void (*save_config)(void * opaque, QEMUFile *f);
+    void (*save_queue)(void * opaque, int n, QEMUFile *f);
+    int (*load_config)(void * opaque, QEMUFile *f);
+    int (*load_queue)(void * opaque, int n, QEMUFile *f);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
-- 
1.6.3.1.56.g79e1.dirty


^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Implement bindings for virtio save/load. Use them in virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio.c     |   31 ++++++++++++++-----------------
 hw/virtio.h     |    4 ++++
 3 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 294f4c7..589fbb1 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,48 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    pci_device_save(&proxy->pci_dev, f);
+    msix_save(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    int ret;
+    ret = pci_device_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    ret = msix_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    if (msix_present(&proxy->pci_dev))
+        qemu_get_be16s(f, &proxy->vdev->config_vector);
+    return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint16_t vector;
+    if (!msix_present(&proxy->pci_dev))
+        return 0;
+    qemu_get_be16s(f, &vector);
+    virtio_queue_set_vector(proxy->vdev, n, vector);
+    return 0;
+}
+
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +359,12 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .notify = virtio_pci_notify
+    .notify = virtio_pci_notify,
+    .save_config = virtio_pci_save_config,
+    .load_config = virtio_pci_load_config,
+    .save_config = virtio_pci_save_config,
+    .save_queue = virtio_pci_save_queue,
+    .load_queue = virtio_pci_load_queue,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index 63ffcff..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 {
     int i;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_save(&vdev->pci_dev, f);
-    //msix_save(&vdev->pci_dev, f);
+    if (vdev->binding->save_config)
+        vdev->binding->save_config(vdev->binding_opaque, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -596,19 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
-        if (vdev->nvectors)
-            qemu_put_be16s(f, &vdev->vq[i].vector);
+        if (vdev->binding->save_queue)
+            vdev->binding->save_queue(vdev->binding_opaque, i, f);
     }
 }
 
 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
-    int num, i;
+    int num, i, ret;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_load(&vdev->pci_dev, f);
-    //r = msix_load(&vdev->pci_dev, f);
-    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
+    if (vdev->binding->load_config) {
+        ret = vdev->binding->load_config(vdev->binding_opaque, f);
+        if (ret)
+            return ret;
+    }
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -617,10 +617,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
-    if (vdev->nvectors) {
-        qemu_get_be16s(f, &vdev->config_vector);
-        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
-    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -631,9 +627,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
-        if (vdev->nvectors) {
-            qemu_get_be16s(f, &vdev->vq[i].vector);
-            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        if (vdev->binding->load_queue) {
+            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+            if (ret)
+                return ret;
         }
     }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
 
 typedef struct {
     void (*notify)(void * opaque, uint16_t vector);
+    void (*save_config)(void * opaque, QEMUFile *f);
+    void (*save_queue)(void * opaque, int n, QEMUFile *f);
+    int (*load_config)(void * opaque, QEMUFile *f);
+    int (*load_queue)(void * opaque, int n, QEMUFile *f);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 12/13] qemu: virtio save/load bindings
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (21 preceding siblings ...)
  2009-06-05 10:24 ` Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (2 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Implement bindings for virtio save/load. Use them in virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio.c     |   31 ++++++++++++++-----------------
 hw/virtio.h     |    4 ++++
 3 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 294f4c7..589fbb1 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,48 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    pci_device_save(&proxy->pci_dev, f);
+    msix_save(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    int ret;
+    ret = pci_device_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    ret = msix_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    if (msix_present(&proxy->pci_dev))
+        qemu_get_be16s(f, &proxy->vdev->config_vector);
+    return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint16_t vector;
+    if (!msix_present(&proxy->pci_dev))
+        return 0;
+    qemu_get_be16s(f, &vector);
+    virtio_queue_set_vector(proxy->vdev, n, vector);
+    return 0;
+}
+
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +359,12 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .notify = virtio_pci_notify
+    .notify = virtio_pci_notify,
+    .save_config = virtio_pci_save_config,
+    .load_config = virtio_pci_load_config,
+    .save_config = virtio_pci_save_config,
+    .save_queue = virtio_pci_save_queue,
+    .load_queue = virtio_pci_load_queue,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index 63ffcff..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 {
     int i;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_save(&vdev->pci_dev, f);
-    //msix_save(&vdev->pci_dev, f);
+    if (vdev->binding->save_config)
+        vdev->binding->save_config(vdev->binding_opaque, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -596,19 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
-        if (vdev->nvectors)
-            qemu_put_be16s(f, &vdev->vq[i].vector);
+        if (vdev->binding->save_queue)
+            vdev->binding->save_queue(vdev->binding_opaque, i, f);
     }
 }
 
 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
-    int num, i;
+    int num, i, ret;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_load(&vdev->pci_dev, f);
-    //r = msix_load(&vdev->pci_dev, f);
-    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
+    if (vdev->binding->load_config) {
+        ret = vdev->binding->load_config(vdev->binding_opaque, f);
+        if (ret)
+            return ret;
+    }
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -617,10 +617,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
-    if (vdev->nvectors) {
-        qemu_get_be16s(f, &vdev->config_vector);
-        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
-    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -631,9 +627,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
-        if (vdev->nvectors) {
-            qemu_get_be16s(f, &vdev->vq[i].vector);
-            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        if (vdev->binding->load_queue) {
+            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+            if (ret)
+                return ret;
         }
     }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
 
 typedef struct {
     void (*notify)(void * opaque, uint16_t vector);
+    void (*save_config)(void * opaque, QEMUFile *f);
+    void (*save_queue)(void * opaque, int n, QEMUFile *f);
+    int (*load_config)(void * opaque, QEMUFile *f);
+    int (*load_queue)(void * opaque, int n, QEMUFile *f);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 13/13] qemu: add pci_get/set_byte
       [not found] <cover.1244192535.git.mst@redhat.com>
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (24 subsequent siblings)
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 8e74033..7cc9a8a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -263,6 +263,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCHv3 13/13] qemu: add pci_get/set_byte
@ 2009-06-05 10:24   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 8e74033..7cc9a8a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -263,6 +263,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [PATCHv3 13/13] qemu: add pci_get/set_byte
       [not found] <cover.1244192535.git.mst@redhat.com>
                   ` (24 preceding siblings ...)
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-05 10:24 ` Michael S. Tsirkin
  25 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-05 10:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 8e74033..7cc9a8a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -263,6 +263,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-09 17:11     ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> Add routines to manage PCI capability list. First user will be MSI-X.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  hw/pci.h |   18 +++++++++++-
>  2 files changed, 106 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 361d741..ed011b5 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
>      int version = s->cap_present ? 3 : 2;
>      int i;
>  
> -    qemu_put_be32(f, version); /* PCI device version */
> +    /* PCI device version and capabilities */
> +    qemu_put_be32(f, version);
> +    if (version >= 3)
> +        qemu_put_be32(f, s->cap_present);
>      qemu_put_buffer(f, s->config, 256);
>      for (i = 0; i < 4; i++)
>          qemu_put_be32(f, s->irq_state[i]);
> -    if (version >= 3)
> -        qemu_put_be32(f, s->cap_present);
>  }
What is it doing here?
You should just do it right in the first patch, instead of doing it
one way there and fixing it here.

>  
>  int pci_device_load(PCIDevice *s, QEMUFile *f)
> @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      version_id = qemu_get_be32(f);
>      if (version_id > 3)
>          return -EINVAL;
> -    qemu_get_buffer(f, s->config, 256);
> -    pci_update_mappings(s);
> -
> -    if (version_id >= 2)
> -        for (i = 0; i < 4; i ++)
> -            s->irq_state[i] = qemu_get_be32(f);
>      if (version_id >= 3)
>          s->cap_present = qemu_get_be32(f);
>      else
ditto.

> @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      if (s->cap_present & ~s->cap_supported)
>          return -EINVAL;
>  
> +    qemu_get_buffer(f, s->config, 256);
> +    pci_update_mappings(s);
> +
> +    if (version_id >= 2)
> +        for (i = 0; i < 4; i ++)
> +            s->irq_state[i] = qemu_get_be32(f);
> +    /* Clear wmask and used bits for capabilities.
> +       Must be restored separately, since capabilities can
> +       be placed anywhere in config space. */
> +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        s->wmask[i] = 0xff;
>      return 0;
>  }
Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
lose by keeping it at the same place in config space?

>  
> @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
>  
>      return (PCIDevice *)dev;
>  }
> +
> +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> +{
> +    int offset = PCI_CONFIG_HEADER_SIZE;
> +    int i;
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        if (pdev->used[i])
> +            offset = i + 1;
> +        else if (i - offset + 1 == size)
> +            return offset;
> +    return 0;
> +}
> +
> +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> +                                        uint8_t *prev_p)
> +{
> +    uint8_t next, prev;
> +
> +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> +        return 0;
> +
> +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> +         prev = next + PCI_CAP_LIST_NEXT)
> +        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> +            break;
> +
> +    *prev_p = prev;
> +    return next;
> +}
I'd prefer to do:
	if (prev_p)
		*prev_p = prev;
so we don't have to always pass a prev_p pointer. You have yourself a user
where you don't need it in this very patch.

> +
> +/* Reserve space and add capability to the linked list in pci config space */
> +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t offset = pci_find_space(pdev, size);
> +    uint8_t *config = pdev->config + offset;
> +    if (!offset)
> +        return -ENOSPC;
> +    config[PCI_CAP_LIST_ID] = cap_id;
> +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> +    memset(pdev->used + offset, 0xFF, size);
> +    /* Make capability read-only by default */
> +    memset(pdev->wmask + offset, 0, size);
> +    return offset;
> +}
> +
> +/* Unlink capability from the pci config space. */
> +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> +    if (!offset)
> +        return;
> +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> +    /* Make capability writeable again */
> +    memset(pdev->wmask + offset, 0xff, size);
> +    memset(pdev->used + offset, 0, size);
> +
> +    if (!pdev->config[PCI_CAPABILITY_LIST])
> +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> +}
> +
> +/* Reserve space for capability at a known offset (to call after load). */
> +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> +{
> +    memset(pdev->used + offset, 0xff, size);
> +}
> +
> +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> +{
> +    uint8_t prev;
> +    return pci_find_capability_list(pdev, cap_id, &prev);
> +}
> diff --git a/hw/pci.h b/hw/pci.h
> index 6f0803f..4838c59 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
>  #define PCI_MIN_GNT		0x3e	/* 8 bits */
>  #define PCI_MAX_LAT		0x3f	/* 8 bits */
>  
> +/* Capability lists */
> +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> +
>  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
>  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
>  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
>  /* Bits in the PCI Status Register (PCI 2.3 spec) */
>  #define PCI_STATUS_RESERVED1	0x007
>  #define PCI_STATUS_INT_STATUS	0x008
> -#define PCI_STATUS_CAPABILITIES	0x010
> +#define PCI_STATUS_CAP_LIST	0x010
>  #define PCI_STATUS_66MHZ	0x020
>  #define PCI_STATUS_RESERVED2	0x040
>  #define PCI_STATUS_FAST_BACK	0x080
> @@ -160,6 +164,9 @@ struct PCIDevice {
>      /* Used to implement R/W bytes */
>      uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
>  
> +    /* Used to allocate config space for capabilities. */
> +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> +
>      /* the following fields are read only */
>      PCIBus *bus;
>      int devfn;
> @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>                              uint32_t size, int type,
>                              PCIMapIORegionFunc *map_func);
>  
> +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> +
> +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> +
> +
>  uint32_t pci_default_read_config(PCIDevice *d,
>                                   uint32_t address, int len);
>  void pci_default_write_config(PCIDevice *d,
> -- 
> 1.6.3.1.56.g79e1.dirty
> 
> 
> 

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-09 17:11     ` Glauber Costa
  0 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> Add routines to manage PCI capability list. First user will be MSI-X.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  hw/pci.h |   18 +++++++++++-
>  2 files changed, 106 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 361d741..ed011b5 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
>      int version = s->cap_present ? 3 : 2;
>      int i;
>  
> -    qemu_put_be32(f, version); /* PCI device version */
> +    /* PCI device version and capabilities */
> +    qemu_put_be32(f, version);
> +    if (version >= 3)
> +        qemu_put_be32(f, s->cap_present);
>      qemu_put_buffer(f, s->config, 256);
>      for (i = 0; i < 4; i++)
>          qemu_put_be32(f, s->irq_state[i]);
> -    if (version >= 3)
> -        qemu_put_be32(f, s->cap_present);
>  }
What is it doing here?
You should just do it right in the first patch, instead of doing it
one way there and then fixing it here.

>  
>  int pci_device_load(PCIDevice *s, QEMUFile *f)
> @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      version_id = qemu_get_be32(f);
>      if (version_id > 3)
>          return -EINVAL;
> -    qemu_get_buffer(f, s->config, 256);
> -    pci_update_mappings(s);
> -
> -    if (version_id >= 2)
> -        for (i = 0; i < 4; i ++)
> -            s->irq_state[i] = qemu_get_be32(f);
>      if (version_id >= 3)
>          s->cap_present = qemu_get_be32(f);
>      else
ditto.

> @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      if (s->cap_present & ~s->cap_supported)
>          return -EINVAL;
>  
> +    qemu_get_buffer(f, s->config, 256);
> +    pci_update_mappings(s);
> +
> +    if (version_id >= 2)
> +        for (i = 0; i < 4; i ++)
> +            s->irq_state[i] = qemu_get_be32(f);
> +    /* Clear wmask and used bits for capabilities.
> +       Must be restored separately, since capabilities can
> +       be placed anywhere in config space. */
> +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        s->wmask[i] = 0xff;
>      return 0;
>  }
Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
lose by keeping it at the same place in config space?

>  
> @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
>  
>      return (PCIDevice *)dev;
>  }
> +
> +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> +{
> +    int offset = PCI_CONFIG_HEADER_SIZE;
> +    int i;
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        if (pdev->used[i])
> +            offset = i + 1;
> +        else if (i - offset + 1 == size)
> +            return offset;
> +    return 0;
> +}
> +
> +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> +                                        uint8_t *prev_p)
> +{
> +    uint8_t next, prev;
> +
> +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> +        return 0;
> +
> +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> +         prev = next + PCI_CAP_LIST_NEXT)
> +        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> +            break;
> +
> +    *prev_p = prev;
> +    return next;
> +}
I'd prefer to do:
	if (prev_p)
		*prev_p = prev;
so we don't always have to pass a prev_p pointer. You yourself have a caller
in this very patch that doesn't need it.

> +
> +/* Reserve space and add capability to the linked list in pci config space */
> +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t offset = pci_find_space(pdev, size);
> +    uint8_t *config = pdev->config + offset;
> +    if (!offset)
> +        return -ENOSPC;
> +    config[PCI_CAP_LIST_ID] = cap_id;
> +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> +    memset(pdev->used + offset, 0xFF, size);
> +    /* Make capability read-only by default */
> +    memset(pdev->wmask + offset, 0, size);
> +    return offset;
> +}
> +
> +/* Unlink capability from the pci config space. */
> +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> +    if (!offset)
> +        return;
> +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> +    /* Make capability writeable again */
> +    memset(pdev->wmask + offset, 0xff, size);
> +    memset(pdev->used + offset, 0, size);
> +
> +    if (!pdev->config[PCI_CAPABILITY_LIST])
> +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> +}
> +
> +/* Reserve space for capability at a known offset (to call after load). */
> +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> +{
> +    memset(pdev->used + offset, 0xff, size);
> +}
> +
> +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> +{
> +    uint8_t prev;
> +    return pci_find_capability_list(pdev, cap_id, &prev);
> +}
> diff --git a/hw/pci.h b/hw/pci.h
> index 6f0803f..4838c59 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
>  #define PCI_MIN_GNT		0x3e	/* 8 bits */
>  #define PCI_MAX_LAT		0x3f	/* 8 bits */
>  
> +/* Capability lists */
> +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> +
>  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
>  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
>  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
>  /* Bits in the PCI Status Register (PCI 2.3 spec) */
>  #define PCI_STATUS_RESERVED1	0x007
>  #define PCI_STATUS_INT_STATUS	0x008
> -#define PCI_STATUS_CAPABILITIES	0x010
> +#define PCI_STATUS_CAP_LIST	0x010
>  #define PCI_STATUS_66MHZ	0x020
>  #define PCI_STATUS_RESERVED2	0x040
>  #define PCI_STATUS_FAST_BACK	0x080
> @@ -160,6 +164,9 @@ struct PCIDevice {
>      /* Used to implement R/W bytes */
>      uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
>  
> +    /* Used to allocate config space for capabilities. */
> +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> +
>      /* the following fields are read only */
>      PCIBus *bus;
>      int devfn;
> @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>                              uint32_t size, int type,
>                              PCIMapIORegionFunc *map_func);
>  
> +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> +
> +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> +
> +
>  uint32_t pci_default_read_config(PCIDevice *d,
>                                   uint32_t address, int len);
>  void pci_default_write_config(PCIDevice *d,
> -- 
> 1.6.3.1.56.g79e1.dirty
> 
> 
> 

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-09 17:11   ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> Add routines to manage PCI capability list. First user will be MSI-X.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  hw/pci.h |   18 +++++++++++-
>  2 files changed, 106 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 361d741..ed011b5 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
>      int version = s->cap_present ? 3 : 2;
>      int i;
>  
> -    qemu_put_be32(f, version); /* PCI device version */
> +    /* PCI device version and capabilities */
> +    qemu_put_be32(f, version);
> +    if (version >= 3)
> +        qemu_put_be32(f, s->cap_present);
>      qemu_put_buffer(f, s->config, 256);
>      for (i = 0; i < 4; i++)
>          qemu_put_be32(f, s->irq_state[i]);
> -    if (version >= 3)
> -        qemu_put_be32(f, s->cap_present);
>  }
What is it doing here?
You should just do it right in the first patch, instead of doing it
one way there and then fixing it here.

>  
>  int pci_device_load(PCIDevice *s, QEMUFile *f)
> @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      version_id = qemu_get_be32(f);
>      if (version_id > 3)
>          return -EINVAL;
> -    qemu_get_buffer(f, s->config, 256);
> -    pci_update_mappings(s);
> -
> -    if (version_id >= 2)
> -        for (i = 0; i < 4; i ++)
> -            s->irq_state[i] = qemu_get_be32(f);
>      if (version_id >= 3)
>          s->cap_present = qemu_get_be32(f);
>      else
ditto.

> @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      if (s->cap_present & ~s->cap_supported)
>          return -EINVAL;
>  
> +    qemu_get_buffer(f, s->config, 256);
> +    pci_update_mappings(s);
> +
> +    if (version_id >= 2)
> +        for (i = 0; i < 4; i ++)
> +            s->irq_state[i] = qemu_get_be32(f);
> +    /* Clear wmask and used bits for capabilities.
> +       Must be restored separately, since capabilities can
> +       be placed anywhere in config space. */
> +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        s->wmask[i] = 0xff;
>      return 0;
>  }
Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
lose by keeping it at the same place in config space?

>  
> @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
>  
>      return (PCIDevice *)dev;
>  }
> +
> +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> +{
> +    int offset = PCI_CONFIG_HEADER_SIZE;
> +    int i;
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        if (pdev->used[i])
> +            offset = i + 1;
> +        else if (i - offset + 1 == size)
> +            return offset;
> +    return 0;
> +}
> +
> +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> +                                        uint8_t *prev_p)
> +{
> +    uint8_t next, prev;
> +
> +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> +        return 0;
> +
> +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> +         prev = next + PCI_CAP_LIST_NEXT)
> +        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> +            break;
> +
> +    *prev_p = prev;
> +    return next;
> +}
I'd prefer to do:
	if (prev_p)
		*prev_p = prev;
so we don't always have to pass a prev_p pointer. You yourself have a caller
in this very patch that doesn't need it.

> +
> +/* Reserve space and add capability to the linked list in pci config space */
> +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t offset = pci_find_space(pdev, size);
> +    uint8_t *config = pdev->config + offset;
> +    if (!offset)
> +        return -ENOSPC;
> +    config[PCI_CAP_LIST_ID] = cap_id;
> +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> +    memset(pdev->used + offset, 0xFF, size);
> +    /* Make capability read-only by default */
> +    memset(pdev->wmask + offset, 0, size);
> +    return offset;
> +}
> +
> +/* Unlink capability from the pci config space. */
> +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> +    if (!offset)
> +        return;
> +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> +    /* Make capability writeable again */
> +    memset(pdev->wmask + offset, 0xff, size);
> +    memset(pdev->used + offset, 0, size);
> +
> +    if (!pdev->config[PCI_CAPABILITY_LIST])
> +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> +}
> +
> +/* Reserve space for capability at a known offset (to call after load). */
> +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> +{
> +    memset(pdev->used + offset, 0xff, size);
> +}
> +
> +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> +{
> +    uint8_t prev;
> +    return pci_find_capability_list(pdev, cap_id, &prev);
> +}
> diff --git a/hw/pci.h b/hw/pci.h
> index 6f0803f..4838c59 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
>  #define PCI_MIN_GNT		0x3e	/* 8 bits */
>  #define PCI_MAX_LAT		0x3f	/* 8 bits */
>  
> +/* Capability lists */
> +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> +
>  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
>  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
>  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
>  /* Bits in the PCI Status Register (PCI 2.3 spec) */
>  #define PCI_STATUS_RESERVED1	0x007
>  #define PCI_STATUS_INT_STATUS	0x008
> -#define PCI_STATUS_CAPABILITIES	0x010
> +#define PCI_STATUS_CAP_LIST	0x010
>  #define PCI_STATUS_66MHZ	0x020
>  #define PCI_STATUS_RESERVED2	0x040
>  #define PCI_STATUS_FAST_BACK	0x080
> @@ -160,6 +164,9 @@ struct PCIDevice {
>      /* Used to implement R/W bytes */
>      uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
>  
> +    /* Used to allocate config space for capabilities. */
> +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> +
>      /* the following fields are read only */
>      PCIBus *bus;
>      int devfn;
> @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>                              uint32_t size, int type,
>                              PCIMapIORegionFunc *map_func);
>  
> +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> +
> +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> +
> +
>  uint32_t pci_default_read_config(PCIDevice *d,
>                                   uint32_t address, int len);
>  void pci_default_write_config(PCIDevice *d,
> -- 
> 1.6.3.1.56.g79e1.dirty
> 
> 
> 

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-09 17:26     ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:26 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> Add functions implementing MSI-X support. First user will be virtio-pci.
> Note that platform must set a flag to declare MSI supported.
> For PC this will be set by APIC.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  Makefile.target |    2 +-
>  hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/msix.h       |   35 +++++
>  hw/pci.h        |   20 +++
>  4 files changed, 479 insertions(+), 1 deletions(-)
>  create mode 100644 hw/msix.c
>  create mode 100644 hw/msix.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 664a1e3..87b2859 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
>  ifndef CONFIG_USER_ONLY
>  
>  OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> -     gdbstub.o gdbstub-xml.o
> +     gdbstub.o gdbstub-xml.o msix.o
>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>  # need to fix this properly
>  OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> diff --git a/hw/msix.c b/hw/msix.c
> new file mode 100644
> index 0000000..1b5aec8
> --- /dev/null
> +++ b/hw/msix.c
> @@ -0,0 +1,423 @@
> +/*
> + * MSI-X device support
> + *
> + * This module includes support for MSI-X in pci devices.
> + *
> + * Author: Michael S. Tsirkin <mst@redhat.com>
> + *
> + *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include "hw.h"
> +#include "msix.h"
> +#include "pci.h"
> +
> +/* Declaration from linux/pci_regs.h */
> +#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> +#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
> +#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
> +#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
> +#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
> +
> +/* MSI-X capability structure */
> +#define MSIX_TABLE_OFFSET 4
> +#define MSIX_PBA_OFFSET 8
> +#define MSIX_CAP_LENGTH 12
> +
> +/* MSI enable bit is in byte 1 in FLAGS register */
> +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> +
> +/* MSI-X table format */
> +#define MSIX_MSG_ADDR 0
> +#define MSIX_MSG_UPPER_ADDR 4
> +#define MSIX_MSG_DATA 8
> +#define MSIX_VECTOR_CTRL 12
> +#define MSIX_ENTRY_SIZE 16
> +#define MSIX_VECTOR_MASK 0x1
> +
> +/* How much space does an MSIX table need. */
> +/* The spec requires giving the table structure
> + * a 4K aligned region all by itself. Align it to
> + * target pages so that drivers can do passthrough
> + * on the rest of the region. */
> +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> +/* Reserve second half of the page for pending bits */
> +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> +#define MSIX_MAX_ENTRIES 32
> +
> +
> +#ifdef MSIX_DEBUG
> +#define DEBUG(fmt, ...)                                       \
> +    do {                                                      \
> +      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DEBUG(fmt, ...) do { } while(0)
> +#endif
> +
> +/* Flag to globally disable MSI-X support */
> +int msix_disable;
> +
> +/* Flag for interrupt controller to declare MSI-X support */
> +int msix_supported;
maybe better to make it static, and provide msi_state() returning -1 for disabled,
0 for supported, etc...

> +
> +/* Add MSI-X capability to the config space for the device. */
> +/* Given a bar and its size, add MSI-X table on top of it
> + * and fill MSI-X capability in the config space.
> + * Original bar size must be a power of 2 or 0.
> + * New bar size is returned. */
> +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> +                           unsigned bar_nr, unsigned bar_size)
> +{
> +    int config_offset;
> +    uint8_t *config;
> +    uint32_t new_size;
> +
> +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> +        return -EINVAL;
> +    if (bar_size > 0x80000000)
> +        return -ENOSPC;
> +
> +    /* Add space for MSI-X structures */
> +    if (!bar_size)
> +        new_size = MSIX_PAGE_SIZE;
> +    else if (bar_size < MSIX_PAGE_SIZE) {
> +        bar_size = MSIX_PAGE_SIZE;
> +        new_size = MSIX_PAGE_SIZE * 2;
> +    } else
> +        new_size = bar_size * 2;
> +
> +    pdev->msix_bar_size = new_size;
> +    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> +    if (config_offset < 0)
> +        return config_offset;
> +    config = pdev->config + config_offset;
> +
> +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> +    /* Table on top of BAR */
> +    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> +    /* Pending bits on top of that */
> +    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> +                 bar_nr);
> +    pdev->msix_cap = config_offset;
> +    /* Make flags bit writeable. */
> +    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> +    return 0;
> +}
> +

> +
> +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> + * modified, it should be retrieved with msix_bar_size. */
> +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> +              unsigned bar_nr, unsigned bar_size)
> +{
> +    int ret = -ENOMEM;
> +    /* Nothing to do if MSI is not supported by interrupt controller */
> +    if (!msix_supported)
> +        return -ENOTTY;
> +
> +    if (nentries > MSIX_MAX_ENTRIES)
> +        return -EINVAL;
> +
> +    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> +                                        sizeof *dev->msix_entry_used);
> +    if (!dev->msix_entry_used)
> +        goto err_used;
No need to check; oom_checker will kill qemu if the allocation fails.

> +
> +    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> +    if (!dev->msix_table_page)
> +        goto err_page;
ditto.



^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
@ 2009-06-09 17:26     ` Glauber Costa
  0 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:26 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> Add functions implementing MSI-X support. First user will be virtio-pci.
> Note that platform must set a flag to declare MSI supported.
> For PC this will be set by APIC.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  Makefile.target |    2 +-
>  hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/msix.h       |   35 +++++
>  hw/pci.h        |   20 +++
>  4 files changed, 479 insertions(+), 1 deletions(-)
>  create mode 100644 hw/msix.c
>  create mode 100644 hw/msix.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 664a1e3..87b2859 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
>  ifndef CONFIG_USER_ONLY
>  
>  OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> -     gdbstub.o gdbstub-xml.o
> +     gdbstub.o gdbstub-xml.o msix.o
>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>  # need to fix this properly
>  OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> diff --git a/hw/msix.c b/hw/msix.c
> new file mode 100644
> index 0000000..1b5aec8
> --- /dev/null
> +++ b/hw/msix.c
> @@ -0,0 +1,423 @@
> +/*
> + * MSI-X device support
> + *
> + * This module includes support for MSI-X in pci devices.
> + *
> + * Author: Michael S. Tsirkin <mst@redhat.com>
> + *
> + *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include "hw.h"
> +#include "msix.h"
> +#include "pci.h"
> +
> +/* Declaration from linux/pci_regs.h */
> +#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> +#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
> +#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
> +#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
> +#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
> +
> +/* MSI-X capability structure */
> +#define MSIX_TABLE_OFFSET 4
> +#define MSIX_PBA_OFFSET 8
> +#define MSIX_CAP_LENGTH 12
> +
> +/* MSI enable bit is in byte 1 in FLAGS register */
> +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> +
> +/* MSI-X table format */
> +#define MSIX_MSG_ADDR 0
> +#define MSIX_MSG_UPPER_ADDR 4
> +#define MSIX_MSG_DATA 8
> +#define MSIX_VECTOR_CTRL 12
> +#define MSIX_ENTRY_SIZE 16
> +#define MSIX_VECTOR_MASK 0x1
> +
> +/* How much space does an MSIX table need. */
> +/* The spec requires giving the table structure
> + * a 4K aligned region all by itself. Align it to
> + * target pages so that drivers can do passthrough
> + * on the rest of the region. */
> +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> +/* Reserve second half of the page for pending bits */
> +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> +#define MSIX_MAX_ENTRIES 32
> +
> +
> +#ifdef MSIX_DEBUG
> +#define DEBUG(fmt, ...)                                       \
> +    do {                                                      \
> +      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DEBUG(fmt, ...) do { } while(0)
> +#endif
> +
> +/* Flag to globally disable MSI-X support */
> +int msix_disable;
> +
> +/* Flag for interrupt controller to declare MSI-X support */
> +int msix_supported;
maybe better to make it static, and provide msi_state() returning -1 for disabled,
0 for supported, etc...

> +
> +/* Add MSI-X capability to the config space for the device. */
> +/* Given a bar and its size, add MSI-X table on top of it
> + * and fill MSI-X capability in the config space.
> + * Original bar size must be a power of 2 or 0.
> + * New bar size is returned. */
> +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> +                           unsigned bar_nr, unsigned bar_size)
> +{
> +    int config_offset;
> +    uint8_t *config;
> +    uint32_t new_size;
> +
> +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> +        return -EINVAL;
> +    if (bar_size > 0x80000000)
> +        return -ENOSPC;
> +
> +    /* Add space for MSI-X structures */
> +    if (!bar_size)
> +        new_size = MSIX_PAGE_SIZE;
> +    else if (bar_size < MSIX_PAGE_SIZE) {
> +        bar_size = MSIX_PAGE_SIZE;
> +        new_size = MSIX_PAGE_SIZE * 2;
> +    } else
> +        new_size = bar_size * 2;
> +
> +    pdev->msix_bar_size = new_size;
> +    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> +    if (config_offset < 0)
> +        return config_offset;
> +    config = pdev->config + config_offset;
> +
> +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> +    /* Table on top of BAR */
> +    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> +    /* Pending bits on top of that */
> +    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> +                 bar_nr);
> +    pdev->msix_cap = config_offset;
> +    /* Make flags bit writeable. */
> +    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> +    return 0;
> +}
> +

> +
> +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> + * modified, it should be retrieved with msix_bar_size. */
> +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> +              unsigned bar_nr, unsigned bar_size)
> +{
> +    int ret = -ENOMEM;
> +    /* Nothing to do if MSI is not supported by interrupt controller */
> +    if (!msix_supported)
> +        return -ENOTTY;
> +
> +    if (nentries > MSIX_MAX_ENTRIES)
> +        return -EINVAL;
> +
> +    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> +                                        sizeof *dev->msix_entry_used);
> +    if (!dev->msix_entry_used)
> +        goto err_used;
No need to check; oom_checker will kill qemu if the allocation fails.

> +
> +    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> +    if (!dev->msix_table_page)
> +        goto err_page;
ditto.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
  (?)
@ 2009-06-09 17:26   ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:26 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> Add functions implementing MSI-X support. First user will be virtio-pci.
> Note that platform must set a flag to declare MSI supported.
> For PC this will be set by APIC.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  Makefile.target |    2 +-
>  hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/msix.h       |   35 +++++
>  hw/pci.h        |   20 +++
>  4 files changed, 479 insertions(+), 1 deletions(-)
>  create mode 100644 hw/msix.c
>  create mode 100644 hw/msix.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 664a1e3..87b2859 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
>  ifndef CONFIG_USER_ONLY
>  
>  OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> -     gdbstub.o gdbstub-xml.o
> +     gdbstub.o gdbstub-xml.o msix.o
>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>  # need to fix this properly
>  OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> diff --git a/hw/msix.c b/hw/msix.c
> new file mode 100644
> index 0000000..1b5aec8
> --- /dev/null
> +++ b/hw/msix.c
> @@ -0,0 +1,423 @@
> +/*
> + * MSI-X device support
> + *
> + * This module includes support for MSI-X in pci devices.
> + *
> + * Author: Michael S. Tsirkin <mst@redhat.com>
> + *
> + *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include "hw.h"
> +#include "msix.h"
> +#include "pci.h"
> +
> +/* Declaration from linux/pci_regs.h */
> +#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> +#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
> +#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
> +#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
> +#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
> +
> +/* MSI-X capability structure */
> +#define MSIX_TABLE_OFFSET 4
> +#define MSIX_PBA_OFFSET 8
> +#define MSIX_CAP_LENGTH 12
> +
> +/* MSI enable bit is in byte 1 in FLAGS register */
> +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> +
> +/* MSI-X table format */
> +#define MSIX_MSG_ADDR 0
> +#define MSIX_MSG_UPPER_ADDR 4
> +#define MSIX_MSG_DATA 8
> +#define MSIX_VECTOR_CTRL 12
> +#define MSIX_ENTRY_SIZE 16
> +#define MSIX_VECTOR_MASK 0x1
> +
> +/* How much space does an MSIX table need. */
> +/* The spec requires giving the table structure
> + * a 4K aligned region all by itself. Align it to
> + * target pages so that drivers can do passthrough
> + * on the rest of the region. */
> +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> +/* Reserve second half of the page for pending bits */
> +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> +#define MSIX_MAX_ENTRIES 32
> +
> +
> +#ifdef MSIX_DEBUG
> +#define DEBUG(fmt, ...)                                       \
> +    do {                                                      \
> +      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DEBUG(fmt, ...) do { } while(0)
> +#endif
> +
> +/* Flag to globally disable MSI-X support */
> +int msix_disable;
> +
> +/* Flag for interrupt controller to declare MSI-X support */
> +int msix_supported;
maybe better to make it static, and provide msi_state() returning -1 for disabled,
0 for supported, etc...

> +
> +/* Add MSI-X capability to the config space for the device. */
> +/* Given a bar and its size, add MSI-X table on top of it
> + * and fill MSI-X capability in the config space.
> + * Original bar size must be a power of 2 or 0.
> + * New bar size is returned. */
> +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> +                           unsigned bar_nr, unsigned bar_size)
> +{
> +    int config_offset;
> +    uint8_t *config;
> +    uint32_t new_size;
> +
> +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> +        return -EINVAL;
> +    if (bar_size > 0x80000000)
> +        return -ENOSPC;
> +
> +    /* Add space for MSI-X structures */
> +    if (!bar_size)
> +        new_size = MSIX_PAGE_SIZE;
> +    else if (bar_size < MSIX_PAGE_SIZE) {
> +        bar_size = MSIX_PAGE_SIZE;
> +        new_size = MSIX_PAGE_SIZE * 2;
> +    } else
> +        new_size = bar_size * 2;
> +
> +    pdev->msix_bar_size = new_size;
> +    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> +    if (config_offset < 0)
> +        return config_offset;
> +    config = pdev->config + config_offset;
> +
> +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> +    /* Table on top of BAR */
> +    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> +    /* Pending bits on top of that */
> +    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> +                 bar_nr);
> +    pdev->msix_cap = config_offset;
> +    /* Make flags bit writeable. */
> +    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> +    return 0;
> +}
> +

> +
> +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> + * modified, it should be retrieved with msix_bar_size. */
> +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> +              unsigned bar_nr, unsigned bar_size)
> +{
> +    int ret = -ENOMEM;
> +    /* Nothing to do if MSI is not supported by interrupt controller */
> +    if (!msix_supported)
> +        return -ENOTTY;
> +
> +    if (nentries > MSIX_MAX_ENTRIES)
> +        return -EINVAL;
> +
> +    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> +                                        sizeof *dev->msix_entry_used);
> +    if (!dev->msix_entry_used)
> +        goto err_used;
no need to check. oom_checker will kill qemu if it fails.

> +
> +    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> +    if (!dev->msix_table_page)
> +        goto err_page;
ditto.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-09 17:33     ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

>      env = cpu_single_env;
>      if (!env)
> @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>      printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
>  #endif
>  
> -    index = (addr >> 4) & 0xff;
>      switch(index) {
>      case 0x02:
>          s->id = (val >> 24);
> @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
>      s->cpu_env = env;
>  
>      apic_reset(s);
> +    msix_supported = 1;
>  
>      /* XXX: mapping more APICs at the same memory location */
>      if (apic_io_memory == 0) {
> @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
>             on the global memory bus. */
>          apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
>                                                  apic_mem_write, NULL);
> -        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> +        /* XXX: what if the base changes? */
> +        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
>                                       apic_io_memory);
+1 

I think you have a point here. Your patch is in no way worse than what we had,
but we're currently not correctly handling the case of the base address changing.
I guess that is not common in normal APIC usage by OSes...


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
@ 2009-06-09 17:33     ` Glauber Costa
  0 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

>      env = cpu_single_env;
>      if (!env)
> @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>      printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
>  #endif
>  
> -    index = (addr >> 4) & 0xff;
>      switch(index) {
>      case 0x02:
>          s->id = (val >> 24);
> @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
>      s->cpu_env = env;
>  
>      apic_reset(s);
> +    msix_supported = 1;
>  
>      /* XXX: mapping more APICs at the same memory location */
>      if (apic_io_memory == 0) {
> @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
>             on the global memory bus. */
>          apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
>                                                  apic_mem_write, NULL);
> -        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> +        /* XXX: what if the base changes? */
> +        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
>                                       apic_io_memory);
+1 

I think you have a point here. Your patch is in no way worse than what we had,
but we're currently not correctly handling the case of the base address changing.
I guess that is not common in normal APIC usage by OSes...

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
  (?)
@ 2009-06-09 17:33   ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

>      env = cpu_single_env;
>      if (!env)
> @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>      printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
>  #endif
>  
> -    index = (addr >> 4) & 0xff;
>      switch(index) {
>      case 0x02:
>          s->id = (val >> 24);
> @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
>      s->cpu_env = env;
>  
>      apic_reset(s);
> +    msix_supported = 1;
>  
>      /* XXX: mapping more APICs at the same memory location */
>      if (apic_io_memory == 0) {
> @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
>             on the global memory bus. */
>          apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
>                                                  apic_mem_write, NULL);
> -        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> +        /* XXX: what if the base changes? */
> +        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
>                                       apic_io_memory);
+1 

I think you have a point here. Your patch is in no way worse than what we had,
but we're currently not correctly handling the case of the base address changing.
I guess that is not common in normal APIC usage by OSes...

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-09 17:36     ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> Make it possible to resize PCI regions.  This will be used by virtio
> with MSI-X, where the region size depends on whether MSI-X is enabled,
> and can change across load/save.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
>  hw/pci.h |    3 +++
>  2 files changed, 39 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index ed011b5..042a216 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
>  }
>  
> +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> +{
> +    if (r->addr == -1)
> +        return;
> +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> +        int class;
> +        /* NOTE: specific hack for IDE in PC case:
> +           only one byte must be mapped. */
> +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> +        if (class == 0x0101 && r->size == 4) {
> +            isa_unassign_ioport(r->addr + 2, 1);
> +        } else {
> +            isa_unassign_ioport(r->addr, r->size);
> +        }
> +    } else {
> +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> +                                     r->size,
> +                                     IO_MEM_UNASSIGNED);
> +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> +    }
> +}
> +
this is a good cleanup...

> +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> +                          uint32_t size)
> +{
> +
> +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> +    if (r->size == size)
> +        return;
> +    r->size = size;
> +    pci_unmap_region(pci_dev, r);
> +    r->addr = -1;
> +    pci_update_mappings(pci_dev);
> +}
> +
but the only user of this one seems to be commented out, and later removed.
Why is this needed?



^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
@ 2009-06-09 17:36     ` Glauber Costa
  0 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> Make it possible to resize PCI regions.  This will be used by virtio
> with MSI-X, where the region size depends on whether MSI-X is enabled,
> and can change across load/save.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
>  hw/pci.h |    3 +++
>  2 files changed, 39 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index ed011b5..042a216 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
>  }
>  
> +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> +{
> +    if (r->addr == -1)
> +        return;
> +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> +        int class;
> +        /* NOTE: specific hack for IDE in PC case:
> +           only one byte must be mapped. */
> +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> +        if (class == 0x0101 && r->size == 4) {
> +            isa_unassign_ioport(r->addr + 2, 1);
> +        } else {
> +            isa_unassign_ioport(r->addr, r->size);
> +        }
> +    } else {
> +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> +                                     r->size,
> +                                     IO_MEM_UNASSIGNED);
> +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> +    }
> +}
> +
this is a good cleanup...

> +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> +                          uint32_t size)
> +{
> +
> +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> +    if (r->size == size)
> +        return;
> +    r->size = size;
> +    pci_unmap_region(pci_dev, r);
> +    r->addr = -1;
> +    pci_update_mappings(pci_dev);
> +}
> +
but the only user of this one seems to be commented out, and later removed.
Why is this needed?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
  2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-09 17:36   ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> Make it possible to resize PCI regions.  This will be used by virtio
> with MSI-X, where the region size depends on whether MSI-X is enabled,
> and can change across load/save.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
>  hw/pci.h |    3 +++
>  2 files changed, 39 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index ed011b5..042a216 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
>  }
>  
> +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> +{
> +    if (r->addr == -1)
> +        return;
> +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> +        int class;
> +        /* NOTE: specific hack for IDE in PC case:
> +           only one byte must be mapped. */
> +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> +        if (class == 0x0101 && r->size == 4) {
> +            isa_unassign_ioport(r->addr + 2, 1);
> +        } else {
> +            isa_unassign_ioport(r->addr, r->size);
> +        }
> +    } else {
> +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> +                                     r->size,
> +                                     IO_MEM_UNASSIGNED);
> +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> +    }
> +}
> +
this is a good cleanup...

> +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> +                          uint32_t size)
> +{
> +
> +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> +    if (r->size == size)
> +        return;
> +    r->size = size;
> +    pci_unmap_region(pci_dev, r);
> +    r->addr = -1;
> +    pci_update_mappings(pci_dev);
> +}
> +
but the only user of this one seems to be commented out, and later removed.
Why is this needed?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-09 17:45     ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

duplicated save config.

> diff --git a/hw/virtio.h b/hw/virtio.h
> index 04a3c3d..ce05517 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
>  
>  typedef struct {
>      void (*notify)(void * opaque, uint16_t vector);
> +    void (*save_config)(void * opaque, QEMUFile *f);
> +    void (*save_queue)(void * opaque, int n, QEMUFile *f);
> +    int (*load_config)(void * opaque, QEMUFile *f);
> +    int (*load_queue)(void * opaque, int n, QEMUFile *f);
>  } VirtIOBindings;
>  
So, what's the overall effect on a virtual machine that gets migrated,
of a certain device not implementing one of those functions? Will it work?
Will it break?



^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
@ 2009-06-09 17:45     ` Glauber Costa
  0 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

duplicated save config.

> diff --git a/hw/virtio.h b/hw/virtio.h
> index 04a3c3d..ce05517 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
>  
>  typedef struct {
>      void (*notify)(void * opaque, uint16_t vector);
> +    void (*save_config)(void * opaque, QEMUFile *f);
> +    void (*save_queue)(void * opaque, int n, QEMUFile *f);
> +    int (*load_config)(void * opaque, QEMUFile *f);
> +    int (*load_queue)(void * opaque, int n, QEMUFile *f);
>  } VirtIOBindings;
>  
So, what's the overall effect on a virtual machine that gets migrated,
of a certain device not implementing one of those functions? Will it work?
Will it break?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
  2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
  (?)
@ 2009-06-09 17:45   ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-09 17:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

duplicated save config.

> diff --git a/hw/virtio.h b/hw/virtio.h
> index 04a3c3d..ce05517 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
>  
>  typedef struct {
>      void (*notify)(void * opaque, uint16_t vector);
> +    void (*save_config)(void * opaque, QEMUFile *f);
> +    void (*save_queue)(void * opaque, int n, QEMUFile *f);
> +    int (*load_config)(void * opaque, QEMUFile *f);
> +    int (*load_queue)(void * opaque, int n, QEMUFile *f);
>  } VirtIOBindings;
>  
So, what's the overall effect on a virtual machine that gets migrated,
of a certain device not implementing one of those functions? Will it work?
Will it break?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-09 17:11     ` Glauber Costa
@ 2009-06-10  9:54       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:54 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > Add routines to manage PCI capability list. First user will be MSI-X.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> >  hw/pci.h |   18 +++++++++++-
> >  2 files changed, 106 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 361d741..ed011b5 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> >      int version = s->cap_present ? 3 : 2;
> >      int i;
> >  
> > -    qemu_put_be32(f, version); /* PCI device version */
> > +    /* PCI device version and capabilities */
> > +    qemu_put_be32(f, version);
> > +    if (version >= 3)
> > +        qemu_put_be32(f, s->cap_present);
> >      qemu_put_buffer(f, s->config, 256);
> >      for (i = 0; i < 4; i++)
> >          qemu_put_be32(f, s->irq_state[i]);
> > -    if (version >= 3)
> > -        qemu_put_be32(f, s->cap_present);
> >  }
> What is it doing here?
> You should just do it right in the first patch, instead of doing in
> one way there, and fixing here.
> 
> >  
> >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      version_id = qemu_get_be32(f);
> >      if (version_id > 3)
> >          return -EINVAL;
> > -    qemu_get_buffer(f, s->config, 256);
> > -    pci_update_mappings(s);
> > -
> > -    if (version_id >= 2)
> > -        for (i = 0; i < 4; i ++)
> > -            s->irq_state[i] = qemu_get_be32(f);
> >      if (version_id >= 3)
> >          s->cap_present = qemu_get_be32(f);
> >      else
> ditto.
> > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      if (s->cap_present & ~s->cap_supported)
> >          return -EINVAL;
> >  
> > +    qemu_get_buffer(f, s->config, 256);
> > +    pci_update_mappings(s);
> > +
> > +    if (version_id >= 2)
> > +        for (i = 0; i < 4; i ++)
> > +            s->irq_state[i] = qemu_get_be32(f);
> > +    /* Clear wmask and used bits for capabilities.
> > +       Must be restored separately, since capabilities can
> > +       be placed anywhere in config space. */
> > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        s->wmask[i] = 0xff;
> >      return 0;
> >  }
> Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> lose by keeping it at the same place in config space?

We lose the ability to let user control the capabilities exposed
by the device.

And generally, I dislike arbitrary limitations. The PCI spec says the
capability can be anywhere, and implementing a linked list of caps is simple
enough that there is no need to invent arbitrary restrictions.

> >  
> > @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> >  
> >      return (PCIDevice *)dev;
> >  }
> > +
> > +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> > +{
> > +    int offset = PCI_CONFIG_HEADER_SIZE;
> > +    int i;
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        if (pdev->used[i])
> > +            offset = i + 1;
> > +        else if (i - offset + 1 == size)
> > +            return offset;
> > +    return 0;
> > +}
> > +
> > +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> > +                                        uint8_t *prev_p)
> > +{
> > +    uint8_t next, prev;
> > +
> > +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> > +        return 0;
> > +
> > +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> > +         prev = next + PCI_CAP_LIST_NEXT)
> > +        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> > +            break;
> > +
> > +    *prev_p = prev;
> > +    return next;
> > +}
> I'd prefer to do:
> 	if (prev_p)
> 		*prev_p = prev;
> so we don't have to always pass a prev_p pointer. You have yourself a user
> where you don't need it in this very patch.


Good idea.

> > +
> > +/* Reserve space and add capability to the linked list in pci config space */
> > +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t offset = pci_find_space(pdev, size);
> > +    uint8_t *config = pdev->config + offset;
> > +    if (!offset)
> > +        return -ENOSPC;
> > +    config[PCI_CAP_LIST_ID] = cap_id;
> > +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> > +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> > +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> > +    memset(pdev->used + offset, 0xFF, size);
> > +    /* Make capability read-only by default */
> > +    memset(pdev->wmask + offset, 0, size);
> > +    return offset;
> > +}
> > +
> > +/* Unlink capability from the pci config space. */
> > +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> > +    if (!offset)
> > +        return;
> > +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> > +    /* Make capability writeable again */
> > +    memset(pdev->wmask + offset, 0xff, size);
> > +    memset(pdev->used + offset, 0, size);
> > +
> > +    if (!pdev->config[PCI_CAPABILITY_LIST])
> > +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> > +}
> > +
> > +/* Reserve space for capability at a known offset (to call after load). */
> > +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> > +{
> > +    memset(pdev->used + offset, 0xff, size);
> > +}
> > +
> > +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> > +{
> > +    uint8_t prev;
> > +    return pci_find_capability_list(pdev, cap_id, &prev);
> > +}
> > diff --git a/hw/pci.h b/hw/pci.h
> > index 6f0803f..4838c59 100644
> > --- a/hw/pci.h
> > +++ b/hw/pci.h
> > @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
> >  #define PCI_MIN_GNT		0x3e	/* 8 bits */
> >  #define PCI_MAX_LAT		0x3f	/* 8 bits */
> >  
> > +/* Capability lists */
> > +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> > +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> > +
> >  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
> >  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
> >  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> > @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
> >  /* Bits in the PCI Status Register (PCI 2.3 spec) */
> >  #define PCI_STATUS_RESERVED1	0x007
> >  #define PCI_STATUS_INT_STATUS	0x008
> > -#define PCI_STATUS_CAPABILITIES	0x010
> > +#define PCI_STATUS_CAP_LIST	0x010
> >  #define PCI_STATUS_66MHZ	0x020
> >  #define PCI_STATUS_RESERVED2	0x040
> >  #define PCI_STATUS_FAST_BACK	0x080
> > @@ -160,6 +164,9 @@ struct PCIDevice {
> >      /* Used to implement R/W bytes */
> >      uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
> >  
> > +    /* Used to allocate config space for capabilities. */
> > +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> > +
> >      /* the following fields are read only */
> >      PCIBus *bus;
> >      int devfn;
> > @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >                              uint32_t size, int type,
> >                              PCIMapIORegionFunc *map_func);
> >  
> > +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> > +
> > +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> > +
> > +
> >  uint32_t pci_default_read_config(PCIDevice *d,
> >                                   uint32_t address, int len);
> >  void pci_default_write_config(PCIDevice *d,
> > -- 
> > 1.6.3.1.56.g79e1.dirty
> > 
> > 
> > 

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10  9:54       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:54 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > Add routines to manage PCI capability list. First user will be MSI-X.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> >  hw/pci.h |   18 +++++++++++-
> >  2 files changed, 106 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 361d741..ed011b5 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> >      int version = s->cap_present ? 3 : 2;
> >      int i;
> >  
> > -    qemu_put_be32(f, version); /* PCI device version */
> > +    /* PCI device version and capabilities */
> > +    qemu_put_be32(f, version);
> > +    if (version >= 3)
> > +        qemu_put_be32(f, s->cap_present);
> >      qemu_put_buffer(f, s->config, 256);
> >      for (i = 0; i < 4; i++)
> >          qemu_put_be32(f, s->irq_state[i]);
> > -    if (version >= 3)
> > -        qemu_put_be32(f, s->cap_present);
> >  }
> What is it doing here?
> You should just do it right in the first patch, instead of doing in
> one way there, and fixing here.
> 
> >  
> >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      version_id = qemu_get_be32(f);
> >      if (version_id > 3)
> >          return -EINVAL;
> > -    qemu_get_buffer(f, s->config, 256);
> > -    pci_update_mappings(s);
> > -
> > -    if (version_id >= 2)
> > -        for (i = 0; i < 4; i ++)
> > -            s->irq_state[i] = qemu_get_be32(f);
> >      if (version_id >= 3)
> >          s->cap_present = qemu_get_be32(f);
> >      else
> ditto.
> > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      if (s->cap_present & ~s->cap_supported)
> >          return -EINVAL;
> >  
> > +    qemu_get_buffer(f, s->config, 256);
> > +    pci_update_mappings(s);
> > +
> > +    if (version_id >= 2)
> > +        for (i = 0; i < 4; i ++)
> > +            s->irq_state[i] = qemu_get_be32(f);
> > +    /* Clear wmask and used bits for capabilities.
> > +       Must be restored separately, since capabilities can
> > +       be placed anywhere in config space. */
> > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        s->wmask[i] = 0xff;
> >      return 0;
> >  }
> Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> lose by keeping it at the same place in config space?

We lose the ability to let the user control the capabilities exposed
by the device.

And generally, I dislike arbitrary limitations. The PCI spec says the
capability can be anywhere; implementing a linked list of caps is simple
enough to not invent arbitrary restrictions.

> >  
> > @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> >  
> >      return (PCIDevice *)dev;
> >  }
> > +
> > +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> > +{
> > +    int offset = PCI_CONFIG_HEADER_SIZE;
> > +    int i;
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        if (pdev->used[i])
> > +            offset = i + 1;
> > +        else if (i - offset + 1 == size)
> > +            return offset;
> > +    return 0;
> > +}
> > +
> > +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> > +                                        uint8_t *prev_p)
> > +{
> > +    uint8_t next, prev;
> > +
> > +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> > +        return 0;
> > +
> > +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> > +         prev = next + PCI_CAP_LIST_NEXT)
> > +        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> > +            break;
> > +
> > +    *prev_p = prev;
> > +    return next;
> > +}
> I'd prefer to do:
> 	if (prev_p)
> 		*prev_p = prev;
> so we don't have to always pass a prev_p pointer. You have yourself a user
> where you don't need it in this very patch.


Good idea.

> > +
> > +/* Reserve space and add capability to the linked list in pci config space */
> > +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t offset = pci_find_space(pdev, size);
> > +    uint8_t *config = pdev->config + offset;
> > +    if (!offset)
> > +        return -ENOSPC;
> > +    config[PCI_CAP_LIST_ID] = cap_id;
> > +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> > +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> > +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> > +    memset(pdev->used + offset, 0xFF, size);
> > +    /* Make capability read-only by default */
> > +    memset(pdev->wmask + offset, 0, size);
> > +    return offset;
> > +}
> > +
> > +/* Unlink capability from the pci config space. */
> > +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> > +    if (!offset)
> > +        return;
> > +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> > +    /* Make capability writeable again */
> > +    memset(pdev->wmask + offset, 0xff, size);
> > +    memset(pdev->used + offset, 0, size);
> > +
> > +    if (!pdev->config[PCI_CAPABILITY_LIST])
> > +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> > +}
> > +
> > +/* Reserve space for capability at a known offset (to call after load). */
> > +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> > +{
> > +    memset(pdev->used + offset, 0xff, size);
> > +}
> > +
> > +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> > +{
> > +    uint8_t prev;
> > +    return pci_find_capability_list(pdev, cap_id, &prev);
> > +}
> > diff --git a/hw/pci.h b/hw/pci.h
> > index 6f0803f..4838c59 100644
> > --- a/hw/pci.h
> > +++ b/hw/pci.h
> > @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
> >  #define PCI_MIN_GNT		0x3e	/* 8 bits */
> >  #define PCI_MAX_LAT		0x3f	/* 8 bits */
> >  
> > +/* Capability lists */
> > +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> > +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> > +
> >  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
> >  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
> >  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> > @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
> >  /* Bits in the PCI Status Register (PCI 2.3 spec) */
> >  #define PCI_STATUS_RESERVED1	0x007
> >  #define PCI_STATUS_INT_STATUS	0x008
> > -#define PCI_STATUS_CAPABILITIES	0x010
> > +#define PCI_STATUS_CAP_LIST	0x010
> >  #define PCI_STATUS_66MHZ	0x020
> >  #define PCI_STATUS_RESERVED2	0x040
> >  #define PCI_STATUS_FAST_BACK	0x080
> > @@ -160,6 +164,9 @@ struct PCIDevice {
> >      /* Used to implement R/W bytes */
> >      uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
> >  
> > +    /* Used to allocate config space for capabilities. */
> > +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> > +
> >      /* the following fields are read only */
> >      PCIBus *bus;
> >      int devfn;
> > @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >                              uint32_t size, int type,
> >                              PCIMapIORegionFunc *map_func);
> >  
> > +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> > +
> > +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> > +
> > +
> >  uint32_t pci_default_read_config(PCIDevice *d,
> >                                   uint32_t address, int len);
> >  void pci_default_write_config(PCIDevice *d,
> > -- 
> > 1.6.3.1.56.g79e1.dirty
> > 
> > 
> > 

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-09 17:11     ` Glauber Costa
  (?)
@ 2009-06-10  9:54     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:54 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > Add routines to manage PCI capability list. First user will be MSI-X.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> >  hw/pci.h |   18 +++++++++++-
> >  2 files changed, 106 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 361d741..ed011b5 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> >      int version = s->cap_present ? 3 : 2;
> >      int i;
> >  
> > -    qemu_put_be32(f, version); /* PCI device version */
> > +    /* PCI device version and capabilities */
> > +    qemu_put_be32(f, version);
> > +    if (version >= 3)
> > +        qemu_put_be32(f, s->cap_present);
> >      qemu_put_buffer(f, s->config, 256);
> >      for (i = 0; i < 4; i++)
> >          qemu_put_be32(f, s->irq_state[i]);
> > -    if (version >= 3)
> > -        qemu_put_be32(f, s->cap_present);
> >  }
> What is it doing here?
> You should just do it right in the first patch, instead of doing in
> one way there, and fixing here.
> 
> >  
> >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      version_id = qemu_get_be32(f);
> >      if (version_id > 3)
> >          return -EINVAL;
> > -    qemu_get_buffer(f, s->config, 256);
> > -    pci_update_mappings(s);
> > -
> > -    if (version_id >= 2)
> > -        for (i = 0; i < 4; i ++)
> > -            s->irq_state[i] = qemu_get_be32(f);
> >      if (version_id >= 3)
> >          s->cap_present = qemu_get_be32(f);
> >      else
> ditto.
> > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      if (s->cap_present & ~s->cap_supported)
> >          return -EINVAL;
> >  
> > +    qemu_get_buffer(f, s->config, 256);
> > +    pci_update_mappings(s);
> > +
> > +    if (version_id >= 2)
> > +        for (i = 0; i < 4; i ++)
> > +            s->irq_state[i] = qemu_get_be32(f);
> > +    /* Clear wmask and used bits for capabilities.
> > +       Must be restored separately, since capabilities can
> > +       be placed anywhere in config space. */
> > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        s->wmask[i] = 0xff;
> >      return 0;
> >  }
> Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> lose by keeping it at the same place in config space?

We lose the ability to let the user control the capabilities exposed
by the device.

And generally, I dislike arbitrary limitations. The PCI spec says the
capability can be anywhere; implementing a linked list of caps is simple
enough to not invent arbitrary restrictions.

> >  
> > @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> >  
> >      return (PCIDevice *)dev;
> >  }
> > +
> > +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> > +{
> > +    int offset = PCI_CONFIG_HEADER_SIZE;
> > +    int i;
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        if (pdev->used[i])
> > +            offset = i + 1;
> > +        else if (i - offset + 1 == size)
> > +            return offset;
> > +    return 0;
> > +}
> > +
> > +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> > +                                        uint8_t *prev_p)
> > +{
> > +    uint8_t next, prev;
> > +
> > +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> > +        return 0;
> > +
> > +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> > +         prev = next + PCI_CAP_LIST_NEXT)
> > +        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
> > +            break;
> > +
> > +    *prev_p = prev;
> > +    return next;
> > +}
> I'd prefer to do:
> 	if (prev_p)
> 		*prev_p = prev;
> so we don't have to always pass a prev_p pointer. You have yourself a user
> where you don't need it in this very patch.


Good idea.

> > +
> > +/* Reserve space and add capability to the linked list in pci config space */
> > +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t offset = pci_find_space(pdev, size);
> > +    uint8_t *config = pdev->config + offset;
> > +    if (!offset)
> > +        return -ENOSPC;
> > +    config[PCI_CAP_LIST_ID] = cap_id;
> > +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> > +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> > +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> > +    memset(pdev->used + offset, 0xFF, size);
> > +    /* Make capability read-only by default */
> > +    memset(pdev->wmask + offset, 0, size);
> > +    return offset;
> > +}
> > +
> > +/* Unlink capability from the pci config space. */
> > +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> > +    if (!offset)
> > +        return;
> > +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> > +    /* Make capability writeable again */
> > +    memset(pdev->wmask + offset, 0xff, size);
> > +    memset(pdev->used + offset, 0, size);
> > +
> > +    if (!pdev->config[PCI_CAPABILITY_LIST])
> > +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> > +}
> > +
> > +/* Reserve space for capability at a known offset (to call after load). */
> > +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> > +{
> > +    memset(pdev->used + offset, 0xff, size);
> > +}
> > +
> > +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> > +{
> > +    uint8_t prev;
> > +    return pci_find_capability_list(pdev, cap_id, &prev);
> > +}
> > diff --git a/hw/pci.h b/hw/pci.h
> > index 6f0803f..4838c59 100644
> > --- a/hw/pci.h
> > +++ b/hw/pci.h
> > @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
> >  #define PCI_MIN_GNT		0x3e	/* 8 bits */
> >  #define PCI_MAX_LAT		0x3f	/* 8 bits */
> >  
> > +/* Capability lists */
> > +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> > +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> > +
> >  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
> >  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
> >  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> > @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
> >  /* Bits in the PCI Status Register (PCI 2.3 spec) */
> >  #define PCI_STATUS_RESERVED1	0x007
> >  #define PCI_STATUS_INT_STATUS	0x008
> > -#define PCI_STATUS_CAPABILITIES	0x010
> > +#define PCI_STATUS_CAP_LIST	0x010
> >  #define PCI_STATUS_66MHZ	0x020
> >  #define PCI_STATUS_RESERVED2	0x040
> >  #define PCI_STATUS_FAST_BACK	0x080
> > @@ -160,6 +164,9 @@ struct PCIDevice {
> >      /* Used to implement R/W bytes */
> >      uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
> >  
> > +    /* Used to allocate config space for capabilities. */
> > +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> > +
> >      /* the following fields are read only */
> >      PCIBus *bus;
> >      int devfn;
> > @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >                              uint32_t size, int type,
> >                              PCIMapIORegionFunc *map_func);
> >  
> > +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> > +
> > +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> > +
> > +
> >  uint32_t pci_default_read_config(PCIDevice *d,
> >                                   uint32_t address, int len);
> >  void pci_default_write_config(PCIDevice *d,
> > -- 
> > 1.6.3.1.56.g79e1.dirty
> > 
> > 
> > 

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
  2009-06-09 17:26     ` Glauber Costa
@ 2009-06-10  9:58       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:58 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:26:27PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> > Add functions implementing MSI-X support. First user will be virtio-pci.
> > Note that platform must set a flag to declare MSI supported.
> > For PC this will be set by APIC.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  Makefile.target |    2 +-
> >  hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/msix.h       |   35 +++++
> >  hw/pci.h        |   20 +++
> >  4 files changed, 479 insertions(+), 1 deletions(-)
> >  create mode 100644 hw/msix.c
> >  create mode 100644 hw/msix.h
> > 
> > diff --git a/Makefile.target b/Makefile.target
> > index 664a1e3..87b2859 100644
> > --- a/Makefile.target
> > +++ b/Makefile.target
> > @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
> >  ifndef CONFIG_USER_ONLY
> >  
> >  OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> > -     gdbstub.o gdbstub-xml.o
> > +     gdbstub.o gdbstub-xml.o msix.o
> >  # virtio has to be here due to weird dependency between PCI and virtio-net.
> >  # need to fix this properly
> >  OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> > diff --git a/hw/msix.c b/hw/msix.c
> > new file mode 100644
> > index 0000000..1b5aec8
> > --- /dev/null
> > +++ b/hw/msix.c
> > @@ -0,0 +1,423 @@
> > +/*
> > + * MSI-X device support
> > + *
> > + * This module includes support for MSI-X in pci devices.
> > + *
> > + * Author: Michael S. Tsirkin <mst@redhat.com>
> > + *
> > + *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "hw.h"
> > +#include "msix.h"
> > +#include "pci.h"
> > +
> > +/* Declaration from linux/pci_regs.h */
> > +#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> > +#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
> > +#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
> > +#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
> > +#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
> > +
> > +/* MSI-X capability structure */
> > +#define MSIX_TABLE_OFFSET 4
> > +#define MSIX_PBA_OFFSET 8
> > +#define MSIX_CAP_LENGTH 12
> > +
> > +/* MSI enable bit is in byte 1 in FLAGS register */
> > +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> > +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> > +
> > +/* MSI-X table format */
> > +#define MSIX_MSG_ADDR 0
> > +#define MSIX_MSG_UPPER_ADDR 4
> > +#define MSIX_MSG_DATA 8
> > +#define MSIX_VECTOR_CTRL 12
> > +#define MSIX_ENTRY_SIZE 16
> > +#define MSIX_VECTOR_MASK 0x1
> > +
> > +/* How much space does an MSIX table need. */
> > +/* The spec requires giving the table structure
> > + * a 4K aligned region all by itself. Align it to
> > + * target pages so that drivers can do passthrough
> > + * on the rest of the region. */
> > +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> > +/* Reserve second half of the page for pending bits */
> > +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> > +#define MSIX_MAX_ENTRIES 32
> > +
> > +
> > +#ifdef MSIX_DEBUG
> > +#define DEBUG(fmt, ...)                                       \
> > +    do {                                                      \
> > +      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
> > +    } while (0)
> > +#else
> > +#define DEBUG(fmt, ...) do { } while(0)
> > +#endif
> > +
> > +/* Flag to globally disable MSI-X support */
> > +int msix_disable;
> > +
> > +/* Flag for interrupt controller to declare MSI-X support */
> > +int msix_supported;
> maybe better to make it static,

It's not read-only either.

> and provide msi_state() returning -1 for disabled,
> 0 for supported, etc... 

Matter of taste, I prefer a set of binary flags rather than yet another enum:
msix_disable is controlled by user, msix_supported is a safety valve
for non-PC platforms. It's easier to keep them separate IMO.

> > +
> > +/* Add MSI-X capability to the config space for the device. */
> > +/* Given a bar and its size, add MSI-X table on top of it
> > + * and fill MSI-X capability in the config space.
> > + * Original bar size must be a power of 2 or 0.
> > + * New bar size is returned. */
> > +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> > +                           unsigned bar_nr, unsigned bar_size)
> > +{
> > +    int config_offset;
> > +    uint8_t *config;
> > +    uint32_t new_size;
> > +
> > +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> > +        return -EINVAL;
> > +    if (bar_size > 0x80000000)
> > +        return -ENOSPC;
> > +
> > +    /* Add space for MSI-X structures */
> > +    if (!bar_size)
> > +        new_size = MSIX_PAGE_SIZE;
> > +    else if (bar_size < MSIX_PAGE_SIZE) {
> > +        bar_size = MSIX_PAGE_SIZE;
> > +        new_size = MSIX_PAGE_SIZE * 2;
> > +    } else
> > +        new_size = bar_size * 2;
> > +
> > +    pdev->msix_bar_size = new_size;
> > +    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > +    if (config_offset < 0)
> > +        return config_offset;
> > +    config = pdev->config + config_offset;
> > +
> > +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > +    /* Table on top of BAR */
> > +    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> > +    /* Pending bits on top of that */
> > +    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> > +                 bar_nr);
> > +    pdev->msix_cap = config_offset;
> > +    /* Make flags bit writeable. */
> > +    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> > +    return 0;
> > +}
> > +
> 
> > +
> > +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > + * modified, it should be retrieved with msix_bar_size. */
> > +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > +              unsigned bar_nr, unsigned bar_size)
> > +{
> > +    int ret = -ENOMEM;
> > +    /* Nothing to do if MSI is not supported by interrupt controller */
> > +    if (!msix_supported)
> > +        return -ENOTTY;
> > +
> > +    if (nentries > MSIX_MAX_ENTRIES)
> > +        return -EINVAL;
> > +
> > +    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> > +                                        sizeof *dev->msix_entry_used);
> > +    if (!dev->msix_entry_used)
> > +        goto err_used;
> no need to check. oom_checker will kill qemu if it fails.
> 
> > +
> > +    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> > +    if (!dev->msix_table_page)
> > +        goto err_page;
> ditto.
> 

Good point.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
@ 2009-06-10  9:58       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:58 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:26:27PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> > Add functions implementing MSI-X support. First user will be virtio-pci.
> > Note that platform must set a flag to declare MSI supported.
> > For PC this will be set by APIC.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  Makefile.target |    2 +-
> >  hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/msix.h       |   35 +++++
> >  hw/pci.h        |   20 +++
> >  4 files changed, 479 insertions(+), 1 deletions(-)
> >  create mode 100644 hw/msix.c
> >  create mode 100644 hw/msix.h
> > 
> > diff --git a/Makefile.target b/Makefile.target
> > index 664a1e3..87b2859 100644
> > --- a/Makefile.target
> > +++ b/Makefile.target
> > @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
> >  ifndef CONFIG_USER_ONLY
> >  
> >  OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> > -     gdbstub.o gdbstub-xml.o
> > +     gdbstub.o gdbstub-xml.o msix.o
> >  # virtio has to be here due to weird dependency between PCI and virtio-net.
> >  # need to fix this properly
> >  OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> > diff --git a/hw/msix.c b/hw/msix.c
> > new file mode 100644
> > index 0000000..1b5aec8
> > --- /dev/null
> > +++ b/hw/msix.c
> > @@ -0,0 +1,423 @@
> > +/*
> > + * MSI-X device support
> > + *
> > + * This module includes support for MSI-X in pci devices.
> > + *
> > + * Author: Michael S. Tsirkin <mst@redhat.com>
> > + *
> > + *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "hw.h"
> > +#include "msix.h"
> > +#include "pci.h"
> > +
> > +/* Declaration from linux/pci_regs.h */
> > +#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> > +#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
> > +#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
> > +#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
> > +#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
> > +
> > +/* MSI-X capability structure */
> > +#define MSIX_TABLE_OFFSET 4
> > +#define MSIX_PBA_OFFSET 8
> > +#define MSIX_CAP_LENGTH 12
> > +
> > +/* MSI enable bit is in byte 1 in FLAGS register */
> > +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> > +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> > +
> > +/* MSI-X table format */
> > +#define MSIX_MSG_ADDR 0
> > +#define MSIX_MSG_UPPER_ADDR 4
> > +#define MSIX_MSG_DATA 8
> > +#define MSIX_VECTOR_CTRL 12
> > +#define MSIX_ENTRY_SIZE 16
> > +#define MSIX_VECTOR_MASK 0x1
> > +
> > +/* How much space does an MSIX table need. */
> > +/* The spec requires giving the table structure
> > + * a 4K aligned region all by itself. Align it to
> > + * target pages so that drivers can do passthrough
> > + * on the rest of the region. */
> > +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> > +/* Reserve second half of the page for pending bits */
> > +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> > +#define MSIX_MAX_ENTRIES 32
> > +
> > +
> > +#ifdef MSIX_DEBUG
> > +#define DEBUG(fmt, ...)                                       \
> > +    do {                                                      \
> > +      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
> > +    } while (0)
> > +#else
> > +#define DEBUG(fmt, ...) do { } while(0)
> > +#endif
> > +
> > +/* Flag to globally disable MSI-X support */
> > +int msix_disable;
> > +
> > +/* Flag for interrupt controller to declare MSI-X support */
> > +int msix_supported;
> maybe better to make it static,

It's not read-only either.

> and provide msi_state() returning -1 for disabled,
> 0 for supported, etc... 

Matter of taste, I prefer a set of binary flags rather than yet another enum:
msix_disable is controlled by user, msix_supported is a safety valve
for non-PC platforms. It's easier to keep them separate IMO.

> > +
> > +/* Add MSI-X capability to the config space for the device. */
> > +/* Given a bar and its size, add MSI-X table on top of it
> > + * and fill MSI-X capability in the config space.
> > + * Original bar size must be a power of 2 or 0.
> > + * New bar size is returned. */
> > +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> > +                           unsigned bar_nr, unsigned bar_size)
> > +{
> > +    int config_offset;
> > +    uint8_t *config;
> > +    uint32_t new_size;
> > +
> > +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> > +        return -EINVAL;
> > +    if (bar_size > 0x80000000)
> > +        return -ENOSPC;
> > +
> > +    /* Add space for MSI-X structures */
> > +    if (!bar_size)
> > +        new_size = MSIX_PAGE_SIZE;
> > +    else if (bar_size < MSIX_PAGE_SIZE) {
> > +        bar_size = MSIX_PAGE_SIZE;
> > +        new_size = MSIX_PAGE_SIZE * 2;
> > +    } else
> > +        new_size = bar_size * 2;
> > +
> > +    pdev->msix_bar_size = new_size;
> > +    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > +    if (config_offset < 0)
> > +        return config_offset;
> > +    config = pdev->config + config_offset;
> > +
> > +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > +    /* Table on top of BAR */
> > +    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> > +    /* Pending bits on top of that */
> > +    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> > +                 bar_nr);
> > +    pdev->msix_cap = config_offset;
> > +    /* Make flags bit writeable. */
> > +    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> > +    return 0;
> > +}
> > +
> 
> > +
> > +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > + * modified, it should be retrieved with msix_bar_size. */
> > +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > +              unsigned bar_nr, unsigned bar_size)
> > +{
> > +    int ret = -ENOMEM;
> > +    /* Nothing to do if MSI is not supported by interrupt controller */
> > +    if (!msix_supported)
> > +        return -ENOTTY;
> > +
> > +    if (nentries > MSIX_MAX_ENTRIES)
> > +        return -EINVAL;
> > +
> > +    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> > +                                        sizeof *dev->msix_entry_used);
> > +    if (!dev->msix_entry_used)
> > +        goto err_used;
> no need to check. oom_checker will kill qemu if it fails.
> 
> > +
> > +    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> > +    if (!dev->msix_table_page)
> > +        goto err_page;
> ditto.
> 

Good point.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 05/13] qemu: MSI-X support functions
  2009-06-09 17:26     ` Glauber Costa
  (?)
@ 2009-06-10  9:58     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:58 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:26:27PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:31PM +0300, Michael S. Tsirkin wrote:
> > Add functions implementing MSI-X support. First user will be virtio-pci.
> > Note that platform must set a flag to declare MSI supported.
> > For PC this will be set by APIC.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  Makefile.target |    2 +-
> >  hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/msix.h       |   35 +++++
> >  hw/pci.h        |   20 +++
> >  4 files changed, 479 insertions(+), 1 deletions(-)
> >  create mode 100644 hw/msix.c
> >  create mode 100644 hw/msix.h
> > 
> > diff --git a/Makefile.target b/Makefile.target
> > index 664a1e3..87b2859 100644
> > --- a/Makefile.target
> > +++ b/Makefile.target
> > @@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
> >  ifndef CONFIG_USER_ONLY
> >  
> >  OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
> > -     gdbstub.o gdbstub-xml.o
> > +     gdbstub.o gdbstub-xml.o msix.o
> >  # virtio has to be here due to weird dependency between PCI and virtio-net.
> >  # need to fix this properly
> >  OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
> > diff --git a/hw/msix.c b/hw/msix.c
> > new file mode 100644
> > index 0000000..1b5aec8
> > --- /dev/null
> > +++ b/hw/msix.c
> > @@ -0,0 +1,423 @@
> > +/*
> > + * MSI-X device support
> > + *
> > + * This module includes support for MSI-X in pci devices.
> > + *
> > + * Author: Michael S. Tsirkin <mst@redhat.com>
> > + *
> > + *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "hw.h"
> > +#include "msix.h"
> > +#include "pci.h"
> > +
> > +/* Declaration from linux/pci_regs.h */
> > +#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
> > +#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
> > +#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
> > +#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
> > +#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
> > +
> > +/* MSI-X capability structure */
> > +#define MSIX_TABLE_OFFSET 4
> > +#define MSIX_PBA_OFFSET 8
> > +#define MSIX_CAP_LENGTH 12
> > +
> > +/* MSI enable bit is in byte 1 in FLAGS register */
> > +#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
> > +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> > +
> > +/* MSI-X table format */
> > +#define MSIX_MSG_ADDR 0
> > +#define MSIX_MSG_UPPER_ADDR 4
> > +#define MSIX_MSG_DATA 8
> > +#define MSIX_VECTOR_CTRL 12
> > +#define MSIX_ENTRY_SIZE 16
> > +#define MSIX_VECTOR_MASK 0x1
> > +
> > +/* How much space does an MSIX table need. */
> > +/* The spec requires giving the table structure
> > + * a 4K aligned region all by itself. Align it to
> > + * target pages so that drivers can do passthrough
> > + * on the rest of the region. */
> > +#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
> > +/* Reserve second half of the page for pending bits */
> > +#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> > +#define MSIX_MAX_ENTRIES 32
> > +
> > +
> > +#ifdef MSIX_DEBUG
> > +#define DEBUG(fmt, ...)                                       \
> > +    do {                                                      \
> > +      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
> > +    } while (0)
> > +#else
> > +#define DEBUG(fmt, ...) do { } while(0)
> > +#endif
> > +
> > +/* Flag to globally disable MSI-X support */
> > +int msix_disable;
> > +
> > +/* Flag for interrupt controller to declare MSI-X support */
> > +int msix_supported;
> maybe better to make it static,

It's not read-only either.

> and provide msi_state() returning -1 for disabled,
> 0 for supported, etc... 

Matter of taste, I prefer a set of binary flags rather than yet another enum:
msix_disable is controlled by user, msix_supported is a safety valve
for non-PC platforms. It's easier to keep them separate IMO.

> > +
> > +/* Add MSI-X capability to the config space for the device. */
> > +/* Given a bar and its size, add MSI-X table on top of it
> > + * and fill MSI-X capability in the config space.
> > + * Original bar size must be a power of 2 or 0.
> > + * New bar size is returned. */
> > +static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> > +                           unsigned bar_nr, unsigned bar_size)
> > +{
> > +    int config_offset;
> > +    uint8_t *config;
> > +    uint32_t new_size;
> > +
> > +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> > +        return -EINVAL;
> > +    if (bar_size > 0x80000000)
> > +        return -ENOSPC;
> > +
> > +    /* Add space for MSI-X structures */
> > +    if (!bar_size)
> > +        new_size = MSIX_PAGE_SIZE;
> > +    else if (bar_size < MSIX_PAGE_SIZE) {
> > +        bar_size = MSIX_PAGE_SIZE;
> > +        new_size = MSIX_PAGE_SIZE * 2;
> > +    } else
> > +        new_size = bar_size * 2;
> > +
> > +    pdev->msix_bar_size = new_size;
> > +    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > +    if (config_offset < 0)
> > +        return config_offset;
> > +    config = pdev->config + config_offset;
> > +
> > +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > +    /* Table on top of BAR */
> > +    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
> > +    /* Pending bits on top of that */
> > +    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
> > +                 bar_nr);
> > +    pdev->msix_cap = config_offset;
> > +    /* Make flags bit writeable. */
> > +    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
> > +    return 0;
> > +}
> > +
> 
> > +
> > +/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > + * modified, it should be retrieved with msix_bar_size. */
> > +int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > +              unsigned bar_nr, unsigned bar_size)
> > +{
> > +    int ret = -ENOMEM;
> > +    /* Nothing to do if MSI is not supported by interrupt controller */
> > +    if (!msix_supported)
> > +        return -ENOTTY;
> > +
> > +    if (nentries > MSIX_MAX_ENTRIES)
> > +        return -EINVAL;
> > +
> > +    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
> > +                                        sizeof *dev->msix_entry_used);
> > +    if (!dev->msix_entry_used)
> > +        goto err_used;
> no need to check. oom_checker will kill qemu if it fails.
> 
> > +
> > +    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
> > +    if (!dev->msix_table_page)
> > +        goto err_page;
> ditto.
> 

Good point.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
  2009-06-09 17:33     ` Glauber Costa
@ 2009-06-10  9:59       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:59 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:33:33PM -0300, Glauber Costa wrote:
> >      env = cpu_single_env;
> >      if (!env)
> > @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> >      printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
> >  #endif
> >  
> > -    index = (addr >> 4) & 0xff;
> >      switch(index) {
> >      case 0x02:
> >          s->id = (val >> 24);
> > @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
> >      s->cpu_env = env;
> >  
> >      apic_reset(s);
> > +    msix_supported = 1;
> >  
> >      /* XXX: mapping more APICs at the same memory location */
> >      if (apic_io_memory == 0) {
> > @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
> >             on the global memory bus. */
> >          apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
> >                                                  apic_mem_write, NULL);
> > -        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> > +        /* XXX: what if the base changes? */
> > +        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
> >                                       apic_io_memory);
> +1 
> 
> I think you have a point here. Your patch is in no way worse than what we had,
> but we're currently not handling correctly the case of base address changing.

Yep.

> Guess it is not common in normal apic usage for OSes...

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
@ 2009-06-10  9:59       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:59 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:33:33PM -0300, Glauber Costa wrote:
> >      env = cpu_single_env;
> >      if (!env)
> > @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> >      printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
> >  #endif
> >  
> > -    index = (addr >> 4) & 0xff;
> >      switch(index) {
> >      case 0x02:
> >          s->id = (val >> 24);
> > @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
> >      s->cpu_env = env;
> >  
> >      apic_reset(s);
> > +    msix_supported = 1;
> >  
> >      /* XXX: mapping more APICs at the same memory location */
> >      if (apic_io_memory == 0) {
> > @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
> >             on the global memory bus. */
> >          apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
> >                                                  apic_mem_write, NULL);
> > -        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> > +        /* XXX: what if the base changes? */
> > +        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
> >                                       apic_io_memory);
> +1 
> 
> I think you have a point here. Your patch is in no way worse than what we had,
> but we're currently not handling correctly the case of base address changing.

Yep.

> Guess it is not common in normal apic usage for OSes...

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC
  2009-06-09 17:33     ` Glauber Costa
  (?)
@ 2009-06-10  9:59     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:59 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:33:33PM -0300, Glauber Costa wrote:
> >      env = cpu_single_env;
> >      if (!env)
> > @@ -727,7 +762,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> >      printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
> >  #endif
> >  
> > -    index = (addr >> 4) & 0xff;
> >      switch(index) {
> >      case 0x02:
> >          s->id = (val >> 24);
> > @@ -911,6 +945,7 @@ int apic_init(CPUState *env)
> >      s->cpu_env = env;
> >  
> >      apic_reset(s);
> > +    msix_supported = 1;
> >  
> >      /* XXX: mapping more APICs at the same memory location */
> >      if (apic_io_memory == 0) {
> > @@ -918,7 +953,8 @@ int apic_init(CPUState *env)
> >             on the global memory bus. */
> >          apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
> >                                                  apic_mem_write, NULL);
> > -        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
> > +        /* XXX: what if the base changes? */
> > +        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
> >                                       apic_io_memory);
> +1 
> 
> I think you have a point here. Your patch is in no way worse than what we had,
> but we're currently not handling correctly the case of base address changing.

Yep.

> Guess it is not common in normal apic usage for OSes...

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
  2009-06-09 17:36     ` Glauber Costa
@ 2009-06-10 10:05       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:05 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions.  This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
> >  hw/pci.h |    3 +++
> >  2 files changed, 39 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> >  }
> >  
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > +    if (r->addr == -1)
> > +        return;
> > +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> > +        int class;
> > +        /* NOTE: specific hack for IDE in PC case:
> > +           only one byte must be mapped. */
> > +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > +        if (class == 0x0101 && r->size == 4) {
> > +            isa_unassign_ioport(r->addr + 2, 1);
> > +        } else {
> > +            isa_unassign_ioport(r->addr, r->size);
> > +        }
> > +    } else {
> > +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > +                                     r->size,
> > +                                     IO_MEM_UNASSIGNED);
> > +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> > +    }
> > +}
> > +
> this is a good cleanup...
> 
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > +                          uint32_t size)
> > +{
> > +
> > +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> > +    if (r->size == size)
> > +        return;
> > +    r->size = size;
> > +    pci_unmap_region(pci_dev, r);
> > +    r->addr = -1;
> > +    pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
> 

Um, I think this needs to be called on load: virtio has a memory region
if and only if it has MSI-X.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
@ 2009-06-10 10:05       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:05 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions.  This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
> >  hw/pci.h |    3 +++
> >  2 files changed, 39 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> >  }
> >  
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > +    if (r->addr == -1)
> > +        return;
> > +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> > +        int class;
> > +        /* NOTE: specific hack for IDE in PC case:
> > +           only one byte must be mapped. */
> > +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > +        if (class == 0x0101 && r->size == 4) {
> > +            isa_unassign_ioport(r->addr + 2, 1);
> > +        } else {
> > +            isa_unassign_ioport(r->addr, r->size);
> > +        }
> > +    } else {
> > +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > +                                     r->size,
> > +                                     IO_MEM_UNASSIGNED);
> > +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> > +    }
> > +}
> > +
> this is a good cleanup...
> 
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > +                          uint32_t size)
> > +{
> > +
> > +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> > +    if (r->size == size)
> > +        return;
> > +    r->size = size;
> > +    pci_unmap_region(pci_dev, r);
> > +    r->addr = -1;
> > +    pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
> 

Um, I think this needs to be called on load: virtio has a memory region
if and only if it has MSI-X.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
  2009-06-09 17:36     ` Glauber Costa
  (?)
@ 2009-06-10 10:05     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:05 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions.  This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
> >  hw/pci.h |    3 +++
> >  2 files changed, 39 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> >  }
> >  
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > +    if (r->addr == -1)
> > +        return;
> > +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> > +        int class;
> > +        /* NOTE: specific hack for IDE in PC case:
> > +           only one byte must be mapped. */
> > +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > +        if (class == 0x0101 && r->size == 4) {
> > +            isa_unassign_ioport(r->addr + 2, 1);
> > +        } else {
> > +            isa_unassign_ioport(r->addr, r->size);
> > +        }
> > +    } else {
> > +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > +                                     r->size,
> > +                                     IO_MEM_UNASSIGNED);
> > +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> > +    }
> > +}
> > +
> this is a good cleanup...
> 
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > +                          uint32_t size)
> > +{
> > +
> > +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> > +    if (r->size == size)
> > +        return;
> > +    r->size = size;
> > +    pci_unmap_region(pci_dev, r);
> > +    r->addr = -1;
> > +    pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
> 

Um, I think this needs to be called on load: virtio has a memory region
if and only if it has MSI-X.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
  2009-06-09 17:45     ` Glauber Costa
@ 2009-06-10 10:11       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:11 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.
> 
> > diff --git a/hw/virtio.h b/hw/virtio.h
> > index 04a3c3d..ce05517 100644
> > --- a/hw/virtio.h
> > +++ b/hw/virtio.h
> > @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
> >  
> >  typedef struct {
> >      void (*notify)(void * opaque, uint16_t vector);
> > +    void (*save_config)(void * opaque, QEMUFile *f);
> > +    void (*save_queue)(void * opaque, int n, QEMUFile *f);
> > +    int (*load_config)(void * opaque, QEMUFile *f);
> > +    int (*load_queue)(void * opaque, int n, QEMUFile *f);
> >  } VirtIOBindings;
> >  
> So, what's the overall effect on a virtual machine that gets migrated,
> of a certain device not implementing one of those functions?

Those are implemented by a transport (e.g. virtio_pci) not the device.

> Will it work? Will it break?

It will work - assuming there's nothing transport-specific you need to
save and load. If there is - this patch is not breaking anything
that isn't already broken ...

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
@ 2009-06-10 10:11       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:11 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.
> 
> > diff --git a/hw/virtio.h b/hw/virtio.h
> > index 04a3c3d..ce05517 100644
> > --- a/hw/virtio.h
> > +++ b/hw/virtio.h
> > @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
> >  
> >  typedef struct {
> >      void (*notify)(void * opaque, uint16_t vector);
> > +    void (*save_config)(void * opaque, QEMUFile *f);
> > +    void (*save_queue)(void * opaque, int n, QEMUFile *f);
> > +    int (*load_config)(void * opaque, QEMUFile *f);
> > +    int (*load_queue)(void * opaque, int n, QEMUFile *f);
> >  } VirtIOBindings;
> >  
> So, what's the overall effect on a virtual machine that gets migrated,
> of a certain device not implementing one of those functions?

Those are implemented by a transport (e.g. virtio_pci) not the device.

> Will it work? Will it break?

It will work - assuming there's nothing transport-specific you need to
save and load. If there is - this patch is not breaking anything
that isn't already broken ...

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
  2009-06-09 17:45     ` Glauber Costa
  (?)
@ 2009-06-10 10:11     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:11 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.
> 
> > diff --git a/hw/virtio.h b/hw/virtio.h
> > index 04a3c3d..ce05517 100644
> > --- a/hw/virtio.h
> > +++ b/hw/virtio.h
> > @@ -72,6 +72,10 @@ typedef struct VirtQueueElement
> >  
> >  typedef struct {
> >      void (*notify)(void * opaque, uint16_t vector);
> > +    void (*save_config)(void * opaque, QEMUFile *f);
> > +    void (*save_queue)(void * opaque, int n, QEMUFile *f);
> > +    int (*load_config)(void * opaque, QEMUFile *f);
> > +    int (*load_queue)(void * opaque, int n, QEMUFile *f);
> >  } VirtIOBindings;
> >  
> So, what's the overall effect on a virtual machine that gets migrated,
> of a certain device not implementing one of those functions?

Those are implemented by a transport (e.g. virtio_pci) not the device.

> Will it work? Will it break?

It will work - assuming there's nothing transport-specific you need to
save and load. If there is - this patch is not breaking anything
that isn't already broken ...

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
  2009-06-09 17:36     ` Glauber Costa
@ 2009-06-10 10:46       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:46 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions.  This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
> >  hw/pci.h |    3 +++
> >  2 files changed, 39 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> >  }
> >  
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > +    if (r->addr == -1)
> > +        return;
> > +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> > +        int class;
> > +        /* NOTE: specific hack for IDE in PC case:
> > +           only one byte must be mapped. */
> > +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > +        if (class == 0x0101 && r->size == 4) {
> > +            isa_unassign_ioport(r->addr + 2, 1);
> > +        } else {
> > +            isa_unassign_ioport(r->addr, r->size);
> > +        }
> > +    } else {
> > +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > +                                     r->size,
> > +                                     IO_MEM_UNASSIGNED);
> > +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> > +    }
> > +}
> > +
> this is a good cleanup...
> 
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > +                          uint32_t size)
> > +{
> > +
> > +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> > +    if (r->size == size)
> > +        return;
> > +    r->size = size;
> > +    pci_unmap_region(pci_dev, r);
> > +    r->addr = -1;
> > +    pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
> 

This was the missing bit:

Set correct size for msi-x memory region when loading the device.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 589fbb1..f657364 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -133,6 +133,8 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
         return ret;
     if (msix_present(&proxy->pci_dev))
         qemu_get_be16s(f, &proxy->vdev->config_vector);
+
+    pci_resize_io_region(&proxy->pci_dev, 1, msix_bar_size(&proxy->pci_dev));
     return 0;
 }
 

-- 
MST

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
@ 2009-06-10 10:46       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:46 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions.  This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
> >  hw/pci.h |    3 +++
> >  2 files changed, 39 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> >  }
> >  
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > +    if (r->addr == -1)
> > +        return;
> > +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> > +        int class;
> > +        /* NOTE: specific hack for IDE in PC case:
> > +           only one byte must be mapped. */
> > +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > +        if (class == 0x0101 && r->size == 4) {
> > +            isa_unassign_ioport(r->addr + 2, 1);
> > +        } else {
> > +            isa_unassign_ioport(r->addr, r->size);
> > +        }
> > +    } else {
> > +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > +                                     r->size,
> > +                                     IO_MEM_UNASSIGNED);
> > +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> > +    }
> > +}
> > +
> this is a good cleanup...
> 
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > +                          uint32_t size)
> > +{
> > +
> > +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> > +    if (r->size == size)
> > +        return;
> > +    r->size = size;
> > +    pci_unmap_region(pci_dev, r);
> > +    r->addr = -1;
> > +    pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seem to be commented out, and later removed.
> Why is this needed?
> 

This was the missing bit:

Set correct size for msi-x memory region when loading the device.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 589fbb1..f657364 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -133,6 +133,8 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
         return ret;
     if (msix_present(&proxy->pci_dev))
         qemu_get_be16s(f, &proxy->vdev->config_vector);
+
+    pci_resize_io_region(&proxy->pci_dev, 1, msix_bar_size(&proxy->pci_dev));
     return 0;
 }
 

-- 
MST

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 08/13] qemu: add support for resizing regions
  2009-06-09 17:36     ` Glauber Costa
                       ` (3 preceding siblings ...)
  (?)
@ 2009-06-10 10:46     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 10:46 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:36:21PM -0300, Glauber Costa wrote:
> On Fri, Jun 05, 2009 at 01:23:55PM +0300, Michael S. Tsirkin wrote:
> > Make it possible to resize PCI regions.  This will be used by virtio
> > with MSI-X, where the region size depends on whether MSI-X is enabled,
> > and can change across load/save.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
> >  hw/pci.h |    3 +++
> >  2 files changed, 39 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index ed011b5..042a216 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >      *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
> >  }
> >  
> > +static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
> > +{
> > +    if (r->addr == -1)
> > +        return;
> > +    if (r->type & PCI_ADDRESS_SPACE_IO) {
> > +        int class;
> > +        /* NOTE: specific hack for IDE in PC case:
> > +           only one byte must be mapped. */
> > +        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
> > +        if (class == 0x0101 && r->size == 4) {
> > +            isa_unassign_ioport(r->addr + 2, 1);
> > +        } else {
> > +            isa_unassign_ioport(r->addr, r->size);
> > +        }
> > +    } else {
> > +        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
> > +                                     r->size,
> > +                                     IO_MEM_UNASSIGNED);
> > +        qemu_unregister_coalesced_mmio(r->addr, r->size);
> > +    }
> > +}
> > +
> this is a good cleanup...
> 
> > +void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
> > +                          uint32_t size)
> > +{
> > +
> > +    PCIIORegion *r = &pci_dev->io_regions[region_num];
> > +    if (r->size == size)
> > +        return;
> > +    r->size = size;
> > +    pci_unmap_region(pci_dev, r);
> > +    r->addr = -1;
> > +    pci_update_mappings(pci_dev);
> > +}
> > +
> but the only user of this one seems to be commented out, and later removed.
> Why is this needed?
> 

This was the missing bit:

Set correct size for msi-x memory region when loading the device.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 589fbb1..f657364 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -133,6 +133,8 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
         return ret;
     if (msix_present(&proxy->pci_dev))
         qemu_get_be16s(f, &proxy->vdev->config_vector);
+
+    pci_resize_io_region(&proxy->pci_dev, 1, msix_bar_size(&proxy->pci_dev));
     return 0;
 }
 

-- 
MST

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
  2009-06-09 17:45     ` Glauber Costa
@ 2009-06-10 11:33       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 11:33 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.

Good catch. Fixed.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
@ 2009-06-10 11:33       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 11:33 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.

Good catch. Fixed.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 12/13] qemu: virtio save/load bindings
  2009-06-09 17:45     ` Glauber Costa
                       ` (3 preceding siblings ...)
  (?)
@ 2009-06-10 11:33     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 11:33 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Tue, Jun 09, 2009 at 02:45:54PM -0300, Glauber Costa wrote:
> duplicated save config.

Good catch. Fixed.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10  9:54       ` Michael S. Tsirkin
@ 2009-06-10 14:55         ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-10 14:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > 
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > >  hw/pci.h |   18 +++++++++++-
> > >  2 files changed, 106 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 361d741..ed011b5 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > >      int version = s->cap_present ? 3 : 2;
> > >      int i;
> > >  
> > > -    qemu_put_be32(f, version); /* PCI device version */
> > > +    /* PCI device version and capabilities */
> > > +    qemu_put_be32(f, version);
> > > +    if (version >= 3)
> > > +        qemu_put_be32(f, s->cap_present);
> > >      qemu_put_buffer(f, s->config, 256);
> > >      for (i = 0; i < 4; i++)
> > >          qemu_put_be32(f, s->irq_state[i]);
> > > -    if (version >= 3)
> > > -        qemu_put_be32(f, s->cap_present);
> > >  }
> > What is it doing here?
> > You should just do it right in the first patch, instead of doing in
> > one way there, and fixing here.
> > 
> > >  
> > >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > >      version_id = qemu_get_be32(f);
> > >      if (version_id > 3)
> > >          return -EINVAL;
> > > -    qemu_get_buffer(f, s->config, 256);
> > > -    pci_update_mappings(s);
> > > -
> > > -    if (version_id >= 2)
> > > -        for (i = 0; i < 4; i ++)
> > > -            s->irq_state[i] = qemu_get_be32(f);
> > >      if (version_id >= 3)
> > >          s->cap_present = qemu_get_be32(f);
> > >      else
> > ditto.
> > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > >      if (s->cap_present & ~s->cap_supported)
> > >          return -EINVAL;
> > >  
> > > +    qemu_get_buffer(f, s->config, 256);
> > > +    pci_update_mappings(s);
> > > +
> > > +    if (version_id >= 2)
> > > +        for (i = 0; i < 4; i ++)
> > > +            s->irq_state[i] = qemu_get_be32(f);
> > > +    /* Clear wmask and used bits for capabilities.
> > > +       Must be restored separately, since capabilities can
> > > +       be placed anywhere in config space. */
> > > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > +        s->wmask[i] = 0xff;
> > >      return 0;
> > >  }
> > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > lose by keeping it at the same place in config space?
> 
> We lose the ability to let user control the capabilities exposed
> by the device.
> 
> And generally, I dislike arbitrary limitations. The PCI spec says the
> capability can be anywhere, implementing a linked list of caps is simple
> enough to not invent arbitrary restrictions.
yes, but this is migration time, right?

caps can be anywhere, but we don't expect it to change during machine execution
lifetime.

Or am I just confused by the name "pci_device_load"?


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 14:55         ` Glauber Costa
  0 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-10 14:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > 
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > >  hw/pci.h |   18 +++++++++++-
> > >  2 files changed, 106 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 361d741..ed011b5 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > >      int version = s->cap_present ? 3 : 2;
> > >      int i;
> > >  
> > > -    qemu_put_be32(f, version); /* PCI device version */
> > > +    /* PCI device version and capabilities */
> > > +    qemu_put_be32(f, version);
> > > +    if (version >= 3)
> > > +        qemu_put_be32(f, s->cap_present);
> > >      qemu_put_buffer(f, s->config, 256);
> > >      for (i = 0; i < 4; i++)
> > >          qemu_put_be32(f, s->irq_state[i]);
> > > -    if (version >= 3)
> > > -        qemu_put_be32(f, s->cap_present);
> > >  }
> > What is it doing here?
> > You should just do it right in the first patch, instead of doing in
> > one way there, and fixing here.
> > 
> > >  
> > >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > >      version_id = qemu_get_be32(f);
> > >      if (version_id > 3)
> > >          return -EINVAL;
> > > -    qemu_get_buffer(f, s->config, 256);
> > > -    pci_update_mappings(s);
> > > -
> > > -    if (version_id >= 2)
> > > -        for (i = 0; i < 4; i ++)
> > > -            s->irq_state[i] = qemu_get_be32(f);
> > >      if (version_id >= 3)
> > >          s->cap_present = qemu_get_be32(f);
> > >      else
> > ditto.
> > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > >      if (s->cap_present & ~s->cap_supported)
> > >          return -EINVAL;
> > >  
> > > +    qemu_get_buffer(f, s->config, 256);
> > > +    pci_update_mappings(s);
> > > +
> > > +    if (version_id >= 2)
> > > +        for (i = 0; i < 4; i ++)
> > > +            s->irq_state[i] = qemu_get_be32(f);
> > > +    /* Clear wmask and used bits for capabilities.
> > > +       Must be restored separately, since capabilities can
> > > +       be placed anywhere in config space. */
> > > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > +        s->wmask[i] = 0xff;
> > >      return 0;
> > >  }
> > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > lose by keeping it at the same place in config space?
> 
> We lose the ability to let user control the capabilities exposed
> by the device.
> 
> And generally, I dislike arbitrary limitations. The PCI spec says the
> capability can be anywhere, implementing a linked list of caps is simple
> enough to not invent arbitrary restrictions.
yes, but this is migration time, right?

caps can be anywhere, but we don't expect it to change during machine execution
lifetime.

Or am I just confused by the name "pci_device_load"?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10  9:54       ` Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-10 14:55       ` Glauber Costa
  -1 siblings, 0 replies; 457+ messages in thread
From: Glauber Costa @ 2009-06-10 14:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > 
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > >  hw/pci.h |   18 +++++++++++-
> > >  2 files changed, 106 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 361d741..ed011b5 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > >      int version = s->cap_present ? 3 : 2;
> > >      int i;
> > >  
> > > -    qemu_put_be32(f, version); /* PCI device version */
> > > +    /* PCI device version and capabilities */
> > > +    qemu_put_be32(f, version);
> > > +    if (version >= 3)
> > > +        qemu_put_be32(f, s->cap_present);
> > >      qemu_put_buffer(f, s->config, 256);
> > >      for (i = 0; i < 4; i++)
> > >          qemu_put_be32(f, s->irq_state[i]);
> > > -    if (version >= 3)
> > > -        qemu_put_be32(f, s->cap_present);
> > >  }
> > What is it doing here?
> > You should just do it right in the first patch, instead of doing in
> > one way there, and fixing here.
> > 
> > >  
> > >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > >      version_id = qemu_get_be32(f);
> > >      if (version_id > 3)
> > >          return -EINVAL;
> > > -    qemu_get_buffer(f, s->config, 256);
> > > -    pci_update_mappings(s);
> > > -
> > > -    if (version_id >= 2)
> > > -        for (i = 0; i < 4; i ++)
> > > -            s->irq_state[i] = qemu_get_be32(f);
> > >      if (version_id >= 3)
> > >          s->cap_present = qemu_get_be32(f);
> > >      else
> > ditto.
> > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > >      if (s->cap_present & ~s->cap_supported)
> > >          return -EINVAL;
> > >  
> > > +    qemu_get_buffer(f, s->config, 256);
> > > +    pci_update_mappings(s);
> > > +
> > > +    if (version_id >= 2)
> > > +        for (i = 0; i < 4; i ++)
> > > +            s->irq_state[i] = qemu_get_be32(f);
> > > +    /* Clear wmask and used bits for capabilities.
> > > +       Must be restored separately, since capabilities can
> > > +       be placed anywhere in config space. */
> > > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > +        s->wmask[i] = 0xff;
> > >      return 0;
> > >  }
> > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > lose by keeping it at the same place in config space?
> 
> We lose the ability to let user control the capabilities exposed
> by the device.
> 
> And generally, I dislike arbitrary limitations. The PCI spec says the
> capability can be anywhere, implementing a linked list of caps is simple
> enough to not invent arbitrary restrictions.
yes, but this is migration time, right?

caps can be anywhere, but we don't expect it to change during machine execution
lifetime.

Or am I just confused by the name "pci_device_load"?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 14:55         ` Glauber Costa
@ 2009-06-10 15:01           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:01 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori

On Wed, Jun 10, 2009 at 11:55:40AM -0300, Glauber Costa wrote:
> On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > > 
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > >  hw/pci.h |   18 +++++++++++-
> > > >  2 files changed, 106 insertions(+), 10 deletions(-)
> > > > 
> > > > diff --git a/hw/pci.c b/hw/pci.c
> > > > index 361d741..ed011b5 100644
> > > > --- a/hw/pci.c
> > > > +++ b/hw/pci.c
> > > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > > >      int version = s->cap_present ? 3 : 2;
> > > >      int i;
> > > >  
> > > > -    qemu_put_be32(f, version); /* PCI device version */
> > > > +    /* PCI device version and capabilities */
> > > > +    qemu_put_be32(f, version);
> > > > +    if (version >= 3)
> > > > +        qemu_put_be32(f, s->cap_present);
> > > >      qemu_put_buffer(f, s->config, 256);
> > > >      for (i = 0; i < 4; i++)
> > > >          qemu_put_be32(f, s->irq_state[i]);
> > > > -    if (version >= 3)
> > > > -        qemu_put_be32(f, s->cap_present);
> > > >  }
> > > What is it doing here?
> > > You should just do it right in the first patch, instead of doing in
> > > one way there, and fixing here.
> > > 
> > > >  
> > > >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > >      version_id = qemu_get_be32(f);
> > > >      if (version_id > 3)
> > > >          return -EINVAL;
> > > > -    qemu_get_buffer(f, s->config, 256);
> > > > -    pci_update_mappings(s);
> > > > -
> > > > -    if (version_id >= 2)
> > > > -        for (i = 0; i < 4; i ++)
> > > > -            s->irq_state[i] = qemu_get_be32(f);
> > > >      if (version_id >= 3)
> > > >          s->cap_present = qemu_get_be32(f);
> > > >      else
> > > ditto.
> > > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > >      if (s->cap_present & ~s->cap_supported)
> > > >          return -EINVAL;
> > > >  
> > > > +    qemu_get_buffer(f, s->config, 256);
> > > > +    pci_update_mappings(s);
> > > > +
> > > > +    if (version_id >= 2)
> > > > +        for (i = 0; i < 4; i ++)
> > > > +            s->irq_state[i] = qemu_get_be32(f);
> > > > +    /* Clear wmask and used bits for capabilities.
> > > > +       Must be restored separately, since capabilities can
> > > > +       be placed anywhere in config space. */
> > > > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > > +        s->wmask[i] = 0xff;
> > > >      return 0;
> > > >  }
> > > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > > lose by keeping it at the same place in config space?
> > 
> > We lose the ability to let user control the capabilities exposed
> > by the device.
> > 
> > And generally, I dislike arbitrary limitations. The PCI spec says the
> > capability can be anywhere, implementing a linked list of caps is simple
> > enough to not invent arbitrary restrictions.
> yes, but this is migration time, right?

I think so, yes.

> 
> caps can be anywhere, but we don't expect it to change during machine execution
> lifetime.
> 
> Or I am just confused by the name "pci_device_load" ?

Right. So I want to load an image and it has capability X at offset Y.
wmask has to match. I don't want to assume that we never change Y
for the device without breaking old images, so I clear wmask here
and set it up again after looking up capabilities that I loaded.

Maybe this explanation should go into the comment above?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 15:01           ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:01 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Wed, Jun 10, 2009 at 11:55:40AM -0300, Glauber Costa wrote:
> On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > > 
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > >  hw/pci.h |   18 +++++++++++-
> > > >  2 files changed, 106 insertions(+), 10 deletions(-)
> > > > 
> > > > diff --git a/hw/pci.c b/hw/pci.c
> > > > index 361d741..ed011b5 100644
> > > > --- a/hw/pci.c
> > > > +++ b/hw/pci.c
> > > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > > >      int version = s->cap_present ? 3 : 2;
> > > >      int i;
> > > >  
> > > > -    qemu_put_be32(f, version); /* PCI device version */
> > > > +    /* PCI device version and capabilities */
> > > > +    qemu_put_be32(f, version);
> > > > +    if (version >= 3)
> > > > +        qemu_put_be32(f, s->cap_present);
> > > >      qemu_put_buffer(f, s->config, 256);
> > > >      for (i = 0; i < 4; i++)
> > > >          qemu_put_be32(f, s->irq_state[i]);
> > > > -    if (version >= 3)
> > > > -        qemu_put_be32(f, s->cap_present);
> > > >  }
> > > What is it doing here?
> > > You should just do it right in the first patch, instead of doing in
> > > one way there, and fixing here.
> > > 
> > > >  
> > > >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > >      version_id = qemu_get_be32(f);
> > > >      if (version_id > 3)
> > > >          return -EINVAL;
> > > > -    qemu_get_buffer(f, s->config, 256);
> > > > -    pci_update_mappings(s);
> > > > -
> > > > -    if (version_id >= 2)
> > > > -        for (i = 0; i < 4; i ++)
> > > > -            s->irq_state[i] = qemu_get_be32(f);
> > > >      if (version_id >= 3)
> > > >          s->cap_present = qemu_get_be32(f);
> > > >      else
> > > ditto.
> > > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > >      if (s->cap_present & ~s->cap_supported)
> > > >          return -EINVAL;
> > > >  
> > > > +    qemu_get_buffer(f, s->config, 256);
> > > > +    pci_update_mappings(s);
> > > > +
> > > > +    if (version_id >= 2)
> > > > +        for (i = 0; i < 4; i ++)
> > > > +            s->irq_state[i] = qemu_get_be32(f);
> > > > +    /* Clear wmask and used bits for capabilities.
> > > > +       Must be restored separately, since capabilities can
> > > > +       be placed anywhere in config space. */
> > > > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > > +        s->wmask[i] = 0xff;
> > > >      return 0;
> > > >  }
> > > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > > lose by keeping it at the same place in config space?
> > 
> > We lose the ability to let user control the capabilities exposed
> > by the device.
> > 
> > And generally, I dislike arbitrary limitations. The PCI spec says the
> > capability can be anywhere, implementing a linked list of caps is simple
> > enough to not invent arbitrary restrictions.
> yes, but this is migration time, right?

I think so, yes.

> 
> caps can be anywhere, but we don't expect it to change during machine execution
> lifetime.
> 
> Or I am just confused by the name "pci_device_load" ?

Right. So I want to load an image and it has capability X at offset Y.
wmask has to match. I don't want to assume that we never change Y
for the device without breaking old images, so I clear wmask here
and set it up again after looking up capabilities that I loaded.

Maybe this explanation should go into the comment above?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 14:55         ` Glauber Costa
  (?)
@ 2009-06-10 15:01         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:01 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Carsten Otte, kvm, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori, Avi Kivity

On Wed, Jun 10, 2009 at 11:55:40AM -0300, Glauber Costa wrote:
> On Wed, Jun 10, 2009 at 12:54:15PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 09, 2009 at 02:11:14PM -0300, Glauber Costa wrote:
> > > On Fri, Jun 05, 2009 at 01:23:15PM +0300, Michael S. Tsirkin wrote:
> > > > Add routines to manage PCI capability list. First user will be MSI-X.
> > > > 
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > >  hw/pci.h |   18 +++++++++++-
> > > >  2 files changed, 106 insertions(+), 10 deletions(-)
> > > > 
> > > > diff --git a/hw/pci.c b/hw/pci.c
> > > > index 361d741..ed011b5 100644
> > > > --- a/hw/pci.c
> > > > +++ b/hw/pci.c
> > > > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> > > >      int version = s->cap_present ? 3 : 2;
> > > >      int i;
> > > >  
> > > > -    qemu_put_be32(f, version); /* PCI device version */
> > > > +    /* PCI device version and capabilities */
> > > > +    qemu_put_be32(f, version);
> > > > +    if (version >= 3)
> > > > +        qemu_put_be32(f, s->cap_present);
> > > >      qemu_put_buffer(f, s->config, 256);
> > > >      for (i = 0; i < 4; i++)
> > > >          qemu_put_be32(f, s->irq_state[i]);
> > > > -    if (version >= 3)
> > > > -        qemu_put_be32(f, s->cap_present);
> > > >  }
> > > What is it doing here?
> > > You should just do it right in the first patch, instead of doing in
> > > one way there, and fixing here.
> > > 
> > > >  
> > > >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > >      version_id = qemu_get_be32(f);
> > > >      if (version_id > 3)
> > > >          return -EINVAL;
> > > > -    qemu_get_buffer(f, s->config, 256);
> > > > -    pci_update_mappings(s);
> > > > -
> > > > -    if (version_id >= 2)
> > > > -        for (i = 0; i < 4; i ++)
> > > > -            s->irq_state[i] = qemu_get_be32(f);
> > > >      if (version_id >= 3)
> > > >          s->cap_present = qemu_get_be32(f);
> > > >      else
> > > ditto.
> > > > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> > > >      if (s->cap_present & ~s->cap_supported)
> > > >          return -EINVAL;
> > > >  
> > > > +    qemu_get_buffer(f, s->config, 256);
> > > > +    pci_update_mappings(s);
> > > > +
> > > > +    if (version_id >= 2)
> > > > +        for (i = 0; i < 4; i ++)
> > > > +            s->irq_state[i] = qemu_get_be32(f);
> > > > +    /* Clear wmask and used bits for capabilities.
> > > > +       Must be restored separately, since capabilities can
> > > > +       be placed anywhere in config space. */
> > > > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > > > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > > > +        s->wmask[i] = 0xff;
> > > >      return 0;
> > > >  }
> > > Sorry, I don't exactly understand it. Although it can be anywhere, what do we actually
> > > lose by keeping it at the same place in config space?
> > 
> > We lose the ability to let user control the capabilities exposed
> > by the device.
> > 
> > And generally, I dislike arbitrary limitations. The PCI spec says the
> > capability can be anywhere, implementing a linked list of caps is simple
> > enough to not invent arbitrary restrictions.
> yes, but this is migration time, right?

I think so, yes.

> 
> caps can be anywhere, but we don't expect it to change during machine execution
> lifetime.
> 
> Or I am just confused by the name "pci_device_load" ?

Right. So I want to load an image and it has capability X at offset Y.
wmask has to match. I don't want to assume that we never change Y
for the device without breaking old images, so I clear wmask here
and set it up again after looking up capabilities that I loaded.

Maybe this explanation should go into the comment above?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 15:01           ` Michael S. Tsirkin
@ 2009-06-10 15:24             ` Paul Brook
  -1 siblings, 0 replies; 457+ messages in thread
From: Paul Brook @ 2009-06-10 15:24 UTC (permalink / raw)
  To: qemu-devel
  Cc: Michael S. Tsirkin, Glauber Costa, Carsten Otte, kvm,
	Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity

> > caps can be anywhere, but we don't expect it to change during machine
> > execution lifetime.
> >
> > Or I am just confused by the name "pci_device_load" ?
>
> Right. So I want to load an image and it has capability X at offset Y.
> wmask has to match. I don't want to assume that we never change Y
> for the device without breaking old images, so I clear wmask here
> and set it up again after looking up capabilities that I loaded.

We should not be loading state into a different device (or a similar device 
with a different set of capabilities).

If you want to provide backwards compatibility then you should do that by 
creating a device that is the same as the original.  As I mentioned in my 
earlier mail, loading a snapshot should never do anything that can not be 
achieved through normal operation.

Paul


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 15:24             ` Paul Brook
  0 siblings, 0 replies; 457+ messages in thread
From: Paul Brook @ 2009-06-10 15:24 UTC (permalink / raw)
  To: qemu-devel
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity

> > caps can be anywhere, but we don't expect it to change during machine
> > execution lifetime.
> >
> > Or I am just confused by the name "pci_device_load" ?
>
> Right. So I want to load an image and it has capability X at offset Y.
> wmask has to match. I don't want to assume that we never change Y
> for the device without breaking old images, so I clear wmask here
> and set it up again after looking up capabilities that I loaded.

We should not be loading state into a different device (or a similar device 
with a different set of capabilities).

If you want to provide backwards compatibility then you should do that by 
creating a device that is the same as the original.  As I mentioned in my 
earlier mail, loading a snapshot should never do anything that can not be 
achieved through normal operation.

Paul

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 15:01           ` Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-10 15:24           ` Paul Brook
  -1 siblings, 0 replies; 457+ messages in thread
From: Paul Brook @ 2009-06-10 15:24 UTC (permalink / raw)
  To: qemu-devel
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

> > caps can be anywhere, but we don't expect it to change during machine
> > execution lifetime.
> >
> > Or I am just confused by the name "pci_device_load" ?
>
> Right. So I want to load an image and it has capability X at offset Y.
> wmask has to match. I don't want to assume that we never change Y
> for the device without breaking old images, so I clear wmask here
> and set it up again after looking up capabilities that I loaded.

We should not be loading state into a different device (or a similar device 
with a different set of capabilities).

If you want to provide backwards compatibility then you should do that by 
creating a device that is the same as the original.  As I mentioned in my 
earlier mail, loading a snapshot should never do anything that can not be 
achieved through normal operation.

Paul

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 15:24             ` Paul Brook
@ 2009-06-10 15:50               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:50 UTC (permalink / raw)
  To: Paul Brook
  Cc: qemu-devel, Glauber Costa, Carsten Otte, kvm, Rusty Russell,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 04:24:28PM +0100, Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
> 
> We should not be loading state into a different device (or a similar device 
> with a different set of capabilities).
> 
> If you want to provide backwards compatibility then you should do that by 
> creating a device that is the same as the original.  As I mentioned in my 
> earlier mail, loading a snapshot should never do anything that can not be 
> achieved through normal operation.
> 
> Paul

Why shouldn't it? You don't load a snapshot while guest is running.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 15:50               ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:50 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 04:24:28PM +0100, Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
> 
> We should not be loading state into a different device (or a similar device 
> with a different set of capabilities).
> 
> If you want to provide backwards compatibility then you should do that by 
> creating a device that is the same as the original.  As I mentioned in my 
> earlier mail, loading a snapshot should never do anything that can not be 
> achieved through normal operation.
> 
> Paul

Why shouldn't it? You don't load a snapshot while guest is running.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 15:24             ` Paul Brook
  (?)
@ 2009-06-10 15:50             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:50 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Glauber Costa, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 04:24:28PM +0100, Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
> 
> We should not be loading state into a different device (or a similar device 
> with a different set of capabilities).
> 
> If you want to provide backwards compatibility then you should do that by 
> creating a device that is the same as the original.  As I mentioned in my 
> earlier mail, loading a snapshot should never do anything that can not be 
> achieved through normal operation.
> 
> Paul

Why shouldn't it? You don't load a snapshot while guest is running.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 15:24             ` Paul Brook
@ 2009-06-10 17:43               ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-10 17:43 UTC (permalink / raw)
  To: Paul Brook
  Cc: qemu-devel, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity

Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
> 
> We should not be loading state into a different device (or a similar device 
> with a different set of capabilities).
> 
> If you want to provide backwards compatibility then you should do that by 
> creating a device that is the same as the original.  As I mentioned in my 
> earlier mail, loading a snapshot should never do anything that can not be 
> achieved through normal operation.

If you can create a machine by restoring a snapshot which you can't
create by normally starting QEMU, then you'll soon have guests which
work fine from their snapshots, but which cannot be booted without a
snapshot because there's no way to boot the right machine for the guest.

Someone might even have guests like that for years without noticing,
because they always save and restore guest state using snapshots, then
one day they simply want to boot the guest from its disk image and
find there's no way to do it with any QEMU which runs on their host
platform.

I think the right long term answer to all this is a way to get QEMU to
dump its current machine configuration in glorious detail as a file
which can be reloaded as a machine configuration.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 17:43               ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-10 17:43 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity

Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
> 
> We should not be loading state into a different device (or a similar device 
> with a different set of capabilities).
> 
> If you want to provide backwards compatibility then you should do that by 
> creating a device that is the same as the original.  As I mentioned in my 
> earlier mail, loading a snapshot should never do anything that can not be 
> achieved through normal operation.

If you can create a machine by restoring a snapshot which you can't
create by normally starting QEMU, then you'll soon have guests which
work fine from their snapshots, but which cannot be booted without a
snapshot because there's no way to boot the right machine for the guest.

Someone might even have guests like that for years without noticing,
because they always save and restore guest state using snapshots, then
one day they simply want to boot the guest from its disk image and
find there's no way to do it with any QEMU which runs on their host
platform.

I think the right long term answer to all this is a way to get QEMU to
dump its current machine configuration in glorious detail as a file
which can be reloaded as a machine configuration.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 15:24             ` Paul Brook
                               ` (3 preceding siblings ...)
  (?)
@ 2009-06-10 17:43             ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-10 17:43 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

Paul Brook wrote:
> > > caps can be anywhere, but we don't expect it to change during machine
> > > execution lifetime.
> > >
> > > Or I am just confused by the name "pci_device_load" ?
> >
> > Right. So I want to load an image and it has capability X at offset Y.
> > wmask has to match. I don't want to assume that we never change Y
> > for the device without breaking old images, so I clear wmask here
> > and set it up again after looking up capabilities that I loaded.
> 
> We should not be loading state into a different device (or a similar device 
> with a different set of capabilities).
> 
> If you want to provide backwards compatibility then you should do that by 
> creating a device that is the same as the original.  As I mentioned in my 
> earlier mail, loading a snapshot should never do anything that can not be 
> achieved through normal operation.

If you can create a machine by restoring a snapshot which you can't
create by normally starting QEMU, then you'll soon have guests which
work fine from their snapshots, but which cannot be booted without a
snapshot because there's no way to boot the right machine for the guest.

Someone might even have guests like that for years without noticing,
because they always save and restore guest state using snapshots, then
one day they simply want to boot the guest from its disk image and
find there's no way to do it with any QEMU which runs on their host
platform.

I think the right long term answer to all this is a way to get QEMU to
dump its current machine configuration in glorious detail as a file
which can be reloaded as a machine configuration.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 17:43               ` Jamie Lokier
@ 2009-06-10 18:22                 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 18:22 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Paul Brook, qemu-devel, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity

On Wed, Jun 10, 2009 at 06:43:02PM +0100, Jamie Lokier wrote:
> Paul Brook wrote:
> > > > caps can be anywhere, but we don't expect it to change during machine
> > > > execution lifetime.
> > > >
> > > > Or I am just confused by the name "pci_device_load" ?
> > >
> > > Right. So I want to load an image and it has capability X at offset Y.
> > > wmask has to match. I don't want to assume that we never change Y
> > > for the device without breaking old images, so I clear wmask here
> > > and set it up again after looking up capabilities that I loaded.
> > 
> > We should not be loading state into a different device (or a similar device 
> > with a different set of capabilities).
> > 
> > If you want to provide backwards compatibility then you should do that by 
> > creating a device that is the same as the original.  As I mentioned in my 
> > earlier mail, loading a snapshot should never do anything that can not be 
> > achieved through normal operation.
> 
> If you can create a machine be restoring a snapshot which you can't
> create by normally starting QEMU, then you'll soon have guests which
> work fine from their snapshots, but which cannot be booted without a
> snapshot because there's no way to boot the right machine for the guest.

Yes. This clearly isn't what I'm building here. You *can* create a guest
without msi-x support by passing an appropriate flag.

> Ssomeone might even have guests like that for years without noticing,
> because they always save and restore guest state using snapshots, then
> one day they simply want to boot the guest from it's disk image and
> find there's no way to do it with any QEMU which runs on their host
> platform.
> 
> I think the right long term answer to all this is a way to get QEMU to
> dump it's current machine configuration in glorious detail as a file
> which can be reloaded as a machine configuration.
> 
> -- Jamie

And then we'll have the same set of problems there.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 18:22                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 18:22 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Wed, Jun 10, 2009 at 06:43:02PM +0100, Jamie Lokier wrote:
> Paul Brook wrote:
> > > > caps can be anywhere, but we don't expect it to change during machine
> > > > execution lifetime.
> > > >
> > > > Or I am just confused by the name "pci_device_load" ?
> > >
> > > Right. So I want to load an image and it has capability X at offset Y.
> > > wmask has to match. I don't want to assume that we never change Y
> > > for the device without breaking old images, so I clear wmask here
> > > and set it up again after looking up capabilities that I loaded.
> > 
> > We should not be loading state into a different device (or a similar device 
> > with a different set of capabilities).
> > 
> > If you want to provide backwards compatibility then you should do that by 
> > creating a device that is the same as the original.  As I mentioned in my 
> > earlier mail, loading a snapshot should never do anything that can not be 
> > achieved through normal operation.
> 
> If you can create a machine be restoring a snapshot which you can't
> create by normally starting QEMU, then you'll soon have guests which
> work fine from their snapshots, but which cannot be booted without a
> snapshot because there's no way to boot the right machine for the guest.

Yes. This clearly isn't what I'm building here. You *can* create a guest
without msi-x support by passing an appropriate flag.

> Ssomeone might even have guests like that for years without noticing,
> because they always save and restore guest state using snapshots, then
> one day they simply want to boot the guest from it's disk image and
> find there's no way to do it with any QEMU which runs on their host
> platform.
> 
> I think the right long term answer to all this is a way to get QEMU to
> dump it's current machine configuration in glorious detail as a file
> which can be reloaded as a machine configuration.
> 
> -- Jamie

And then we'll have the same set of problems there.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 17:43               ` Jamie Lokier
  (?)
  (?)
@ 2009-06-10 18:22               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 18:22 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, kvm, Glauber Costa, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Wed, Jun 10, 2009 at 06:43:02PM +0100, Jamie Lokier wrote:
> Paul Brook wrote:
> > > > caps can be anywhere, but we don't expect it to change during machine
> > > > execution lifetime.
> > > >
> > > > Or I am just confused by the name "pci_device_load" ?
> > >
> > > Right. So I want to load an image and it has capability X at offset Y.
> > > wmask has to match. I don't want to assume that we never change Y
> > > for the device without breaking old images, so I clear wmask here
> > > and set it up again after looking up capabilities that I loaded.
> > 
> > We should not be loading state into a different device (or a similar device 
> > with a different set of capabilities).
> > 
> > If you want to provide backwards compatibility then you should do that by 
> > creating a device that is the same as the original.  As I mentioned in my 
> > earlier mail, loading a snapshot should never do anything that can not be 
> > achieved through normal operation.
> 
> If you can create a machine be restoring a snapshot which you can't
> create by normally starting QEMU, then you'll soon have guests which
> work fine from their snapshots, but which cannot be booted without a
> snapshot because there's no way to boot the right machine for the guest.

Yes. This clearly isn't what I'm building here. You *can* create a guest
without msi-x support by passing an appropriate flag.

> Ssomeone might even have guests like that for years without noticing,
> because they always save and restore guest state using snapshots, then
> one day they simply want to boot the guest from it's disk image and
> find there's no way to do it with any QEMU which runs on their host
> platform.
> 
> I think the right long term answer to all this is a way to get QEMU to
> dump it's current machine configuration in glorious detail as a file
> which can be reloaded as a machine configuration.
> 
> -- Jamie

And then we'll have the same set of problems there.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 18:22                 ` Michael S. Tsirkin
@ 2009-06-10 19:27                   ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-10 19:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, qemu-devel, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity

Michael S. Tsirkin wrote:
> > I think the right long term answer to all this is a way to get QEMU to
> > dump it's current machine configuration in glorious detail as a file
> > which can be reloaded as a machine configuration.
> 
> And then we'll have the same set of problems there.

We will, and the solution will be the same: options to create devices
as they were in older versions of QEMU.  It only needs to cover device
features which matter to guests, not every bug fix.

However with a machine configuration which is generated by QEMU,
there's less worry about proliferation of obscure options, compared
with the command line.  You don't necessarily have to document every
backward-compatibility option in any detail, you just have to make
sure it's written and read properly, which is much the same thing as
the snapshot code does.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
@ 2009-06-10 19:27                   ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-10 19:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

Michael S. Tsirkin wrote:
> > I think the right long term answer to all this is a way to get QEMU to
> > dump it's current machine configuration in glorious detail as a file
> > which can be reloaded as a machine configuration.
> 
> And then we'll have the same set of problems there.

We will, and the solution will be the same: options to create devices
as they were in older versions of QEMU.  It only needs to cover device
features which matter to guests, not every bug fix.

However with a machine configuration which is generated by QEMU,
there's less worry about proliferation of obscure options, compared
with the command line.  You don't necessarily have to document every
backward-compatibility option in any detail, you just have to make
sure it's written and read properly, which is much the same thing as
the snapshot code does.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities
  2009-06-10 18:22                 ` Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-10 19:27                 ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-10 19:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Michael S. Tsirkin wrote:
> > I think the right long term answer to all this is a way to get QEMU to
> > dump it's current machine configuration in glorious detail as a file
> > which can be reloaded as a machine configuration.
> 
> And then we'll have the same set of problems there.

We will, and the solution will be the same: options to create devices
as they were in older versions of QEMU.  It only needs to cover device
features which matter to guests, not every bug fix.

However with a machine configuration which is generated by QEMU,
there's less worry about proliferation of obscure options, compared
with the command line.  You don't necessarily have to document every
backward-compatibility option in any detail, you just have to make
sure it's written and read properly, which is much the same thing as
the snapshot code does.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-10 19:27                   ` Jamie Lokier
@ 2009-06-12  8:43                     ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12  8:43 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Michael S. Tsirkin, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> Michael S. Tsirkin wrote:
> > > I think the right long term answer to all this is a way to get QEMU to
> > > dump it's current machine configuration in glorious detail as a file
> > > which can be reloaded as a machine configuration.
> > 
> > And then we'll have the same set of problems there.
> 
> We will, and the solution will be the same: options to create devices
> as they were in older versions of QEMU.  It only needs to cover device
> features which matter to guests, not every bug fix.
> 
> However with a machine configuration which is generated by QEMU,
> there's less worry about proliferation of obscure options, compared
> with the command line.  You don't necessarily have to document every
> backward-compatibility option in any detail, you just have to make
> sure it's written and read properly, which is much the same thing as
> the snapshot code does.

This is a sensible plan, but I don't think we should mix these compat
options in with the VM manager supplied configuration.

There are two problems with that approach.

= Problem 1 - VM manager needs to parse qemu config =

Your proposal implies:

  - VM manager supplies a basic configuration to qemu

  - It then immediately asks qemu for a dump of the machine 
    configuration in all its glorious detail and retains that
    config

  - If the VM manager wishes to add a new device it needs to parse the 
    qemu config and add it, rather than just generate an entirely new 
    config

= Problem 2 - We can't predict the future =

If a VM manager supplies a configuration which is missing any given
option, qemu cannot tell the difference between:

  - This is a basic config, the VM manager wants whatever the default 
    of the current qemu version is

  - This is a complete config dumped using an old version of qemu, the 
    VM manager wants the old default

= Solution - Separate configuration from compat hints =

As I suggested before:

  - Allow the VM manager to dump compat hints; this would be an opaque 
    file format, more like the savevm format than a config file

  - Use defaults where compat hints are not available; e.g. if the VM 
    manager specifies a device config, but no compat hints are 
    supplied for it, then just use default values

  - Make the config override compat hints; e.g. if there are compat 
    hints specified for a device not included in the machine config, 
    just ignore those hints

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12  8:43                     ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12  8:43 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> Michael S. Tsirkin wrote:
> > > I think the right long term answer to all this is a way to get QEMU to
> > > dump it's current machine configuration in glorious detail as a file
> > > which can be reloaded as a machine configuration.
> > 
> > And then we'll have the same set of problems there.
> 
> We will, and the solution will be the same: options to create devices
> as they were in older versions of QEMU.  It only needs to cover device
> features which matter to guests, not every bug fix.
> 
> However with a machine configuration which is generated by QEMU,
> there's less worry about proliferation of obscure options, compared
> with the command line.  You don't necessarily have to document every
> backward-compatibility option in any detail, you just have to make
> sure it's written and read properly, which is much the same thing as
> the snapshot code does.

This is a sensible plan, but I don't think we should mix these compat
options in with the VM manager supplied configuration.

There are two problems with that approach.

= Problem 1 - VM manager needs to parse qemu config =

Your proposal implies:

  - VM manager supplies a basic configuration to qemu

  - It then immediately asks qemu for a dump of the machine 
    configuration in all its glorious detail and retains that
    config

  - If the VM manager wishes to add a new device it needs to parse the 
    qemu config and add it, rather than just generate an entirely new 
    config

= Problem 2 - We can't predict the future =

If a VM manager supplies a configuration which is missing any given
option, qemu cannot tell the difference between:

  - This is a basic config, the VM manager wants whatever the default 
    of the current qemu version is

  - This is a complete config dumped using an old version of qemu, the 
    VM manager wants the old default

= Solution - Separate configuration from compat hints =

As I suggested before:

  - Allow the VM manager to dump compat hints; this would be an opaque 
    file format, more like the savevm format than a config file

  - Use defaults where compat hints are not available; e.g. if the VM 
    manager specifies a device config, but no compat hints are 
    supplied for it, then just use default values

  - Make the config override compat hints; e.g. if there are compat 
    hints specified for a device not included in the machine config, 
    just ignore those hints

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-10 19:27                   ` Jamie Lokier
  (?)
@ 2009-06-12  8:43                   ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12  8:43 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> Michael S. Tsirkin wrote:
> > > I think the right long term answer to all this is a way to get QEMU to
> > > dump its current machine configuration in glorious detail as a file
> > > which can be reloaded as a machine configuration.
> > 
> > And then we'll have the same set of problems there.
> 
> We will, and the solution will be the same: options to create devices
> as they were in older versions of QEMU.  It only needs to cover device
> features which matter to guests, not every bug fix.
> 
> However with a machine configuration which is generated by QEMU,
> there's less worry about proliferation of obscure options, compared
> with the command line.  You don't necessarily have to document every
> backward-compatibility option in any detail, you just have to make
> sure it's written and read properly, which is much the same thing as
> the snapshot code does.

This is a sensible plan, but I don't think we should mix these compat
options in with the VM manager supplied configuration.

There are two problems with that approach.

= Problem 1 - VM manager needs to parse qemu config =

Your proposal implies:

  - VM manager supplies a basic configuration to qemu

  - It then immediately asks qemu for a dump of the machine 
    configuration in all its glorious detail and retains that
    config

  - If the VM manager wishes to add a new device it needs to parse the 
    qemu config and add it, rather than just generate an entirely new 
    config

= Problem 2 - We can't predict the future =

If a VM manager supplies a configuration which is missing any given
option, qemu cannot tell the difference between:

  - This is a basic config, the VM manager wants whatever the default 
    of the current qemu version is

  - This is a complete config dumped using an old version of qemu, the 
    VM manager wants the old default

= Solution - Separate configuration from compat hints =

As I suggested before:

  - Allow the VM manager to dump compat hints; this would be an opaque 
    file format, more like the savevm format than a config file

  - Use defaults where compat hints are not available; e.g. if the VM 
    manager specifies a device config, but no compat hints are 
    supplied for it, then just use default values

  - Make the config override compat hints; e.g. if there are compat 
    hints specified for a device not included in the machine config, 
    just ignore those hints

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12  8:43                     ` Mark McLoughlin
@ 2009-06-12 13:59                       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-12 13:59 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Carsten Otte, kvm, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Avi Kivity

On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> = Solution - Separate configuration from compat hints =
> 
> As I suggested before:
> 
>   - Allow the VM manager to dump compat hints; this would be an opaque 
>     file format, more like the savevm format than a config file

Why make it "like the savevm" format then?
If they are opaque anyway, compat hints could be part of savevm format.

>   - Use defaults where compat hints are not available; e.g. if the VM 
>     manager specifies a device config, but no compat hints are 
>     supplied for it, then just use default values
> 
>   - Make the config override compat hints; e.g. if there are compat 
>     hints specified for a device not included in the machine config, 
>     just ignore those hints
> 
> Cheers,
> Mark.

If compat hints are opaque and only editable by qemu, we can get into a
situation where one can't create a specific setup with a new qemu.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 13:59                       ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-12 13:59 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> = Solution - Separate configuration from compat hints =
> 
> As I suggested before:
> 
>   - Allow the VM manager to dump compat hints; this would be an opaque 
>     file format, more like the savevm format than a config file

Why make it "like the savevm" format then?
If they are opaque anyway, compat hints could be part of savevm format.

>   - Use defaults where compat hints are not available; e.g. if the VM 
>     manager specifies a device config, but no compat hints are 
>     supplied for it, then just use default values
> 
>   - Make the config override compat hints; e.g. if there are compat 
>     hints specified for a device not included in the machine config, 
>     just ignore those hints
> 
> Cheers,
> Mark.

If compat hints are opaque and only editable by qemu, we can get into a
situation where one can't create a specific setup with a new qemu.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12  8:43                     ` Mark McLoughlin
  (?)
  (?)
@ 2009-06-12 13:59                     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-12 13:59 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Glauber Costa, Jamie Lokier, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> = Solution - Separate configuration from compat hints =
> 
> As I suggested before:
> 
>   - Allow the VM manager to dump compat hints; this would be an opaque 
>     file format, more like the savevm format than a config file

Why make it "like the savevm" format then?
If they are opaque anyway, compat hints could be part of savevm format.

>   - Use defaults where compat hints are not available; e.g. if the VM 
>     manager specifies a device config, but no compat hints are 
>     supplied for it, then just use default values
> 
>   - Make the config override compat hints; e.g. if there are compat 
>     hints specified for a device not included in the machine config, 
>     just ignore those hints
> 
> Cheers,
> Mark.

If compat hints are opaque and only editable by qemu, we can get into a
situation where one can't create a specific setup with a new qemu.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 13:59                       ` Michael S. Tsirkin
@ 2009-06-12 14:48                         ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 14:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jamie Lokier, Carsten Otte, kvm, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 16:59 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> > = Solution - Separate configuration from compat hints =
> > 
> > As I suggested before:
> > 
> >   - Allow the VM manager to dump compat hints; this would be an opaque 
> >     file format, more like the savevm format than a config file
> 
> Why make it "like the savevm" format then?
> If they are opaque anyway, compat hints could be part of savevm format.

So a "savevm --only-compat-hints" command? It might make sense, since we
would want the compat hints with savevm too.

> >   - Use defaults where compat hints are not available; e.g. if the VM 
> >     manager specifies a device config, but no compat hints are 
> >     supplied for it, then just use default values
> > 
> >   - Make the config override compat hints; e.g. if there are compat 
> >     hints specified for a device not included in the machine config, 
> >     just ignore those hints
> > 
> > Cheers,
> > Mark.
> 
> If compat hints are opaque and only editable by qemu, we can get into a
> situation where one can't create a specific setup with a new qemu.

An example?

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 14:48                         ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 14:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Fri, 2009-06-12 at 16:59 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> > = Solution - Separate configuration from compat hints =
> > 
> > As I suggested before:
> > 
> >   - Allow the VM manager to dump compat hints; this would be an opaque 
> >     file format, more like the savevm format than a config file
> 
> Why make it "like the savevm" format then?
> If they are opaque anyway, compat hints could be part of savevm format.

So a "savevm --only-compat-hints" command? It might make sense, since we
would want the compat hints with savevm too.

> >   - Use defaults where compat hints are not available; e.g. if the VM 
> >     manager specifies a device config, but no compat hints are 
> >     supplied for it, then just use default values
> > 
> >   - Make the config override compat hints; e.g. if there are compat 
> >     hints specified for a device not included in the machine config, 
> >     just ignore those hints
> > 
> > Cheers,
> > Mark.
> 
> If compat hints are opaque and only editable by qemu, we can get into a
> situation where one can't create a specific setup with a new qemu.

An example?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 13:59                       ` Michael S. Tsirkin
  (?)
@ 2009-06-12 14:48                       ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 14:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Jamie Lokier, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Fri, 2009-06-12 at 16:59 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 09:43:29AM +0100, Mark McLoughlin wrote:
> > = Solution - Separate configuration from compat hints =
> > 
> > As I suggested before:
> > 
> >   - Allow the VM manager to dump compat hints; this would be an opaque 
> >     file format, more like the savevm format than a config file
> 
> Why make it "like the savevm" format then?
> If they are opaque anyway, compat hints could be part of savevm format.

So a "savevm --only-compat-hints" command? It might make sense, since we
would want the compat hints with savevm too.

> >   - Use defaults where compat hints are not available; e.g. if the VM 
> >     manager specifies a device config, but no compat hints are 
> >     supplied for it, then just use default values
> > 
> >   - Make the config override compat hints; e.g. if there are compat 
> >     hints specified for a device not included in the machine config, 
> >     just ignore those hints
> > 
> > Cheers,
> > Mark.
> 
> If compat hints are opaque and only editable by qemu, we can get into a
> situation where one can't create a specific setup with a new qemu.

An example?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12  8:43                     ` Mark McLoughlin
@ 2009-06-12 14:51                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:51 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>> I think the right long term answer to all this is a way to get QEMU to
>>>> dump its current machine configuration in glorious detail as a file
>>>> which can be reloaded as a machine configuration.
>>>>         
>>> And then we'll have the same set of problems there.
>>>       
>> We will, and the solution will be the same: options to create devices
>> as they were in older versions of QEMU.  It only needs to cover device
>> features which matter to guests, not every bug fix.
>>
>> However with a machine configuration which is generated by QEMU,
>> there's less worry about proliferation of obscure options, compared
>> with the command line.  You don't necessarily have to document every
>> backward-compatibility option in any detail, you just have to make
>> sure it's written and read properly, which is much the same thing as
>> the snapshot code does.
>>     
>
> This is a sensible plan, but I don't think we should mix these compat
> options in with the VM manager supplied configuration.
>
> There are two problems with that approach.
>
> = Problem 1 - VM manager needs to parse qemu config =
>
> Your proposal implies:
>
>   - VM manager supplies a basic configuration to qemu
>
>   - It then immediately asks qemu for a dump of the machine 
>     configuration in all its glorious detail and retains that
>     config
>
>   - If the VM manager wishes to add a new device it needs to parse the 
>     qemu config and add it, rather than just generate an entirely new 
>     config
>   

What's the problem with parsing the device config and modifying it?  Is 
it just complexity?

If we provided a mechanism to simplify manipulating a device config, 
would that eliminate the concern here?

Regards,

Anthony Liguori



^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 14:51                       ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:51 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>> I think the right long term answer to all this is a way to get QEMU to
>>>> dump its current machine configuration in glorious detail as a file
>>>> which can be reloaded as a machine configuration.
>>>>         
>>> And then we'll have the same set of problems there.
>>>       
>> We will, and the solution will be the same: options to create devices
>> as they were in older versions of QEMU.  It only needs to cover device
>> features which matter to guests, not every bug fix.
>>
>> However with a machine configuration which is generated by QEMU,
>> there's less worry about proliferation of obscure options, compared
>> with the command line.  You don't necessarily have to document every
>> backward-compatibility option in any detail, you just have to make
>> sure it's written and read properly, which is much the same thing as
>> the snapshot code does.
>>     
>
> This is a sensible plan, but I don't think we should mix these compat
> options in with the VM manager supplied configuration.
>
> There are two problems with that approach.
>
> = Problem 1 - VM manager needs to parse qemu config =
>
> Your proposal implies:
>
>   - VM manager supplies a basic configuration to qemu
>
>   - It then immediately asks qemu for a dump of the machine 
>     configuration in all its glorious detail and retains that
>     config
>
>   - If the VM manager wishes to add a new device it needs to parse the 
>     qemu config and add it, rather than just generate an entirely new 
>     config
>   

What's the problem with parsing the device config and modifying it?  Is 
it just complexity?

If we provided a mechanism to simplify manipulating a device config, 
would that eliminate the concern here?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12  8:43                     ` Mark McLoughlin
                                       ` (2 preceding siblings ...)
  (?)
@ 2009-06-12 14:51                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:51 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>> I think the right long term answer to all this is a way to get QEMU to
>>>> dump its current machine configuration in glorious detail as a file
>>>> which can be reloaded as a machine configuration.
>>>>         
>>> And then we'll have the same set of problems there.
>>>       
>> We will, and the solution will be the same: options to create devices
>> as they were in older versions of QEMU.  It only needs to cover device
>> features which matter to guests, not every bug fix.
>>
>> However with a machine configuration which is generated by QEMU,
>> there's less worry about proliferation of obscure options, compared
>> with the command line.  You don't necessarily have to document every
>> backward-compatibility option in any detail, you just have to make
>> sure it's written and read properly, which is much the same thing as
>> the snapshot code does.
>>     
>
> This is a sensible plan, but I don't think we should mix these compat
> options in with the VM manager supplied configuration.
>
> There are two problems with that approach.
>
> = Problem 1 - VM manager needs to parse qemu config =
>
> Your proposal implies:
>
>   - VM manager supplies a basic configuration to qemu
>
>   - It then immediately asks qemu for a dump of the machine 
>     configuration in all its glorious detail and retains that
>     config
>
>   - If the VM manager wishes to add a new device it needs to parse the 
>     qemu config and add it, rather than just generate an entirely new 
>     config
>   

What's the problem with parsing the device config and modifying it?  Is 
it just complexity?

If we provided a mechanism to simplify manipulating a device config, 
would that eliminate the concern here?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12  8:43                     ` Mark McLoughlin
@ 2009-06-12 14:55                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:55 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>   
> = Solution - Separate configuration from compat hints =
>
> As I suggested before:
>
>   - Allow the VM manager to dump compat hints; this would be an opaque 
>     file format, more like the savevm format than a config file
>   

How are compat hints different from a device tree?

In my mind, that's what compat hints are.  I don't see another sane way 
to implement them.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 14:55                       ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:55 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>   
> = Solution - Separate configuration from compat hints =
>
> As I suggested before:
>
>   - Allow the VM manager to dump compat hints; this would be an opaque 
>     file format, more like the savevm format than a config file
>   

How are compat hints different from a device tree?

In my mind, that's what compat hints are.  I don't see another sane way 
to implement them.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12  8:43                     ` Mark McLoughlin
                                       ` (5 preceding siblings ...)
  (?)
@ 2009-06-12 14:55                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 14:55 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>   
> = Solution - Separate configuration from compat hints =
>
> As I suggested before:
>
>   - Allow the VM manager to dump compat hints; this would be an opaque 
>     file format, more like the savevm format than a config file
>   

How are compat hints different from a device tree?

In my mind, that's what compat hints are.  I don't see another sane way 
to implement them.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 14:51                       ` Anthony Liguori
@ 2009-06-12 15:41                         ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:41 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >   
> >> Michael S. Tsirkin wrote:
> >>     
> >>>> I think the right long term answer to all this is a way to get QEMU to
> >>>> dump its current machine configuration in glorious detail as a file
> >>>> which can be reloaded as a machine configuration.
> >>>>         
> >>> And then we'll have the same set of problems there.
> >>>       
> >> We will, and the solution will be the same: options to create devices
> >> as they were in older versions of QEMU.  It only needs to cover device
> >> features which matter to guests, not every bug fix.
> >>
> >> However with a machine configuration which is generated by QEMU,
> >> there's less worry about proliferation of obscure options, compared
> >> with the command line.  You don't necessarily have to document every
> >> backward-compatibility option in any detail, you just have to make
> >> sure it's written and read properly, which is much the same thing as
> >> the snapshot code does.
> >>     
> >
> > This is a sensible plan, but I don't think we should mix these compat
> > options in with the VM manager supplied configuration.
> >
> > There are two problems with that approach.
> >
> > = Problem 1 - VM manager needs to parse qemu config =
> >
> > Your proposal implies:
> >
> >   - VM manager supplies a basic configuration to qemu
> >
> >   - It then immediately asks qemu for a dump of the machine 
> >     configuration in all its glorious detail and retains that
> >     config
> >
> >   - If the VM manager wishes to add a new device it needs to parse the 
> >     qemu config and add it, rather than just generate an entirely new 
> >     config
> >   
> 
> What's the problem with parsing the device config and modifying it?  Is 
> it just complexity?

Yes, complexity is the issue.

> If we provided a mechanism to simplify manipulating a device config, 
> would that eliminate the concern here?

In libvirt's case, a lot of the complexity would come from needing to
figure out what to change.

i.e. libvirt produces a qemu configuration (currently a command line)
from a guest's XML configuration; with this idea, libvirt would probably
compare the old XML config to the new XML config, and then apply those
differences to the qemu configuration.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 15:41                         ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:41 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >   
> >> Michael S. Tsirkin wrote:
> >>     
> >>>> I think the right long term answer to all this is a way to get QEMU to
> >>>> dump its current machine configuration in glorious detail as a file
> >>>> which can be reloaded as a machine configuration.
> >>>>         
> >>> And then we'll have the same set of problems there.
> >>>       
> >> We will, and the solution will be the same: options to create devices
> >> as they were in older versions of QEMU.  It only needs to cover device
> >> features which matter to guests, not every bug fix.
> >>
> >> However with a machine configuration which is generated by QEMU,
> >> there's less worry about proliferation of obscure options, compared
> >> with the command line.  You don't necessarily have to document every
> >> backward-compatibility option in any detail, you just have to make
> >> sure it's written and read properly, which is much the same thing as
> >> the snapshot code does.
> >>     
> >
> > This is a sensible plan, but I don't think we should mix these compat
> > options in with the VM manager supplied configuration.
> >
> > There are two problems with that approach.
> >
> > = Problem 1 - VM manager needs to parse qemu config =
> >
> > Your proposal implies:
> >
> >   - VM manager supplies a basic configuration to qemu
> >
> >   - It then immediately asks qemu for a dump of the machine 
> >     configuration in all its glorious detail and retains that
> >     config
> >
> >   - If the VM manager wishes to add a new device it needs to parse the 
> >     qemu config and add it, rather than just generate an entirely new 
> >     config
> >   
> 
> What's the problem with parsing the device config and modifying it?  Is 
> it just complexity?

Yes, complexity is the issue.

> If we provided a mechanism to simplify manipulating a device config, 
> would that eliminate the concern here?

In libvirt's case, a lot of the complexity would come from needing to
figure out what to change.

i.e. libvirt produces a qemu configuration (currently a command line)
from a guest's XML configuration; with this idea, libvirt would probably
compare the old XML config to the new XML config, and then apply those
differences to the qemu configuration.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 14:51                       ` Anthony Liguori
  (?)
  (?)
@ 2009-06-12 15:41                       ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:41 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >   
> >> Michael S. Tsirkin wrote:
> >>     
> >>>> I think the right long term answer to all this is a way to get QEMU to
> >>>> dump its current machine configuration in glorious detail as a file
> >>>> which can be reloaded as a machine configuration.
> >>>>         
> >>> And then we'll have the same set of problems there.
> >>>       
> >> We will, and the solution will be the same: options to create devices
> >> as they were in older versions of QEMU.  It only needs to cover device
> >> features which matter to guests, not every bug fix.
> >>
> >> However with a machine configuration which is generated by QEMU,
> >> there's less worry about proliferation of obscure options, compared
> >> with the command line.  You don't necessarily have to document every
> >> backward-compatibility option in any detail, you just have to make
> >> sure it's written and read properly, which is much the same thing as
> >> the snapshot code does.
> >>     
> >
> > This is a sensible plan, but I don't think we should mix these compat
> > options in with the VM manager supplied configuration.
> >
> > There are two problems with that approach.
> >
> > = Problem 1 - VM manager needs to parse qemu config =
> >
> > Your proposal implies:
> >
> >   - VM manager supplies a basic configuration to qemu
> >
> >   - It then immediately asks qemu for a dump of the machine 
> >     configuration in all its glorious detail and retains that
> >     config
> >
> >   - If the VM manager wishes to add a new device it needs to parse the 
> >     qemu config and add it, rather than just generate an entirely new 
> >     config
> >   
> 
> What's the problem with parsing the device config and modifying it?  Is 
> it just complexity?

Yes, complexity is the issue.

> If we provided a mechanism to simplify manipulating a device config, 
> would that eliminate the concern here?

In libvirt's case, a lot of the complexity would come from needing to
figure out what to change.

i.e. libvirt produces a qemu configuration (currently a command line)
from a guest XML's configuration; with this idea, libvirt would probably
compare the old XML config to the new XML config, and then apply those
differences to the qemu configuration.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 14:55                       ` Anthony Liguori
@ 2009-06-12 15:53                         ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:53 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >   
> > = Solution - Separate configuration from compat hints =
> >
> > As I suggested before:
> >
> >   - Allow the VM manager to dump compat hints; this would be an opaque 
> >     file format, more like the savevm format than a config file
> >   
> 
> How is compat hints different from a device tree?
> 
> In my mind, that's what compat hints is.  I don't see another sane way 
> to implement it.

A device tree with a different purpose than a config file.

In its simplest form it could be a device tree with a version number for
each device[1]. 

The other obvious piece to add to it would be PCI addresses, so that
even if you remove a device, the addresses assigned to existing devices
don't change.

Cheers,
Mark.

[1] - Adding such a per-device version number to the config file would
solve problem (2)


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 15:53                         ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:53 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >   
> > = Solution - Separate configuration from compat hints =
> >
> > As I suggested before:
> >
> >   - Allow the VM manager to dump compat hints; this would be an opaque 
> >     file format, more like the savevm format than a config file
> >   
> 
> How is compat hints different from a device tree?
> 
> In my mind, that's what compat hints is.  I don't see another sane way 
> to implement it.

A device tree with a different purpose than a config file.

In its simplest form it could be a device tree with a version number for
each device[1]. 

The other obvious piece to add to it would be PCI addresses, so that
even if you remove a device, the addresses assigned to existing devices
don't change.

Cheers,
Mark.

[1] - Adding such a per-device version number to the config file would
solve problem (2)

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 14:55                       ` Anthony Liguori
  (?)
  (?)
@ 2009-06-12 15:53                       ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 15:53 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >   
> > = Solution - Separate configuration from compat hints =
> >
> > As I suggested before:
> >
> >   - Allow the VM manager to dump compat hints; this would be an opaque 
> >     file format, more like the savevm format than a config file
> >   
> 
> How is compat hints different from a device tree?
> 
> In my mind, that's what compat hints is.  I don't see another sane way 
> to implement it.

A device tree with a different purpose than a config file.

In its simplest form it could be a device tree with a version number for
each device[1]. 

The other obvious piece to add to it would be PCI addresses, so that
even if you remove a device, the addresses assigned to existing devices
don't change.

Cheers,
Mark.

[1] - Adding such a per-device version number to the config file would
solve problem (2)

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 15:41                         ` Mark McLoughlin
@ 2009-06-12 16:11                           ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:11 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
>   
>> Mark McLoughlin wrote:
>>     
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>   
>>>       
>>>> Michael S. Tsirkin wrote:
>>>>     
>>>>         
>>>>>> I think the right long term answer to all this is a way to get QEMU to
>>>>>> dump its current machine configuration in glorious detail as a file
>>>>>> which can be reloaded as a machine configuration.
>>>>>>         
>>>>>>             
>>>>> And then we'll have the same set of problems there.
>>>>>       
>>>>>           
>>>> We will, and the solution will be the same: options to create devices
>>>> as they were in older versions of QEMU.  It only needs to cover device
>>>> features which matter to guests, not every bug fix.
>>>>
>>>> However with a machine configuration which is generated by QEMU,
>>>> there's less worry about proliferation of obscure options, compared
>>>> with the command line.  You don't necessarily have to document every
>>>> backward-compatibility option in any detail, you just have to make
>>>> sure it's written and read properly, which is much the same thing as
>>>> the snapshot code does.
>>>>     
>>>>         
>>> This is a sensible plan, but I don't think we should mix these compat
>>> options in with the VM manager supplied configuration.
>>>
>>> There are two problems with that approach.
>>>
>>> = Problem 1 - VM manager needs to parse qemu config =
>>>
>>> Your proposal implies:
>>>
>>>   - VM manager supplies a basic configuration to qemu
>>>
>>>   - It then immediately asks qemu for a dump of the machine 
>>>     configuration in all its glorious detail and retains that
>>>     config
>>>
>>>   - If the VM manager wishes to add a new device it needs to parse the 
>>>     qemu config and add it, rather than just generate an entirely new 
>>>     config
>>>   
>>>       
>> What's the problem with parsing the device config and modifying it?  Is 
>> it just complexity?
>>     
>
> Yes, complexity is the issue.
>
>   
>> If we provided a mechanism to simplify manipulating a device config, 
>> would that eliminate the concern here?
>>     
>
> In libvirt's case, a lot of the complexity would come from needing to
> figure out what to change.
>   

Right, libvirt wants to be able to easily say "add a scsi block device 
to this VM".  The way I see this working is that there would be a 
default pc.dtc.  We would still have a -drive file=foo.img,if=scsi 
option that would really just be a wrapper around first searching for an 
existing LSI controller, if one exists, attaching the lun, if not, 
create one, etc.

libvirt could continue to use this sort of interface.  However, as it 
wants to do more advanced things, it may have to dive into the device 
tree itself.

On live migration, QEMU will save a copy of the device tree somewhere 
and libvirt needs to keep track of it.  It can treat it as opaque.  -M 
/path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as 
expected IMHO.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 16:11                           ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:11 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
>   
>> Mark McLoughlin wrote:
>>     
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>   
>>>       
>>>> Michael S. Tsirkin wrote:
>>>>     
>>>>         
>>>>>> I think the right long term answer to all this is a way to get QEMU to
>>>>>> dump its current machine configuration in glorious detail as a file
>>>>>> which can be reloaded as a machine configuration.
>>>>>>         
>>>>>>             
>>>>> And then we'll have the same set of problems there.
>>>>>       
>>>>>           
>>>> We will, and the solution will be the same: options to create devices
>>>> as they were in older versions of QEMU.  It only needs to cover device
>>>> features which matter to guests, not every bug fix.
>>>>
>>>> However with a machine configuration which is generated by QEMU,
>>>> there's less worry about proliferation of obscure options, compared
>>>> with the command line.  You don't necessarily have to document every
>>>> backward-compatibility option in any detail, you just have to make
>>>> sure it's written and read properly, which is much the same thing as
>>>> the snapshot code does.
>>>>     
>>>>         
>>> This is a sensible plan, but I don't think we should mix these compat
>>> options in with the VM manager supplied configuration.
>>>
>>> There are two problems with that approach.
>>>
>>> = Problem 1 - VM manager needs to parse qemu config =
>>>
>>> Your proposal implies:
>>>
>>>   - VM manager supplies a basic configuration to qemu
>>>
>>>   - It then immediately asks qemu for a dump of the machine 
>>>     configuration in all its glorious detail and retains that
>>>     config
>>>
>>>   - If the VM manager wishes to add a new device it needs to parse the 
>>>     qemu config and add it, rather than just generate an entirely new 
>>>     config
>>>   
>>>       
>> What's the problem with parsing the device config and modifying it?  Is 
>> it just complexity?
>>     
>
> Yes, complexity is the issue.
>
>   
>> If we provided a mechanism to simplify manipulating a device config, 
>> would that eliminate the concern here?
>>     
>
> In libvirt's case, a lot of the complexity would come from needing to
> figure out what to change.
>   

Right, libvirt wants to be able to easily say "add a scsi block device 
to this VM".  The way I see this working is that there would be a 
default pc.dtc.  We would still have a -drive file=foo.img,if=scsi 
option that would really just be a wrapper around first searching for an 
existing LSI controller, if one exists, attaching the lun, if not, 
create one, etc.

libvirt could continue to use this sort of interface.  However, as it 
wants to do more advanced things, it may have to dive into the device 
tree itself.

On live migration, QEMU will save a copy of the device tree somewhere 
and libvirt needs to keep track of it.  It can treat it as opaque.  -M 
/path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as 
expected IMHO.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 15:41                         ` Mark McLoughlin
  (?)
@ 2009-06-12 16:11                         ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:11 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
>   
>> Mark McLoughlin wrote:
>>     
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>   
>>>       
>>>> Michael S. Tsirkin wrote:
>>>>     
>>>>         
>>>>>> I think the right long term answer to all this is a way to get QEMU to
>>>>>> dump its current machine configuration in glorious detail as a file
>>>>>> which can be reloaded as a machine configuration.
>>>>>>         
>>>>>>             
>>>>> And then we'll have the same set of problems there.
>>>>>       
>>>>>           
>>>> We will, and the solution will be the same: options to create devices
>>>> as they were in older versions of QEMU.  It only needs to cover device
>>>> features which matter to guests, not every bug fix.
>>>>
>>>> However with a machine configuration which is generated by QEMU,
>>>> there's less worry about proliferation of obscure options, compared
>>>> with the command line.  You don't necessarily have to document every
>>>> backward-compatibility option in any detail, you just have to make
>>>> sure it's written and read properly, which is much the same thing as
>>>> the snapshot code does.
>>>>     
>>>>         
>>> This is a sensible plan, but I don't think we should mix these compat
>>> options in with the VM manager supplied configuration.
>>>
>>> There are two problems with that approach.
>>>
>>> = Problem 1 - VM manager needs to parse qemu config =
>>>
>>> Your proposal implies:
>>>
>>>   - VM manager supplies a basic configuration to qemu
>>>
>>>   - It then immediately asks qemu for a dump of the machine 
>>>     configuration in all its glorious detail and retains that
>>>     config
>>>
>>>   - If the VM manager wishes to add a new device it needs to parse the 
>>>     qemu config and add it, rather than just generate an entirely new 
>>>     config
>>>   
>>>       
>> What's the problem with parsing the device config and modifying it?  Is 
>> it just complexity?
>>     
>
> Yes, complexity is the issue.
>
>   
>> If we provided a mechanism to simplify manipulating a device config, 
>> would that eliminate the concern here?
>>     
>
> In libvirt's case, a lot of the complexity would come from needing to
> figure out what to change.
>   

Right, libvirt wants to be able to easily say "add a scsi block device 
to this VM".  The way I see this working is that there would be a 
default pc.dtc.  We would still have a -drive file=foo.img,if=scsi 
option that would really just be a wrapper around first searching for an 
existing LSI controller, if one exists, attaching the lun, if not, 
create one, etc.

libvirt could continue to use this sort of interface.  However, as it 
wants to do more advanced things, it may have to dive into the device 
tree itself.

On live migration, QEMU will save a copy of the device tree somewhere 
and libvirt needs to keep track of it.  It can treat it as opaque.  -M 
/path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as 
expected IMHO.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 15:53                         ` Mark McLoughlin
@ 2009-06-12 16:12                           ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:12 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
>   
>> Mark McLoughlin wrote:
>>     
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>   
>>> = Solution - Separate configuration from compat hints =
>>>
>>> As I suggested before:
>>>
>>>   - Allow the VM manager to dump compat hints; this would be an opaque 
>>>     file format, more like the savevm format than a config file
>>>   
>>>       
>> How is compat hints different from a device tree?
>>
>> In my mind, that's what compat hints is.  I don't see another sane way 
>> to implement it.
>>     
>
> A device tree with a different purpose than a config file.
>
> In its simplest form it could be a device tree with a version number for
> each device[1]. 
>   

I think the point is that you don't need version numbers if you have a 
proper device tree.  NB the device tree contains no host configuration 
information.

Regards,

Anthony Liguori

> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.
>
> Cheers,
> Mark.
>
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
>
>   


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 16:12                           ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:12 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
>   
>> Mark McLoughlin wrote:
>>     
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>   
>>> = Solution - Separate configuration from compat hints =
>>>
>>> As I suggested before:
>>>
>>>   - Allow the VM manager to dump compat hints; this would be an opaque 
>>>     file format, more like the savevm format than a config file
>>>   
>>>       
>> How is compat hints different from a device tree?
>>
>> In my mind, that's what compat hints is.  I don't see another sane way 
>> to implement it.
>>     
>
> A device tree with a different purpose than a config file.
>
> In its simplest form it could be a device tree with a version number for
> each device[1]. 
>   

I think the point is that you don't need version numbers if you have a 
proper device tree.  NB the device tree contains no host configuration 
information.

Regards,

Anthony Liguori

> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.
>
> Cheers,
> Mark.
>
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
>
>   

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 15:53                         ` Mark McLoughlin
  (?)
@ 2009-06-12 16:12                         ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 16:12 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
>   
>> Mark McLoughlin wrote:
>>     
>>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
>>>   
>>> = Solution - Separate configuration from compat hints =
>>>
>>> As I suggested before:
>>>
>>>   - Allow the VM manager to dump compat hints; this would be an opaque 
>>>     file format, more like the savevm format than a config file
>>>   
>>>       
>> How is compat hints different from a device tree?
>>
>> In my mind, that's what compat hints is.  I don't see another sane way 
>> to implement it.
>>     
>
> A device tree with a different purpose than a config file.
>
> In its simplest form it could be a device tree with a version number for
> each device[1]. 
>   

I think the point is that you don't need version numbers if you have a 
proper device tree.  NB the device tree contains no host configuration 
information.

Regards,

Anthony Liguori

> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.
>
> Cheers,
> Mark.
>
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
>
>   

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:11                           ` Anthony Liguori
@ 2009-06-12 16:48                             ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 11:11 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> >   
> >> Mark McLoughlin wrote:
> >>     
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>   
> >>>       
> >>>> Michael S. Tsirkin wrote:
> >>>>     
> >>>>         
> >>>>>> I think the right long term answer to all this is a way to get QEMU to
> >>>>>> dump its current machine configuration in glorious detail as a file
> >>>>>> which can be reloaded as a machine configuration.
> >>>>>>         
> >>>>>>             
> >>>>> And then we'll have the same set of problems there.
> >>>>>       
> >>>>>           
> >>>> We will, and the solution will be the same: options to create devices
> >>>> as they were in older versions of QEMU.  It only needs to cover device
> >>>> features which matter to guests, not every bug fix.
> >>>>
> >>>> However with a machine configuration which is generated by QEMU,
> >>>> there's less worry about proliferation of obscure options, compared
> >>>> with the command line.  You don't necessarily have to document every
> >>>> backward-compatibility option in any detail, you just have to make
> >>>> sure it's written and read properly, which is much the same thing as
> >>>> the snapshot code does.
> >>>>     
> >>>>         
> >>> This is a sensible plan, but I don't think we should mix these compat
> >>> options in with the VM manager supplied configuration.
> >>>
> >>> There are two problems with that approach.
> >>>
> >>> = Problem 1 - VM manager needs to parse qemu config =
> >>>
> >>> Your proposal implies:
> >>>
> >>>   - VM manager supplies a basic configuration to qemu
> >>>
> >>>   - It then immediately asks qemu for a dump of the machine 
> >>>     configuration in all its glorious detail and retains that
> >>>     config
> >>>
> >>>   - If the VM manager wishes to add a new device it needs to parse the 
> >>>     qemu config and add it, rather than just generate an entirely new 
> >>>     config
> >>>   
> >>>       
> >> What's the problem with parsing the device config and modifying it?  Is 
> >> it just complexity?
> >>     
> >
> > Yes, complexity is the issue.
> >
> >   
> >> If we provided a mechanism to simplify manipulating a device config, 
> >> would that eliminate the concern here?
> >>     
> >
> > In libvirt's case, a lot of the complexity would come from needing to
> > figure out what to change.
> >   
> 
> Right, libvirt wants to be able to easily say "add a scsi block device 
> to this VM".  The way I see this working is that there would be a 
> default pc.dtc.  We would still have a -drive file=foo.img,if=scsi 
> option that would really just be a wrapper around first searching for an 
> existing LSI controller, if one exists, attaching the lun, if not, 
> create one, etc.
> 
> libvirt could continue to use this sort of interface.  However, as it 
> wants to do more advanced things, it may have to dive into the device 
> tree itself.
> 
> On live migration, QEMU will save a copy of the device tree somewhere 
> and libvirt needs to keep track of it.  It can treat it as opaque.  -M 
> /path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as 
> expected IMHO.

So, when libvirt creates a guest for the first time, it makes a copy of
the device tree and continues to use that even if qemu is upgraded.
That's enough to ensure compat is retained for all built-in devices.

However, in order to retain compat for that SCSI device (e.g. ensuring
the PCI address doesn't change as other devices are added and removed),
we're back to the same problem ... either:

  1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
     out what address to use, libvirt would need to query qemu for what 
     address was originally allocated to device or it would do all the 
     PCI address allocation itself ... or:

  2) Don't use the command line, instead get a dump of the entire 
     device tree (including the SCSI device) - if the device is to be 
     removed or modified in future, libvirt would need to modify the 
     device tree

The basic problem would be that the command line config would have very
limited ability to override the device tree config.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 16:48                             ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 11:11 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> >   
> >> Mark McLoughlin wrote:
> >>     
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>   
> >>>       
> >>>> Michael S. Tsirkin wrote:
> >>>>     
> >>>>         
> >>>>>> I think the right long term answer to all this is a way to get QEMU to
> >>>>>> dump its current machine configuration in glorious detail as a file
> >>>>>> which can be reloaded as a machine configuration.
> >>>>>>         
> >>>>>>             
> >>>>> And then we'll have the same set of problems there.
> >>>>>       
> >>>>>           
> >>>> We will, and the solution will be the same: options to create devices
> >>>> as they were in older versions of QEMU.  It only needs to cover device
> >>>> features which matter to guests, not every bug fix.
> >>>>
> >>>> However with a machine configuration which is generated by QEMU,
> >>>> there's less worry about proliferation of obscure options, compared
> >>>> with the command line.  You don't necessarily have to document every
> >>>> backward-compatibility option in any detail, you just have to make
> >>>> sure it's written and read properly, which is much the same thing as
> >>>> the snapshot code does.
> >>>>     
> >>>>         
> >>> This is a sensible plan, but I don't think we should mix these compat
> >>> options in with the VM manager supplied configuration.
> >>>
> >>> There are two problems with that approach.
> >>>
> >>> = Problem 1 - VM manager needs to parse qemu config =
> >>>
> >>> Your proposal implies:
> >>>
> >>>   - VM manager supplies a basic configuration to qemu
> >>>
> >>>   - It then immediately asks qemu for a dump of the machine 
> >>>     configuration in all its glorious detail and retains that
> >>>     config
> >>>
> >>>   - If the VM manager wishes to add a new device it needs to parse the 
> >>>     qemu config and add it, rather than just generate an entirely new 
> >>>     config
> >>>   
> >>>       
> >> What's the problem with parsing the device config and modifying it?  Is 
> >> it just complexity?
> >>     
> >
> > Yes, complexity is the issue.
> >
> >   
> >> If we provided a mechanism to simplify manipulating a device config, 
> >> would that eliminate the concern here?
> >>     
> >
> > In libvirt's case, a lot of the complexity would come from needing to
> > figure out what to change.
> >   
> 
> Right, libvirt wants to be able to easily say "add a scsi block device 
> to this VM".  The way I see this working is that there would be a 
> default pc.dtc.  We would still have a -drive file=foo.img,if=scsi 
> option that would really just be a wrapper around first searching for an 
> existing LSI controller, if one exists, attaching the lun, if not, 
> create one, etc.
> 
> libvirt could continue to use this sort of interface.  However, as it 
> wants to do more advanced things, it may have to dive into the device 
> tree itself.
> 
> On live migration, QEMU will save a copy of the device tree somewhere 
> and libvirt needs to keep track of it.  It can treat it as opaque.  -M 
> /path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as 
> expected IMHO.

So, when libvirt creates a guest for the first time, it makes a copy of
the device tree and continues to use that even if qemu is upgraded.
That's enough to ensure compat is retained for all built-in devices.

However, in order to retain compat for that SCSI device (e.g. ensuring
the PCI address doesn't change as other devices are added and removed),
we're back to the same problem ... either:

  1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
     out what address to use, libvirt would need to query qemu for what 
     address was originally allocated to device or it would do all the 
     PCI address allocation itself ... or:

  2) Don't use the command line, instead get a dump of the entire 
     device tree (including the SCSI device) - if the device is to be 
     removed or modified in future, libvirt would need to modify the 
     device tree

The basic problem would be that the command line config would have very
limited ability to override the device tree config.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:11                           ` Anthony Liguori
  (?)
@ 2009-06-12 16:48                           ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 11:11 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:51 -0500, Anthony Liguori wrote:
> >   
> >> Mark McLoughlin wrote:
> >>     
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>   
> >>>       
> >>>> Michael S. Tsirkin wrote:
> >>>>     
> >>>>         
> >>>>>> I think the right long term answer to all this is a way to get QEMU to
> >>>>>> dump it's current machine configuration in glorious detail as a file
> >>>>>> which can be reloaded as a machine configuration.
> >>>>>>         
> >>>>>>             
> >>>>> And then we'll have the same set of problems there.
> >>>>>       
> >>>>>           
> >>>> We will, and the solution will be the same: options to create devices
> >>>> as they were in older versions of QEMU.  It only needs to cover device
> >>>> features which matter to guests, not every bug fix.
> >>>>
> >>>> However with a machine configuration which is generated by QEMU,
> >>>> there's less worry about proliferation of obscure options, compared
> >>>> with the command line.  You don't necessarily have to document every
> >>>> backward-compatibility option in any detail, you just have to make
> >>>> sure it's written and read properly, which is much the same thing as
> >>>> the snapshot code does.
> >>>>     
> >>>>         
> >>> This is a sensible plan, but I don't think we should mix these compat
> >>> options in with the VM manager supplied configuration.
> >>>
> >>> There are two problems with that approach.
> >>>
> >>> = Problem 1 - VM manager needs to parse qemu config =
> >>>
> >>> Your proposal implies:
> >>>
> >>>   - VM manager supplies a basic configuration to qemu
> >>>
> >>>   - It then immediately asks qemu for a dump of the machine 
> >>>     configuration in all its glorious detail and retains that
> >>>     config
> >>>
> >>>   - If the VM manager wishes to add a new device it needs to parse the 
> >>>     qemu config and add it, rather than just generate an entirely new 
> >>>     config
> >>>   
> >>>       
> >> What's the problem with parsing the device config and modifying it?  Is 
> >> it just complexity?
> >>     
> >
> > Yes, complexity is the issue.
> >
> >   
> >> If we provided a mechanism to simplify manipulating a device config, 
> >> would that eliminate the concern here?
> >>     
> >
> > In libvirt's case, a lot of the complexity would come from needing to
> > figure out what to change.
> >   
> 
> Right, libvirt wants to be able to easily say "add a scsi block device 
> to this VM".  The way I see this working is that there would be a 
> default pc.dtc.  We would still have a -drive file=foo.img,if=scsi 
> option that would really just be a wrapper around first searching for an 
> existing LSI controller, if one exists, attaching the lun, if not, 
> create one, etc.
> 
> libvirt could continue to use this sort of interface.  However, as it 
> wants to do more advanced things, it may have to dive into the device 
> tree itself.
> 
> On live migration, QEMU will save a copy of the device tree somewhere 
> and libvirt needs to keep track of it.  It can treat it as opaque.  -M 
> /path/to/foo.dtc -drive file=foo.img,if=scsi should continue working as 
> expected IMHO.

So, when libvirt creates a guest for the first time, it makes a copy of
the device tree and continues to use that even if qemu is upgraded.
That's enough to ensure compat is retained for all built-in devices.

However, in order to retain compat for that SCSI device (e.g. ensuring
the PCI address doesn't change as other devices are added and removed),
we're back to the same problem ... either:

  1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
     out what address to use, libvirt would need to query qemu for what 
     address was originally allocated to device or it would do all the 
     PCI address allocation itself ... or:

  2) Don't use the command line, instead get a dump of the entire 
     device tree (including the SCSI device) - if the device is to be 
     removed or modified in future, libvirt would need to modify the 
     device tree

The basic problem would be that the command line config would have very
limited ability to override the device tree config.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:12                           ` Anthony Liguori
@ 2009-06-12 16:48                             ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 11:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> >   
> >> Mark McLoughlin wrote:
> >>     
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>   
> >>> = Solution - Separate configuration from compat hints =
> >>>
> >>> As I suggested before:
> >>>
> >>>   - Allow the VM manager to dump compat hints; this would be an opaque 
> >>>     file format, more like the savevm format than a config file
> >>>   
> >>>       
> >> How is compat hints different from a device tree?
> >>
> >> In my mind, that's what compat hints is.  I don't see another sane way 
> >> to implement it.
> >>     
> >
> > A device tree with a different purpose than a config file.
> >
> > In its simplest form it could be a device tree with a version number for
> > each device[1]. 
> >   
> 
> I think the point is that you don't need version numbers if you have a 
> proper device tree.

How do you add a new attribute to the device tree and, when a supplied
device tree lacks said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?

> NB the device tree contains no host configuration information.

So, it wouldn't e.g. include the path to the image file for a block
device? That would always be specified on the command line?

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 16:48                             ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 11:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> >   
> >> Mark McLoughlin wrote:
> >>     
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>   
> >>> = Solution - Separate configuration from compat hints =
> >>>
> >>> As I suggested before:
> >>>
> >>>   - Allow the VM manager to dump compat hints; this would be an opaque 
> >>>     file format, more like the savevm format than a config file
> >>>   
> >>>       
> >> How is compat hints different from a device tree?
> >>
> >> In my mind, that's what compat hints is.  I don't see another sane way 
> >> to implement it.
> >>     
> >
> > A device tree with a different purpose than a config file.
> >
> > In its simplest form it could be a device tree with a version number for
> > each device[1]. 
> >   
> 
> I think the point is that you don't need version numbers if you have a 
> proper device tree.

How do you add a new attribute to the device tree and, when a supplied
device tree lacks said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?

> NB the device tree contains no host configuration information.

So, it wouldn't e.g. include the path to the image file for a block
device? That would always be specified on the command line?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:12                           ` Anthony Liguori
  (?)
@ 2009-06-12 16:48                           ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 16:48 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 11:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> >   
> >> Mark McLoughlin wrote:
> >>     
> >>> On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> >>>   
> >>> = Solution - Separate configuration from compat hints =
> >>>
> >>> As I suggested before:
> >>>
> >>>   - Allow the VM manager to dump compat hints; this would be an opaque 
> >>>     file format, more like the savevm format than a config file
> >>>   
> >>>       
> >> How is compat hints different from a device tree?
> >>
> >> In my mind, that's what compat hints is.  I don't see another sane way 
> >> to implement it.
> >>     
> >
> > A device tree with a different purpose than a config file.
> >
> > In its simplest form it could be a device tree with a version number for
> > each device[1]. 
> >   
> 
> I think the point is that you don't need version numbers if you have a 
> proper device tree.

How do you add a new attribute to the device tree and, when a supplied
device tree lacks said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?

> NB the device tree contains no host configuration information.

So, it wouldn't e.g. include the path to the image file for a block
device? That would always be specified on the command line?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:48                             ` Mark McLoughlin
@ 2009-06-12 17:00                               ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 17:00 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> So, when libvirt creates a guest for the first time, it makes a copy of
> the device tree and continues to use that even if qemu is upgraded.
> That's enough to ensure compat is retained for all built-in devices.
>
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added an removed),
> we're back to the same problem ... either:
>
>   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
>      out what address to use, libvirt would need to query qemu for what 
>      address was originally allocated to device or it would do all the 
>      PCI address allocation itself ... or:
>
>   2) Don't use the command line, instead get a dump of the entire 
>      device tree (including the SCSI device) - if the device is to be 
>      removed or modified in future, libvirt would need to modify the 
>      device tree
>
> The basic problem would be that the command line config would have very
> limited ability to override the device tree config.
>   

After libvirt has done -drive file=foo... it should dump the machine 
config and use that from then on.

To combine into a single thread...
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacking said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>   

Please define "attribute".  I don't follow what you're asking.

>> NB the device tree contains no host configuration information.
>>     
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>   

No, the IDE definition would contain some sort of symbolic node name.  A 
separate mechanism (either command line or host config file) would then 
link an image file to the symbolic name.

libvirt should really never worry about the machine config file for 
normal things unless it needs to change what devices are exposed to a guest.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 17:00                               ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 17:00 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> So, when libvirt creates a guest for the first time, it makes a copy of
> the device tree and continues to use that even if qemu is upgraded.
> That's enough to ensure compat is retained for all built-in devices.
>
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added an removed),
> we're back to the same problem ... either:
>
>   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
>      out what address to use, libvirt would need to query qemu for what 
>      address was originally allocated to device or it would do all the 
>      PCI address allocation itself ... or:
>
>   2) Don't use the command line, instead get a dump of the entire 
>      device tree (including the SCSI device) - if the device is to be 
>      removed or modified in future, libvirt would need to modify the 
>      device tree
>
> The basic problem would be that the command line config would have very
> limited ability to override the device tree config.
>   

After libvirt has done -drive file=foo... it should dump the machine 
config and use that from then on.

To combine into a single thread...
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacking said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>   

Please define "attribute".  I don't follow what you're asking.

>> NB the device tree contains no host configuration information.
>>     
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>   

No, the IDE definition would contain some sort of symbolic node name.  A 
separate mechanism (either command line or host config file) would then 
link an image file to the symbolic name.

libvirt should really never worry about the machine config file for 
normal things unless it needs to change what devices are exposed to a guest.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:48                             ` Mark McLoughlin
  (?)
@ 2009-06-12 17:00                             ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-12 17:00 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> So, when libvirt creates a guest for the first time, it makes a copy of
> the device tree and continues to use that even if qemu is upgraded.
> That's enough to ensure compat is retained for all built-in devices.
>
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added an removed),
> we're back to the same problem ... either:
>
>   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
>      out what address to use, libvirt would need to query qemu for what 
>      address was originally allocated to device or it would do all the 
>      PCI address allocation itself ... or:
>
>   2) Don't use the command line, instead get a dump of the entire 
>      device tree (including the SCSI device) - if the device is to be 
>      removed or modified in future, libvirt would need to modify the 
>      device tree
>
> The basic problem would be that the command line config would have very
> limited ability to override the device tree config.
>   

After libvirt has done -drive file=foo... it should dump the machine 
config and use that from then on.

To combine into a single thread...
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacking said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>   

Please define "attribute".  I don't follow what you're asking.

>> NB the device tree contains no host configuration information.
>>     
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>   

No, the IDE definition would contain some sort of symbolic node name.  A 
separate mechanism (either command line or host config file) would then 
link an image file to the symbolic name.

libvirt should really never worry about the machine config file for 
normal things unless it needs to change what devices are exposed to a guest.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:00                               ` Anthony Liguori
@ 2009-06-12 17:31                                 ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:31 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So, when libvirt creates a guest for the first time, it makes a copy of
> > the device tree and continues to use that even if qemu is upgraded.
> > That's enough to ensure compat is retained for all built-in devices.
> >
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added an removed),
> > we're back to the same problem ... either:
> >
> >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
> >      out what address to use, libvirt would need to query qemu for what 
> >      address was originally allocated to device or it would do all the 
> >      PCI address allocation itself ... or:
> >
> >   2) Don't use the command line, instead get a dump of the entire 
> >      device tree (including the SCSI device) - if the device is to be 
> >      removed or modified in future, libvirt would need to modify the 
> >      device tree
> >
> > The basic problem would be that the command line config would have very
> > limited ability to override the device tree config.
> >   
> 
> After libvirt has done -drive file=foo... it should dump the machine 
> config and use that from then on.

Right - libvirt then wouldn't be able to avoid the complexity of merging
any future changes into the dumped machine config.

> To combined to a single thread...
> > How do you add a new attribute to the device tree and, when a supplied
> > device tree lacking said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >   
> 
> Please define "attribute".  I don't follow what you're asking.

e.g. a per-device "enable MSI support" flag.

If qemu is supplied with a device tree that lacks that flag, does it
enable or disable MSI?

Enable by default is bad - it could be a device tree dumped from an old
version of qemu, so compat would be broken.

Disable by default is bad - it could be a simple device tree supplied by
the user, and the latest features are wanted.

Maybe we want a per-device "this is a complete device description" flag
and if anything is missing from a supposedly complete description, the
old defaults would be used. A config dumped from qemu would have this
flag set, a config generated by libvirt would not have the flag.

> >> NB the device tree contains no host configuration information.
> >>     
> >
> > So, it wouldn't e.g. include the path to the image file for a block
> > device? That would always be specified on the command line?
> >   
> 
> No, the IDE definition would contain some sort of symbolic node name.  A 
> separate mechanism (either command line or host config file) would then 
> link a image file to the symbolic name.

Okay.

> libvirt should really never worry about the machine config file for 
> normal things unless it needs to change what devices are exposed to a guest.

But changing devices *is* normal ... e.g. removing a block device.

Writing out a device tree is not a problem for libvirt (or any other
management tool); it's the need to merge changes into an existing
device tree where the real complexity would lie.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 17:31                                 ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:31 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So, when libvirt creates a guest for the first time, it makes a copy of
> > the device tree and continues to use that even if qemu is upgraded.
> > That's enough to ensure compat is retained for all built-in devices.
> >
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added an removed),
> > we're back to the same problem ... either:
> >
> >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
> >      out what address to use, libvirt would need to query qemu for what 
> >      address was originally allocated to device or it would do all the 
> >      PCI address allocation itself ... or:
> >
> >   2) Don't use the command line, instead get a dump of the entire 
> >      device tree (including the SCSI device) - if the device is to be 
> >      removed or modified in future, libvirt would need to modify the 
> >      device tree
> >
> > The basic problem would be that the command line config would have very
> > limited ability to override the device tree config.
> >   
> 
> After libvirt has done -drive file=foo... it should dump the machine 
> config and use that from then on.

Right - libvirt then wouldn't be able to avoid the complexity of merging
any future changes into the dumped machine config.

> To combined to a single thread...
> > How do you add a new attribute to the device tree and, when a supplied
> > device tree lacking said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >   
> 
> Please define "attribute".  I don't follow what you're asking.

e.g. a per-device "enable MSI support" flag.

If qemu is supplied with a device tree that lacks that flag, does it
enable or disable MSI?

Enable by default is bad - it could be a device tree dumped from an old
version of qemu, so compat would be broken.

Disable by default is bad - it could be a simple device tree supplied by
the user, and the latest features are wanted.

Maybe we want a per-device "this is a complete device description" flag
and if anything is missing from a supposedly complete description, the
old defaults would be used. A config dumped from qemu would have this
flag set, a config generated by libvirt would not have the flag.

> >> NB the device tree contains no host configuration information.
> >>     
> >
> > So, it wouldn't e.g. include the path to the image file for a block
> > device? That would always be specified on the command line?
> >   
> 
> No, the IDE definition would contain some sort of symbolic node name.  A 
> separate mechanism (either command line or host config file) would then 
> link a image file to the symbolic name.

Okay.

> libvirt should really never worry about the machine config file for 
> normal things unless it needs to change what devices are exposed to a guest.

But changing devices *is* normal ... e.g. removing a block device.

Writing out a device tree is not a problem for libvirt (or any other
management tool); it's the need to merge changes into an existing
device tree where the real complexity would lie.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:00                               ` Anthony Liguori
  (?)
  (?)
@ 2009-06-12 17:31                               ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:31 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So, when libvirt creates a guest for the first time, it makes a copy of
> > the device tree and continues to use that even if qemu is upgraded.
> > That's enough to ensure compat is retained for all built-in devices.
> >
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added an removed),
> > we're back to the same problem ... either:
> >
> >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
> >      out what address to use, libvirt would need to query qemu for what 
> >      address was originally allocated to device or it would do all the 
> >      PCI address allocation itself ... or:
> >
> >   2) Don't use the command line, instead get a dump of the entire 
> >      device tree (including the SCSI device) - if the device is to be 
> >      removed or modified in future, libvirt would need to modify the 
> >      device tree
> >
> > The basic problem would be that the command line config would have very
> > limited ability to override the device tree config.
> >   
> 
> After libvirt has done -drive file=foo... it should dump the machine 
> config and use that from then on.

Right - libvirt then wouldn't be able to avoid the complexity of merging
any future changes into the dumped machine config.

> To combined to a single thread...
> > How do you add a new attribute to the device tree and, when a supplied
> > device tree lacking said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >   
> 
> Please define "attribute".  I don't follow what you're asking.

e.g. a per-device "enable MSI support" flag.

If qemu is supplied with a device tree that lacks that flag, does it
enable or disable MSI?

Enable by default is bad - it could be a device tree dumped from an old
version of qemu, so compat would be broken.

Disable by default is bad - it could be a simple device tree supplied by
the user, and the latest features are wanted.

Maybe we want a per-device "this is a complete device description" flag
and if anything is missing from a supposedly complete description, the
old defaults would be used. A config dumped from qemu would have this
flag set, a config generated by libvirt would not have the flag.

> >> NB the device tree contains no host configuration information.
> >>     
> >
> > So, it wouldn't e.g. include the path to the image file for a block
> > device? That would always be specified on the command line?
> >   
> 
> No, the IDE definition would contain some sort of symbolic node name.  A 
> separate mechanism (either command line or host config file) would then 
> link a image file to the symbolic name.

Okay.

> libvirt should really never worry about the machine config file for 
> normal things unless it needs to change what devices are exposed to a guest.

But changing devices *is* normal ... e.g. removing a block device.

Writing out a device tree is not a problem for libvirt (or any other
management tool); it's the need to merge changes into an existing
device tree where the real complexity would lie.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:31                                 ` Mark McLoughlin
@ 2009-06-12 17:44                                   ` Blue Swirl
  -1 siblings, 0 replies; 457+ messages in thread
From: Blue Swirl @ 2009-06-12 17:44 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Christian Borntraeger, Paul Brook, Avi Kivity

On 6/12/09, Mark McLoughlin <markmc@redhat.com> wrote:
> On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
>  > Mark McLoughlin wrote:
>  > > So, when libvirt creates a guest for the first time, it makes a copy of
>  > > the device tree and continues to use that even if qemu is upgraded.
>  > > That's enough to ensure compat is retained for all built-in devices.
>  > >
>  > > However, in order to retain compat for that SCSI device (e.g. ensuring
>  > > the PCI address doesn't change as other devices are added and removed),
>  > > we're back to the same problem ... either:
>  > >
>  > >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>  > >      out what address to use, libvirt would need to query qemu for what
>  > >      address was originally allocated to device or it would do all the
>  > >      PCI address allocation itself ... or:
>  > >
>  > >   2) Don't use the command line, instead get a dump of the entire
>  > >      device tree (including the SCSI device) - if the device is to be
>  > >      removed or modified in future, libvirt would need to modify the
>  > >      device tree
>  > >
>  > > The basic problem would be that the command line config would have very
>  > > limited ability to override the device tree config.
>  > >
>  >
>  > After libvirt has done -drive file=foo... it should dump the machine
>  > config and use that from then on.
>
>  Right - libvirt then wouldn't be able to avoid the complexity of merging
>  any future changes into the dumped machine config.
>
>  > To combine into a single thread...
>  > > How do you add a new attribute to the device tree and, when a supplied
>  > > device tree lacks said attribute, distinguish between a device tree
>  > > from an old version of qemu (i.e. use the old default) and a partial
>  > > device tree from the VM manager (i.e. use the new default) ?
>  > >
>  >
>  > Please define "attribute".  I don't follow what you're asking.
>
>  e.g. a per-device "enable MSI support" flag.
>
>  If qemu is supplied with a device tree that lacks that flag, does it
>  enable or disable MSI?
>
>  Enable by default is bad - it could be a device tree dumped from an old
>  version of qemu, so compat would be broken.
>
>  Disable by default is bad - it could be a simple device tree supplied by
>  the user, and the latest features are wanted.
>
>  Maybe we want a per-device "this is a complete device description" flag
>  and if anything is missing from a supposedly complete description, the
>  old defaults would be used. A config dumped from qemu would have this
>  flag set, a config generated by libvirt would not have the flag.

If the device has different behavior or different properties from
guest perspective compared to the old device, it should get a new
device type so that you could specify in the device tree either the
old device or the new one. Flags won't help in the long term.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 17:44                                   ` Blue Swirl
  0 siblings, 0 replies; 457+ messages in thread
From: Blue Swirl @ 2009-06-12 17:44 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Christian Borntraeger,
	Paul Brook, Avi Kivity

On 6/12/09, Mark McLoughlin <markmc@redhat.com> wrote:
> On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
>  > Mark McLoughlin wrote:
>  > > So, when libvirt creates a guest for the first time, it makes a copy of
>  > > the device tree and continues to use that even if qemu is upgraded.
>  > > That's enough to ensure compat is retained for all built-in devices.
>  > >
>  > > However, in order to retain compat for that SCSI device (e.g. ensuring
>  > > the PCI address doesn't change as other devices are added and removed),
>  > > we're back to the same problem ... either:
>  > >
>  > >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>  > >      out what address to use, libvirt would need to query qemu for what
>  > >      address was originally allocated to device or it would do all the
>  > >      PCI address allocation itself ... or:
>  > >
>  > >   2) Don't use the command line, instead get a dump of the entire
>  > >      device tree (including the SCSI device) - if the device is to be
>  > >      removed or modified in future, libvirt would need to modify the
>  > >      device tree
>  > >
>  > > The basic problem would be that the command line config would have very
>  > > limited ability to override the device tree config.
>  > >
>  >
>  > After libvirt has done -drive file=foo... it should dump the machine
>  > config and use that from then on.
>
>  Right - libvirt then wouldn't be able to avoid the complexity of merging
>  any future changes into the dumped machine config.
>
>  > To combine into a single thread...
>  > > How do you add a new attribute to the device tree and, when a supplied
>  > > device tree lacks said attribute, distinguish between a device tree
>  > > from an old version of qemu (i.e. use the old default) and a partial
>  > > device tree from the VM manager (i.e. use the new default) ?
>  > >
>  >
>  > Please define "attribute".  I don't follow what you're asking.
>
>  e.g. a per-device "enable MSI support" flag.
>
>  If qemu is supplied with a device tree that lacks that flag, does it
>  enable or disable MSI?
>
>  Enable by default is bad - it could be a device tree dumped from an old
>  version of qemu, so compat would be broken.
>
>  Disable by default is bad - it could be a simple device tree supplied by
>  the user, and the latest features are wanted.
>
>  Maybe we want a per-device "this is a complete device description" flag
>  and if anything is missing from a supposedly complete description, the
>  old defaults would be used. A config dumped from qemu would have this
>  flag set, a config generated by libvirt would not have the flag.

If the device has different behavior or different properties from
guest perspective compared to the old device, it should get a new
device type so that you could specify in the device tree either the
old device or the new one. Flags won't help in the long term.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:31                                 ` Mark McLoughlin
  (?)
@ 2009-06-12 17:44                                 ` Blue Swirl
  -1 siblings, 0 replies; 457+ messages in thread
From: Blue Swirl @ 2009-06-12 17:44 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Christian Borntraeger,
	Paul Brook, Anthony Liguori, Avi Kivity

On 6/12/09, Mark McLoughlin <markmc@redhat.com> wrote:
> On Fri, 2009-06-12 at 12:00 -0500, Anthony Liguori wrote:
>  > Mark McLoughlin wrote:
>  > > So, when libvirt creates a guest for the first time, it makes a copy of
>  > > the device tree and continues to use that even if qemu is upgraded.
>  > > That's enough to ensure compat is retained for all built-in devices.
>  > >
>  > > However, in order to retain compat for that SCSI device (e.g. ensuring
>  > > the PCI address doesn't change as other devices are added and removed),
>  > > we're back to the same problem ... either:
>  > >
>  > >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>  > >      out what address to use, libvirt would need to query qemu for what
>  > >      address was originally allocated to device or it would do all the
>  > >      PCI address allocation itself ... or:
>  > >
>  > >   2) Don't use the command line, instead get a dump of the entire
>  > >      device tree (including the SCSI device) - if the device is to be
>  > >      removed or modified in future, libvirt would need to modify the
>  > >      device tree
>  > >
>  > > The basic problem would be that the command line config would have very
>  > > limited ability to override the device tree config.
>  > >
>  >
>  > After libvirt has done -drive file=foo... it should dump the machine
>  > config and use that from then on.
>
>  Right - libvirt then wouldn't be able to avoid the complexity of merging
>  any future changes into the dumped machine config.
>
>  > To combine into a single thread...
>  > > How do you add a new attribute to the device tree and, when a supplied
>  > > device tree lacks said attribute, distinguish between a device tree
>  > > from an old version of qemu (i.e. use the old default) and a partial
>  > > device tree from the VM manager (i.e. use the new default) ?
>  > >
>  >
>  > Please define "attribute".  I don't follow what you're asking.
>
>  e.g. a per-device "enable MSI support" flag.
>
>  If qemu is supplied with a device tree that lacks that flag, does it
>  enable or disable MSI?
>
>  Enable by default is bad - it could be a device tree dumped from an old
>  version of qemu, so compat would be broken.
>
>  Disable by default is bad - it could be a simple device tree supplied by
>  the user, and the latest features are wanted.
>
>  Maybe we want a per-device "this is a complete device description" flag
>  and if anything is missing from a supposedly complete description, the
>  old defaults would be used. A config dumped from qemu would have this
>  flag set, a config generated by libvirt would not have the flag.

If the device has different behavior or different properties from
guest perspective compared to the old device, it should get a new
device type so that you could specify in the device tree either the
old device or the new one. Flags won't help in the long term.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:44                                   ` Blue Swirl
@ 2009-06-12 17:55                                     ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:55 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 20:44 +0300, Blue Swirl wrote:

> If the device has different behavior or different properties from
> guest perspective compared to the old device, it should get a new
> device type so that you could specify in the device tree either the
> old device or the new one.

Yes, that works - it's analogous to a device (type, version) pair.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-12 17:55                                     ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:55 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Christian Borntraeger,
	Paul Brook, Avi Kivity

On Fri, 2009-06-12 at 20:44 +0300, Blue Swirl wrote:

> If the device has different behavior or different properties from
> guest perspective compared to the old device, it should get a new
> device type so that you could specify in the device tree either the
> old device or the new one.

Yes, that works - it's analogous to a device (type, version) pair.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:44                                   ` Blue Swirl
  (?)
@ 2009-06-12 17:55                                   ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-12 17:55 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Christian Borntraeger,
	Paul Brook, Anthony Liguori, Avi Kivity

On Fri, 2009-06-12 at 20:44 +0300, Blue Swirl wrote:

> If the device has different behavior or different properties from
> guest perspective compared to the old device, it should get a new
> device type so that you could specify in the device tree either the
> old device or the new one.

Yes, that works - it's analogous to a device (type, version) pair.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 14:51                       ` Anthony Liguori
@ 2009-06-14  7:55                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  7:55 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Anthony Liguori wrote:
>
> What's the problem with parsing the device config and modifying it?  
> Is it just complexity?

Two-way modification.  Management wants to store the configuration in 
their database and tell the hypervisor what the machine looks like.  If 
qemu also tells management what the machine looks like, we can easily 
get conflicts.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-14  7:55                         ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  7:55 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Anthony Liguori wrote:
>
> What's the problem with parsing the device config and modifying it?  
> Is it just complexity?

Two-way modification.  Management wants to store the configuration in 
their database and tell the hypervisor what the machine looks like.  If 
qemu also tells management what the machine looks like, we can easily 
get conflicts.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 14:51                       ` Anthony Liguori
                                         ` (2 preceding siblings ...)
  (?)
@ 2009-06-14  7:55                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  7:55 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Anthony Liguori wrote:
>
> What's the problem with parsing the device config and modifying it?  
> Is it just complexity?

Two-way modification.  Management wants to store the configuration in 
their database and tell the hypervisor what the machine looks like.  If 
qemu also tells management what the machine looks like, we can easily 
get conflicts.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:48                             ` Mark McLoughlin
@ 2009-06-14  7:58                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  7:58 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Mark McLoughlin wrote:

  

>> I think the point is that you don't need version numbers if you have a 
>> proper device tree.
>>     
>
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacks said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>   

-baseline 0.10

>   
>> NB the device tree contains no host configuration information.
>>     
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>   

Or in a different file.  I agree splitting host and guest configuration 
is a must-have, this ensures portability of virtual machines across 
hosts and time.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-14  7:58                               ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  7:58 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Mark McLoughlin wrote:

  

>> I think the point is that you don't need version numbers if you have a 
>> proper device tree.
>>     
>
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacks said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>   

-baseline 0.10

>   
>> NB the device tree contains no host configuration information.
>>     
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>   

Or in a different file.  I agree splitting host and guest configuration 
is a must-have, this ensures portability of virtual machines across 
hosts and time.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:48                             ` Mark McLoughlin
  (?)
@ 2009-06-14  7:58                             ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  7:58 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori

Mark McLoughlin wrote:

  

>> I think the point is that you don't need version numbers if you have a 
>> proper device tree.
>>     
>
> How do you add a new attribute to the device tree and, when a supplied
> device tree lacks said attribute, distinguish between a device tree
> from an old version of qemu (i.e. use the old default) and a partial
> device tree from the VM manager (i.e. use the new default) ?
>   

-baseline 0.10

>   
>> NB the device tree contains no host configuration information.
>>     
>
> So, it wouldn't e.g. include the path to the image file for a block
> device? That would always be specified on the command line?
>   

Or in a different file.  I agree splitting host and guest configuration 
is a must-have, this ensures portability of virtual machines across 
hosts and time.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 15:53                         ` Mark McLoughlin
@ 2009-06-14  9:34                           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:34 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> > Mark McLoughlin wrote:
> > > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> > >   
> > > = Solution - Separate configuration from compat hints =
> > >
> > > As I suggested before:
> > >
> > >   - Allow the VM manager to dump compat hints; this would be an opaque 
> > >     file format, more like the savevm format than a config file
> > >   
> > 
> > How is compat hints different from a device tree?
> > 
> > In my mind, that's what compat hints is.  I don't see another sane way 
> > to implement it.
> 
> A device tree with a different purpose than a config file.
> 
> In its simplest form it could be a device tree with a version number for
> each device[1]. 
> 
> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.

Could you clarify this requirement please?

If we want to remove a device from under a running guest, you need
hotplug. So we can't just remove several lines from the config and hope
that it'll work simply because the PCI address is stable.

OTOH, if you reboot the guest, it's ok for addresses to change.


> Cheers,
> Mark.
> 
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-14  9:34                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:34 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> > Mark McLoughlin wrote:
> > > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> > >   
> > > = Solution - Separate configuration from compat hints =
> > >
> > > As I suggested before:
> > >
> > >   - Allow the VM manager to dump compat hints; this would be an opaque 
> > >     file format, more like the savevm format than a config file
> > >   
> > 
> > How is compat hints different from a device tree?
> > 
> > In my mind, that's what compat hints is.  I don't see another sane way 
> > to implement it.
> 
> A device tree with a different purpose than a config file.
> 
> In its simplest form it could be a device tree with a version number for
> each device[1]. 
> 
> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.

Could you clarify this requirement please?

If we want to remove a device from under a running guest, you need
hotplug. So we can't just remove several lines from the config and hope
that it'll work simply because the PCI address is stable.

OTOH, if you reboot the guest, it's ok for addresses to change.


> Cheers,
> Mark.
> 
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 15:53                         ` Mark McLoughlin
                                           ` (2 preceding siblings ...)
  (?)
@ 2009-06-14  9:34                         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:34 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Glauber Costa, Jamie Lokier, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori, Avi Kivity

On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> On Fri, 2009-06-12 at 09:55 -0500, Anthony Liguori wrote:
> > Mark McLoughlin wrote:
> > > On Wed, 2009-06-10 at 20:27 +0100, Jamie Lokier wrote:
> > >   
> > > = Solution - Separate configuration from compat hints =
> > >
> > > As I suggested before:
> > >
> > >   - Allow the VM manager to dump compat hints; this would be an opaque 
> > >     file format, more like the savevm format than a config file
> > >   
> > 
> > How is compat hints different from a device tree?
> > 
> > In my mind, that's what compat hints is.  I don't see another sane way 
> > to implement it.
> 
> A device tree with a different purpose than a config file.
> 
> In its simplest form it could be a device tree with a version number for
> each device[1]. 
> 
> The other obvious piece to add to it would be PCI addresses, so that
> even if you remove a device, the addresses assigned to existing devices
> don't change.

Could you clarify this requirement please?

If we want to remove a device from under a running guest, you need
hotplug. So we can't just remove several lines from the config and hope
that it'll work simply because the PCI address is stable.

OTOH, if you reboot the guest, it's ok for addresses to change.


> Cheers,
> Mark.
> 
> [1] - Adding such a per-device version number to the config file would
> solve problem (2)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:34                           ` Michael S. Tsirkin
@ 2009-06-14  9:37                             ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  9:37 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Michael S. Tsirkin wrote:
>
> If we want to remove a device from under a running guest, you need
> hotplug. So we can't just remove several lines from the config and hope
> that it'll work simply because the PCI address is stable.
>   

Why not?

> OTOH, if you reboot the guest, it's ok for addresses to change.
>   

No, it's not.  Some guests depend on addressing for their configuration 
(for example older Linux guests will swap eth0/eth1 if you swap their 
slots).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-14  9:37                             ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  9:37 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

Michael S. Tsirkin wrote:
>
> If we want to remove a device from under a running guest, you need
> hotplug. So we can't just remove several lines from the config and hope
> that it'll work simply because the PCI address is stable.
>   

Why not?

> OTOH, if you reboot the guest, it's ok for addresses to change.
>   

No, it's not.  Some guests depend on addressing for their configuration 
(for example older Linux guests will swap eth0/eth1 if you swap their 
slots).

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:34                           ` Michael S. Tsirkin
  (?)
@ 2009-06-14  9:37                           ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-14  9:37 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

Michael S. Tsirkin wrote:
>
> If we want to remove a device from under a running guest, you need
> hotplug. So we can't just remove several lines from the config and hope
> that it'll work simply because the PCI address is stable.
>   

Why not?

> OTOH, if you reboot the guest, it's ok for addresses to change.
>   

No, it's not.  Some guests depend on addressing for their configuration 
(for example older Linux guests will swap eth0/eth1 if you swap their 
slots).

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:37                             ` Avi Kivity
@ 2009-06-14  9:47                               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:47 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Sun, Jun 14, 2009 at 12:37:13PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>>
>> If we want to remove a device from under a running guest, you need
>> hotplug. So we can't just remove several lines from the config and hope
>> that it'll work simply because the PCI address is stable.
>>   
>
> Why not?

E.g. configuration cycles address a specific bus/slot.
You need cooperation from guest if you want to move
a device.

>> OTOH, if you reboot the guest, it's ok for addresses to change.
>>   
>
> No, it's not.  Some guests depend on addressing for their configuration  
> (for example older Linux guests will swap eth0/eth1 if you swap their  
> slots).

Ah, I misunderstood what's meant by the address. I agree that it's
useful to be able to control device's placement on the bus.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-14  9:47                               ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:47 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On Sun, Jun 14, 2009 at 12:37:13PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>>
>> If we want to remove a device from under a running guest, you need
>> hotplug. So we can't just remove several lines from the config and hope
>> that it'll work simply because the PCI address is stable.
>>   
>
> Why not?

E.g. configuration cycles address a specific bus/slot.
You need cooperation from guest if you want to move
a device.

>> OTOH, if you reboot the guest, it's ok for addresses to change.
>>   
>
> No, it's not.  Some guests depend on addressing for their configuration  
> (for example older Linux guests will swap eth0/eth1 if you swap their  
> slots).

Ah, I misunderstood what's meant by the address. I agree that it's
useful to be able to control device's placement on the bus.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:37                             ` Avi Kivity
  (?)
  (?)
@ 2009-06-14  9:47                             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:47 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On Sun, Jun 14, 2009 at 12:37:13PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>>
>> If we want to remove a device from under a running guest, you need
>> hotplug. So we can't just remove several lines from the config and hope
>> that it'll work simply because the PCI address is stable.
>>   
>
> Why not?

E.g. configuration cycles address a specific bus/slot.
You need cooperation from guest if you want to move
a device.

>> OTOH, if you reboot the guest, it's ok for addresses to change.
>>   
>
> No, it's not.  Some guests depend on addressing for their configuration  
> (for example older Linux guests will swap eth0/eth1 if you swap their  
> slots).

Ah, I misunderstood what's meant by the address. I agree that it's
useful to be able to control device's placement on the bus.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:48                             ` Mark McLoughlin
@ 2009-06-14  9:50                               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:50 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added and removed),
> we're back to the same problem ... either:
> 
>   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
>      out what address to use, libvirt would need to query qemu for what 
>      address was originally allocated to device or it would do all the 
>      PCI address allocation itself ... 

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-14  9:50                               ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:50 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added and removed),
> we're back to the same problem ... either:
> 
>   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
>      out what address to use, libvirt would need to query qemu for what 
>      address was originally allocated to device or it would do all the 
>      PCI address allocation itself ... 

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 16:48                             ` Mark McLoughlin
                                               ` (3 preceding siblings ...)
  (?)
@ 2009-06-14  9:50                             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-14  9:50 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Glauber Costa, Jamie Lokier, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori, Avi Kivity

On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> However, in order to retain compat for that SCSI device (e.g. ensuring
> the PCI address doesn't change as other devices are added and removed),
> we're back to the same problem ... either:
> 
>   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
>      out what address to use, libvirt would need to query qemu for what 
>      address was originally allocated to device or it would do all the 
>      PCI address allocation itself ... 

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-14  7:58                               ` Avi Kivity
@ 2009-06-15  5:32                                 ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15  5:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Carsten Otte, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity <avi@redhat.com> writes:

> Mark McLoughlin wrote:
[...]
>>> NB the device tree contains no host configuration information.
>>>     
>>
>> So, it wouldn't e.g. include the path to the image file for a block
>> device? That would always be specified on the command line?
>>   
>
> Or in a different file.  I agree splitting host and guest
> configuration is a must-have, this ensures portability of virtual
> machines across hosts and time.

Splitting into two separate sections should suffice, they could live in
the same file for convenience.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15  5:32                                 ` Markus Armbruster
  0 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15  5:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Carsten Otte

Avi Kivity <avi@redhat.com> writes:

> Mark McLoughlin wrote:
[...]
>>> NB the device tree contains no host configuration information.
>>>     
>>
>> So, it wouldn't e.g. include the path to the image file for a block
>> device? That would always be specified on the command line?
>>   
>
> Or in a different file.  I agree splitting host and guest
> configuration is a must-have, this ensures portability of virtual
> machines across hosts and time.

Splitting into two separate sections should suffice, they could live in
the same file for convenience.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-14  7:58                               ` Avi Kivity
  (?)
@ 2009-06-15  5:32                               ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15  5:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Carsten Otte

Avi Kivity <avi@redhat.com> writes:

> Mark McLoughlin wrote:
[...]
>>> NB the device tree contains no host configuration information.
>>>     
>>
>> So, it wouldn't e.g. include the path to the image file for a block
>> device? That would always be specified on the command line?
>>   
>
> Or in a different file.  I agree splitting host and guest
> configuration is a must-have, this ensures portability of virtual
> machines across hosts and time.

Splitting into two separate sections should suffice, they could live in
the same file for convenience.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:34                           ` Michael S. Tsirkin
@ 2009-06-15  9:02                             ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Sun, 2009-06-14 at 12:34 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> > The other obvious piece to add to it would be PCI addresses, so that
> > even if you remove a device, the addresses assigned to existing devices
> > don't change.
> 
> Could you clarify this requirement please?

Avi clarified, but I've written it up here too:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15  9:02                             ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Sun, 2009-06-14 at 12:34 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> > The other obvious piece to add to it would be PCI addresses, so that
> > even if you remove a device, the addresses assigned to existing devices
> > don't change.
> 
> Could you clarify this requirement please?

Avi clarified, but I've written it up here too:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:34                           ` Michael S. Tsirkin
                                             ` (2 preceding siblings ...)
  (?)
@ 2009-06-15  9:02                           ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Jamie Lokier, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori, Avi Kivity

On Sun, 2009-06-14 at 12:34 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
> > The other obvious piece to add to it would be PCI addresses, so that
> > even if you remove a device, the addresses assigned to existing devices
> > don't change.
> 
> Could you clarify this requirement please?

Avi clarified, but I've written it up here too:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:50                               ` Michael S. Tsirkin
@ 2009-06-15  9:08                                 ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:08 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Anthony Liguori, Jamie Lokier, Carsten Otte, kvm, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

On Sun, 2009-06-14 at 12:50 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added and removed),
> > we're back to the same problem ... either:
> > 
> >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
> >      out what address to use, libvirt would need to query qemu for what 
> >      address was originally allocated to device or it would do all the 
> >      PCI address allocation itself ... 
> 
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?

Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.

However, the first problem is that it isn't a solution to the guest ABI
problem more generally.

And the second problem is that for e.g. libvirt to use it, it would have
to be possible to query qemu for what PCI slots were assigned to the
devices - libvirt would need to be able to parse 'info pci' and match
the devices listed with the devices specified on the command line.

Again, details written up here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15  9:08                                 ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:08 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

On Sun, 2009-06-14 at 12:50 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added and removed),
> > we're back to the same problem ... either:
> > 
> >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
> >      out what address to use, libvirt would need to query qemu for what 
> >      address was originally allocated to device or it would do all the 
> >      PCI address allocation itself ... 
> 
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?

Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.

However, the first problem is that it isn't a solution to the guest ABI
problem more generally.

And the second problem is that for e.g. libvirt to use it, it would have
to be possible to query qemu for what PCI slots were assigned to the
devices - libvirt would need to be able to parse 'info pci' and match
the devices listed with the devices specified on the command line.

Again, details written up here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:50                               ` Michael S. Tsirkin
  (?)
@ 2009-06-15  9:08                               ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:08 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Jamie Lokier, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori, Avi Kivity

On Sun, 2009-06-14 at 12:50 +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
> > However, in order to retain compat for that SCSI device (e.g. ensuring
> > the PCI address doesn't change as other devices are added and removed),
> > we're back to the same problem ... either:
> > 
> >   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
> >      out what address to use, libvirt would need to query qemu for what 
> >      address was originally allocated to device or it would do all the 
> >      PCI address allocation itself ... 
> 
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?

Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.

However, the first problem is that it isn't a solution to the guest ABI
problem more generally.

And the second problem is that for e.g. libvirt to use it, it would have
to be possible to query qemu for what PCI slots were assigned to the
devices - libvirt would need to be able to parse 'info pci' and match
the devices listed with the devices specified on the command line.

Again, details written up here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  7:58                               ` Avi Kivity
@ 2009-06-15  9:09                                 ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:09 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Sun, 2009-06-14 at 10:58 +0300, Avi Kivity wrote:
> Mark McLoughlin wrote:
> 
>   
> 
> >> I think the point is that you don't need version numbers if you have a 
> >> proper device tree.
> >>     
> >
> > How do you add a new attribute to the device tree and, when a supplied
> > > device tree lacks said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >   
> 
> -baseline 0.10

That's a version number :-)

(I was responding to Anthony's "you don't need a version number")

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15  9:09                                 ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:09 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On Sun, 2009-06-14 at 10:58 +0300, Avi Kivity wrote:
> Mark McLoughlin wrote:
> 
>   
> 
> >> I think the point is that you don't need version numbers if you have a 
> >> proper device tree.
> >>     
> >
> > How do you add a new attribute to the device tree and, when a supplied
> > > device tree lacks said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >   
> 
> -baseline 0.10

That's a version number :-)

(I was responding to Anthony's "you don't need a version number")

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  7:58                               ` Avi Kivity
                                                 ` (3 preceding siblings ...)
  (?)
@ 2009-06-15  9:09                               ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15  9:09 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori

On Sun, 2009-06-14 at 10:58 +0300, Avi Kivity wrote:
> Mark McLoughlin wrote:
> 
>   
> 
> >> I think the point is that you don't need version numbers if you have a 
> >> proper device tree.
> >>     
> >
> > How do you add a new attribute to the device tree and, when a supplied
> > > device tree lacks said attribute, distinguish between a device tree
> > from an old version of qemu (i.e. use the old default) and a partial
> > device tree from the VM manager (i.e. use the new default) ?
> >   
> 
> -baseline 0.10

That's a version number :-)

(I was responding to Anthony's "you don't need a version number")

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:08                                 ` Mark McLoughlin
@ 2009-06-15  9:27                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:27 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Michael S. Tsirkin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>      
>
> Yep, most people seem to agree that it makes sense to allow this, but
> some believe it should only be via a machine description file, not the
> command line.
>    

I don't understand this opposition.  It's clear a machine config file is 
a long way in our future.  It's also clear lack of stable PCI addresses 
hurts us now.

> However, the first problem is that it isn't a solution to the guest ABI
> problem more generally.
>    

pci_addr was never meant to bring world peace, just stable PCI 
addresses.  The other issues should be addressed separately.

> And the second problem is that for e.g. libvirt to use it, it would have
> to be possible to query qemu for what PCI slots were assigned to the
> devices - libvirt would need to be able to parse 'info pci' and match
> the devices listed with the devices specified on the command line.
>    

If all devices (including vga, ide) are set up with pci_addr, then this 
is unneeded.  You do need to export available slot numbers from qemu.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15  9:27                                   ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:27 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>      
>
> Yep, most people seem to agree that it makes sense to allow this, but
> some believe it should only be via a machine description file, not the
> command line.
>    

I don't understand this opposition.  It's clear a machine config file is 
a long way in our future.  It's also clear lack of stable PCI addresses 
hurts us now.

> However, the first problem is that it isn't a solution to the guest ABI
> problem more generally.
>    

pci_addr was never meant to bring world peace, just stable PCI 
addresses.  The other issues should be addressed separately.

> And the second problem is that for e.g. libvirt to use it, it would have
> to be possible to query qemu for what PCI slots were assigned to the
> devices - libvirt would need to be able to parse 'info pci' and match
> the devices listed with the devices specified on the command line.
>    

If all devices (including vga, ide) are set up with pci_addr, then this 
is unneeded.  You do need to export available slot numbers from qemu.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:08                                 ` Mark McLoughlin
  (?)
  (?)
@ 2009-06-15  9:27                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:27 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori

On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>      
>
> Yep, most people seem to agree that it makes sense to allow this, but
> some believe it should only be via a machine description file, not the
> command line.
>    

I don't understand this opposition.  It's clear a machine config file is 
a long way in our future.  It's also clear lack of stable PCI addresses 
hurts us now.

> However, the first problem is that it isn't a solution to the guest ABI
> problem more generally.
>    

pci_addr was never meant to bring world peace, just stable PCI 
addresses.  The other issues should be addressed separately.

> And the second problem is that for e.g. libvirt to use it, it would have
> to be possible to query qemu for what PCI slots were assigned to the
> devices - libvirt would need to be able to parse 'info pci' and match
> the devices listed with the devices specified on the command line.
>    

If all devices (including vga, ide) are set up with pci_addr, then this 
is unneeded.  You do need to export available slot numbers from qemu.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:47                               ` Michael S. Tsirkin
@ 2009-06-15  9:38                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:38 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/14/2009 12:47 PM, Michael S. Tsirkin wrote:
> Michael S. Tsirkin wrote:
>>> If we want to remove a device from under a running guest, you need
>>> hotplug. So we can't just remove several lines from the config and hope
>>> that it'll work simply because the PCI address is stable.
>>>
>>>        
>> Why not?
>>      
>
> E.g. configuration cycles address a specific bus/slot.
> You need cooperation from guest if you want to move
> a device.
>    

By "remove several lines from the config" I understood the guest needs 
to be restarted.  Of course if you don't restart the guest you need true 
hotplug.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15  9:38                                 ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:38 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On 06/14/2009 12:47 PM, Michael S. Tsirkin wrote:
> Michael S. Tsirkin wrote:
>>> If we want to remove a device from under a running guest, you need
>>> hotplug. So we can't just remove several lines from the config and hope
>>> that it'll work simply because the PCI address is stable.
>>>
>>>        
>> Why not?
>>      
>
> E.g. configuration cycles address a specific bus/slot.
> You need cooperation from guest if you want to move
> a device.
>    

By "remove several lines from the config" I understood the guest needs 
to be restarted.  Of course if you don't restart the guest you need true 
hotplug.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:47                               ` Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-15  9:38                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:38 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On 06/14/2009 12:47 PM, Michael S. Tsirkin wrote:
> Michael S. Tsirkin wrote:
>>> If we want to remove a device from under a running guest, you need
>>> hotplug. So we can't just remove several lines from the config and hope
>>> that it'll work simply because the PCI address is stable.
>>>
>>>        
>> Why not?
>>      
>
> E.g. configuration cycles address a specific bus/slot.
> You need cooperation from guest if you want to move
> a device.
>    

By "remove several lines from the config" I understood the guest needs 
to be restarted.  Of course if you don't restart the guest you need true 
hotplug.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:50                               ` Michael S. Tsirkin
@ 2009-06-15  9:43                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>    
>> However, in order to retain compat for that SCSI device (e.g. ensuring
>> the PCI address doesn't change as other devices are added an removed),
>> we're back to the same problem ... either:
>>
>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>       out what address to use, libvirt would need to query qemu for what
>>       address was originally allocated to device or it would do all the
>>       PCI address allocation itself ...
>>      
>
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?
>    

Yes, the user builds the machine using the command line and monitor (or, 
in 2017, the machine configuration file), then turns on the power.  
Command line options are the parts lying around when we start.

btw, -drive needs to be separated:

   -controller type=lsi1234,pci_addr=foobar,name=blah
   -drive file=foo.img,controller=blah,index=0
   -drive file=bar.img,controller=blah,index=1

Drives do not have pci addresses.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15  9:43                                 ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>    
>> However, in order to retain compat for that SCSI device (e.g. ensuring
>> the PCI address doesn't change as other devices are added an removed),
>> we're back to the same problem ... either:
>>
>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>       out what address to use, libvirt would need to query qemu for what
>>       address was originally allocated to device or it would do all the
>>       PCI address allocation itself ...
>>      
>
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?
>    

Yes, the user builds the machine using the command line and monitor (or, 
in 2017, the machine configuration file), then turns on the power.  
Command line options are the parts lying around when we start.

btw, -drive needs to be separated:

   -controller type=lsi1234,pci_addr=foobar,name=blah
   -drive file=foo.img,controller=blah,index=0
   -drive file=bar.img,controller=blah,index=1

Drives do not have pci addresses.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-14  9:50                               ` Michael S. Tsirkin
                                                 ` (2 preceding siblings ...)
  (?)
@ 2009-06-15  9:43                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15  9:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>    
>> However, in order to retain compat for that SCSI device (e.g. ensuring
>> the PCI address doesn't change as other devices are added an removed),
>> we're back to the same problem ... either:
>>
>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>       out what address to use, libvirt would need to query qemu for what
>>       address was originally allocated to device or it would do all the
>>       PCI address allocation itself ...
>>      
>
> This last option makes sense to me: in a real world the user has
> control over where he places the device on the bus, so why
> not with qemu?
>    

Yes, the user builds the machine using the command line and monitor (or, 
in 2017, the machine configuration file), then turns on the power.  
Command line options are the parts lying around when we start.

btw, -drive needs to be separated:

   -controller type=lsi1234,pci_addr=foobar,name=blah
   -drive file=foo.img,controller=blah,index=0
   -drive file=bar.img,controller=blah,index=1

Drives do not have pci addresses.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:43                                 ` Avi Kivity
@ 2009-06-15 10:29                                   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:29 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 12:43:48PM +0300, Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>    
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added an removed),
>>> we're back to the same problem ... either:
>>>
>>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>>       out what address to use, libvirt would need to query qemu for what
>>>       address was originally allocated to device or it would do all the
>>>       PCI address allocation itself ...
>>>      
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>    
>
> Yes, the user build the machine using the command line and monitor (or,  
> in 2017, the machine configuration file), then turns on the power.   
> Command line options are the parts lying around when we start.
>
> btw, -drive needs to be separated:
>
>   -controller type=lsi1234,pci_addr=foobar,name=blah
>   -drive file=foo.img,controller=blah,index=0
>   -drive file=bar.img,controller=blah,index=1
>
> Drives to not have pci addresses.

Maybe we need a generic 'bus options' flag.




^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 10:29                                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:29 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On Mon, Jun 15, 2009 at 12:43:48PM +0300, Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>    
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added an removed),
>>> we're back to the same problem ... either:
>>>
>>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>>       out what address to use, libvirt would need to query qemu for what
>>>       address was originally allocated to device or it would do all the
>>>       PCI address allocation itself ...
>>>      
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>    
>
> Yes, the user build the machine using the command line and monitor (or,  
> in 2017, the machine configuration file), then turns on the power.   
> Command line options are the parts lying around when we start.
>
> btw, -drive needs to be separated:
>
>   -controller type=lsi1234,pci_addr=foobar,name=blah
>   -drive file=foo.img,controller=blah,index=0
>   -drive file=bar.img,controller=blah,index=1
>
> Drives to not have pci addresses.

Maybe we need a generic 'bus options' flag.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:43                                 ` Avi Kivity
  (?)
@ 2009-06-15 10:29                                 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:29 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On Mon, Jun 15, 2009 at 12:43:48PM +0300, Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>    
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added an removed),
>>> we're back to the same problem ... either:
>>>
>>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>>       out what address to use, libvirt would need to query qemu for what
>>>       address was originally allocated to device or it would do all the
>>>       PCI address allocation itself ...
>>>      
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>    
>
> Yes, the user build the machine using the command line and monitor (or,  
> in 2017, the machine configuration file), then turns on the power.   
> Command line options are the parts lying around when we start.
>
> btw, -drive needs to be separated:
>
>   -controller type=lsi1234,pci_addr=foobar,name=blah
>   -drive file=foo.img,controller=blah,index=0
>   -drive file=bar.img,controller=blah,index=1
>
> Drives to not have pci addresses.

Maybe we need a generic 'bus options' flag.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:27                                   ` Avi Kivity
@ 2009-06-15 10:32                                     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 12:27:08PM +0300, Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file is  
> a long way in our future.  It's also clear lack of stable PCI addresses  
> hurts us now.
>
>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>    
>
> pci_addr was never meant to bring world peace, just stable PCI  
> addresses.  The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>    
>
> If all devices (including vga, ide) are set up with pci_addr, then this  
> is unneeded.

Right. I think it could be an all or nothing at all approach.

> You do need to export available slot numbers from qemu.

Why would a slot be unavailable?


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 10:32                                     ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On Mon, Jun 15, 2009 at 12:27:08PM +0300, Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file is  
> a long way in our future.  It's also clear lack of stable PCI addresses  
> hurts us now.
>
>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>    
>
> pci_addr was never meant to bring world peace, just stable PCI  
> addresses.  The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>    
>
> If all devices (including vga, ide) are set up with pci_addr, then this  
> is unneeded.

Right. I think it could be an all or nothing at all approach.

> You do need to export available slot numbers from qemu.

Why would a slot be unavailable?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:27                                   ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 10:32                                   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On Mon, Jun 15, 2009 at 12:27:08PM +0300, Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file is  
> a long way in our future.  It's also clear lack of stable PCI addresses  
> hurts us now.
>
>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>    
>
> pci_addr was never meant to bring world peace, just stable PCI  
> addresses.  The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>    
>
> If all devices (including vga, ide) are set up with pci_addr, then this  
> is unneeded.

Right. I think it could be an all or nothing at all approach.

> You do need to export available slot numbers from qemu.

Why would a slot be unavailable?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:32                                     ` Michael S. Tsirkin
@ 2009-06-15 10:44                                       ` Gleb Natapov
  -1 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > You do need to export available slot numbers from qemu.
> 
> Why would a slot be unavailable?
> 
Because it does not exist?

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 10:44                                       ` Gleb Natapov
  0 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > You do need to export available slot numbers from qemu.
> 
> Why would a slot be unavailable?
> 
Because it does not exist?

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:32                                     ` Michael S. Tsirkin
  (?)
@ 2009-06-15 10:44                                     ` Gleb Natapov
  -1 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Anthony Liguori, Paul Brook

On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > You do need to export available slot numbers from qemu.
> 
> Why would a slot be unavailable?
> 
Because it does not exist?

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:44                                       ` Gleb Natapov
@ 2009-06-15 10:46                                         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:46 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > You do need to export available slot numbers from qemu.
> > 
> > Why would a slot be unavailable?
> > 
> Because it does not exist?

We can create a slot with any number, can't we?


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 10:46                                         ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:46 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > You do need to export available slot numbers from qemu.
> > 
> > Why would a slot be unavailable?
> > 
> Because it does not exist?

We can create a slot with any number, can't we?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:44                                       ` Gleb Natapov
  (?)
  (?)
@ 2009-06-15 10:46                                       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 10:46 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Anthony Liguori, Paul Brook

On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > You do need to export available slot numbers from qemu.
> > 
> > Why would a slot be unavailable?
> > 
> Because it does not exist?

We can create a slot with any number, can't we?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:46                                         ` Michael S. Tsirkin
@ 2009-06-15 10:52                                           ` Gleb Natapov
  -1 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > You do need to export available slot numbers from qemu.
> > > 
> > > Why would a slot be unavailable?
> > > 
> > Because it does not exist?
> 
> We can create a slot with any number, can't we?
What do you mean? If the mobo has 4 slots you can't create a fifth.
KVM describes 32 slots in the BIOS.

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 10:52                                           ` Gleb Natapov
  0 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > You do need to export available slot numbers from qemu.
> > > 
> > > Why would a slot be unavailable?
> > > 
> > Because it does not exist?
> 
> We can create a slot with any number, can't we?
What do you mean? If the mobo has 4 slots you can't create a fifth.
KVM describes 32 slots in the BIOS.

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:46                                         ` Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-15 10:52                                         ` Gleb Natapov
  -1 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 10:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Anthony Liguori, Paul Brook

On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > You do need to export available slot numbers from qemu.
> > > 
> > > Why would a slot be unavailable?
> > > 
> > Because it does not exist?
> 
> We can create a slot with any number, can't we?
What do you mean? If the mobo has 4 slots you can't create a fifth.
KVM describes 32 slots in the BIOS.

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:52                                           ` Gleb Natapov
@ 2009-06-15 11:07                                             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:07 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > You do need to export available slot numbers from qemu.
> > > > 
> > > > Why would a slot be unavailable?
> > > > 
> > > Because it does not exist?
> > 
> > We can create a slot with any number, can't we?
> What do you mean? If the mobo has 4 slots you can't create fifth.
> KVM describes 32 slots in the BIOS.

Do you mean the KVM kernel module here? I don't know much about the
BIOS. Can't qemu control the number of slots declared?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:07                                             ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:07 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > You do need to export available slot numbers from qemu.
> > > > 
> > > > Why would a slot be unavailable?
> > > > 
> > > Because it does not exist?
> > 
> > We can create a slot with any number, can't we?
> What do you mean? If the mobo has 4 slots you can't create fifth.
> KVM describes 32 slots in the BIOS.

Do you mean the KVM kernel module here? I don't know much about the
BIOS. Can't qemu control the number of slots declared?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:52                                           ` Gleb Natapov
  (?)
@ 2009-06-15 11:07                                           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:07 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Anthony Liguori, Paul Brook

On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > You do need to export available slot numbers from qemu.
> > > > 
> > > > Why would a slot be unavailable?
> > > > 
> > > Because it does not exist?
> > 
> > We can create a slot with any number, can't we?
> What do you mean? If the mobo has 4 slots you can't create fifth.
> KVM describes 32 slots in the BIOS.

Do you mean the KVM kernel module here? I don't know much about the
BIOS. Can't qemu control the number of slots declared?

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:07                                             ` Michael S. Tsirkin
@ 2009-06-15 11:14                                               ` Gleb Natapov
  -1 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 11:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > You do need to export available slot numbers from qemu.
> > > > > 
> > > > > Why would a slot be unavailable?
> > > > > 
> > > > Because it does not exist?
> > > 
> > > We can create a slot with any number, can't we?
> > What do you mean? If the mobo has 4 slots you can't create fifth.
> > KVM describes 32 slots in the BIOS.
> 
> Do you mean the KVM kernel module here? I don't know much about the
No I don't mean KVM kernel module here.

> BIOS. Can't qemu control the number of slots declared?
> 
Qemu represents HW, BIOS drives this HW. They should be in sync on such
important issues like pci slot configuration. Even if QEMU can control
the number of slots declared (which it can't easily do), it will be able
to do it only on startup (before BIOS runs). The way to have dynamic
number of slots may be pci bridge emulation. Not sure what is needed
from BIOS for that.

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:14                                               ` Gleb Natapov
  0 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 11:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > You do need to export available slot numbers from qemu.
> > > > > 
> > > > > Why would a slot be unavailable?
> > > > > 
> > > > Because it does not exist?
> > > 
> > > We can create a slot with any number, can't we?
> > What do you mean? If the mobo has 4 slots you can't create fifth.
> > KVM describes 32 slots in the BIOS.
> 
> Do you mean the KVM kernel module here? I don't know much about the
No I don't mean KVM kernel module here.

> BIOS. Can't qemu control the number of slots declared?
> 
QEMU represents the HW; the BIOS drives this HW. They should be in sync on
such important issues as PCI slot configuration. Even if QEMU can control
the number of slots declared (which it can't easily do), it will be able
to do it only on startup (before the BIOS runs). The way to have a dynamic
number of slots may be PCI bridge emulation. Not sure what is needed
from the BIOS for that.

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:07                                             ` Michael S. Tsirkin
  (?)
@ 2009-06-15 11:14                                             ` Gleb Natapov
  -1 siblings, 0 replies; 457+ messages in thread
From: Gleb Natapov @ 2009-06-15 11:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Anthony Liguori, Paul Brook

On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > You do need to export available slot numbers from qemu.
> > > > > 
> > > > > Why would a slot be unavailable?
> > > > > 
> > > > Because it does not exist?
> > > 
> > > We can create a slot with any number, can't we?
> > What do you mean? If the mobo has 4 slots you can't create fifth.
> > KVM describes 32 slots in the BIOS.
> 
> Do you mean the KVM kernel module here? I don't know much about the
No I don't mean KVM kernel module here.

> BIOS. Can't qemu control the number of slots declared?
> 
QEMU represents the HW; the BIOS drives this HW. They should be in sync on
such important issues as PCI slot configuration. Even if QEMU can control
the number of slots declared (which it can't easily do), it will be able
to do it only on startup (before the BIOS runs). The way to have a dynamic
number of slots may be PCI bridge emulation. Not sure what is needed
from the BIOS for that.

--
			Gleb.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:32                                     ` Michael S. Tsirkin
@ 2009-06-15 11:27                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>> You do need to export available slot numbers from qemu.
>>      
>
> Why would a slot be unavailable?
>    

A slot needs to be configured in ACPI, and not be taken by onboard chips 
(piix takes slot 0, for example).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:27                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>> You do need to export available slot numbers from qemu.
>>      
>
> Why would a slot be unavailable?
>    

A slot needs to be configured in ACPI, and not be taken by onboard chips 
(piix takes slot 0, for example).

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 10:32                                     ` Michael S. Tsirkin
                                                       ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 11:27                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>> You do need to export available slot numbers from qemu.
>>      
>
> Why would a slot be unavailable?
>    

A slot needs to be configured in ACPI, and not be taken by onboard chips 
(piix takes slot 0, for example).

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:09                                 ` Mark McLoughlin
@ 2009-06-15 11:32                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:32 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>> I think the point is that you don't need version numbers if you have a
>>>> proper device tree.
>>>>
>>>>          
>>> How do you add a new attribute to the device tree and, when a supplied
>>> device tree lacks said attribute, distinguish between a device tree
>>> from an old version of qemu (i.e. use the old default) and a partial
>>> device tree from the VM manager (i.e. use the new default)?
>>>
>>>        
>> -baseline 0.10
>>      
>
> That's a version number :-)
>
> (I was responding to Anthony's "you don't need a version number")
>    

If you want to prevent incompatibilities, you need to make everything 
new (potentially including bugfixes) non-default.  Eventually the 
default configuration becomes increasingly unusable and you need a new 
baseline.  You must still be able to fall back to the old baseline for 
older guests.  I don't think games with configuration files can hide that.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:32                                   ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:32 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>> I think the point is that you don't need version numbers if you have a
>>>> proper device tree.
>>>>
>>>>          
>>> How do you add a new attribute to the device tree and, when a supplied
>>> device tree lacks said attribute, distinguish between a device tree
>>> from an old version of qemu (i.e. use the old default) and a partial
>>> device tree from the VM manager (i.e. use the new default)?
>>>
>>>        
>> -baseline 0.10
>>      
>
> That's a version number :-)
>
> (I was responding to Anthony's "you don't need a version number")
>    

If you want to prevent incompatibilities, you need to make everything 
new (potentially including bugfixes) non-default.  Eventually the 
default configuration becomes increasingly unusable and you need a new 
baseline.  You must still be able to fall back to the old baseline for 
older guests.  I don't think games with configuration files can hide that.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:09                                 ` Mark McLoughlin
  (?)
@ 2009-06-15 11:32                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:32 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori

On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>> I think the point is that you don't need version numbers if you have a
>>>> proper device tree.
>>>>
>>>>          
>>> How do you add a new attribute to the device tree and, when a supplied
>>> device tree lacks said attribute, distinguish between a device tree
>>> from an old version of qemu (i.e. use the old default) and a partial
>>> device tree from the VM manager (i.e. use the new default)?
>>>
>>>        
>> -baseline 0.10
>>      
>
> That's a version number :-)
>
> (I was responding to Anthony's "you don't need a version number")
>    

If you want to prevent incompatibilities, you need to make everything 
new (potentially including bugfixes) non-default.  Eventually the 
default configuration becomes increasingly unusable and you need a new 
baseline.  You must still be able to fall back to the old baseline for 
older guests.  I don't think games with configuration files can hide that.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:14                                               ` Gleb Natapov
@ 2009-06-15 11:34                                                 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:34 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Mark McLoughlin, Anthony Liguori, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 02:14:15PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > > You do need to export available slot numbers from qemu.
> > > > > > 
> > > > > > Why would a slot be unavailable?
> > > > > > 
> > > > > Because it does not exist?
> > > > 
> > > > We can create a slot with any number, can't we?
> > > What do you mean? If the mobo has 4 slots you can't create fifth.
> > > KVM describes 32 slots in the BIOS.
> > 
> > Do you mean the KVM kernel module here? I don't know much about the
> No I don't mean KVM kernel module here.
> 
> > BIOS. Can't qemu control the number of slots declared?
> > 
> Qemu represents HW, BIOS drives this HW. They should be in sync on such
> important issues like pci slot configuration.

As a simple solution, let's stick to 32 slots per bus. That's the
maximum that the PCI spec allows, anyway.

> Even if QEMU can control the number of slots declared (which it can't
> easily do), it will be able to do it only on startup (before BIOS
> runs).

That's OK - this is when the machine description is read.

> The way to have dynamic
> number of slots may be pci bridge emulation. Not sure what is needed
> from BIOS for that.

Since a bridge can be hot-plugged, probably nothing?
But we don't necessarily need a dynamic number of slots IMO.

> 
> --
> 			Gleb.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:34                                                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:34 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 02:14:15PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > > You do need to export available slot numbers from qemu.
> > > > > > 
> > > > > > Why would a slot be unavailable?
> > > > > > 
> > > > > Because it does not exist?
> > > > 
> > > > We can create a slot with any number, can't we?
> > > What do you mean? If the mobo has 4 slots you can't create fifth.
> > > KVM describes 32 slots in the BIOS.
> > 
> > Do you mean the KVM kernel module here? I don't know much about the
> No I don't mean KVM kernel module here.
> 
> > BIOS. Can't qemu control the number of slots declared?
> > 
> Qemu represents HW, BIOS drives this HW. They should be in sync on such
> important issues like pci slot configuration.

As a simple solution, let's stick to 32 slots per bus. That's the
maximum that the PCI spec allows, anyway.

> Even if QEMU can control the number of slots declared (which it can't
> easily do), it will be able to do it only on startup (before BIOS
> runs).

That's OK - this is when the machine description is read.

> The way to have dynamic
> number of slots may be pci bridge emulation. Not sure what is needed
> from BIOS for that.

Since a bridge can be hot-plugged, probably nothing?
But we don't necessarily need a dynamic number of slots IMO.

> 
> --
> 			Gleb.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:14                                               ` Gleb Natapov
  (?)
@ 2009-06-15 11:34                                               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:34 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Anthony Liguori, Paul Brook

On Mon, Jun 15, 2009 at 02:14:15PM +0300, Gleb Natapov wrote:
> On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
> > > On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
> > > > > > > You do need to export available slot numbers from qemu.
> > > > > > 
> > > > > > Why would a slot be unavailable?
> > > > > > 
> > > > > Because it does not exist?
> > > > 
> > > > We can create a slot with any number, can't we?
> > > What do you mean? If the mobo has 4 slots you can't create fifth.
> > > KVM describes 32 slots in the BIOS.
> > 
> > Do you mean the KVM kernel module here? I don't know much about the
> No I don't mean KVM kernel module here.
> 
> > BIOS. Can't qemu control the number of slots declared?
> > 
> Qemu represents HW, BIOS drives this HW. They should be in sync on such
> important issues like pci slot configuration.

As a simple solution, let's stick to 32 slots per bus. That's the
maximum that the PCI spec allows, anyway.

> Even if QEMU can control the number of slots declared (which it can't
> easily do), it will be able to do it only on startup (before BIOS
> runs).

That's OK - this is when the machine description is read.

> The way to have dynamic
> number of slots may be pci bridge emulation. Not sure what is needed
> from BIOS for that.

Since a bridge can be hot-plugged, probably nothing?
But we don't necessarily need a dynamic number of slots IMO.

> 
> --
> 			Gleb.

-- 
MST

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15  9:27                                   ` Avi Kivity
@ 2009-06-15 11:35                                     ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 11:35 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Carsten Otte, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity <avi@redhat.com> writes:

> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file
> is a long way in our future.  It's also clear lack of stable PCI
> addresses hurts us now.

Correct.

>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>    
>
> pci_addr was never meant to bring world peace, just stable PCI
> addresses.  The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>    
>
> If all devices (including vga, ide) are set up with pci_addr, then
> this is unneeded.  You do need to export available slot numbers from
> qemu.

Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
other slots are available.

The real problem is devices that get implicitly added, like the SCSI
controller.  Those devices get their slots auto-assigned, which can
interfere with slot numbers chosen by the user.  We need a way to avoid
that, as you suggested elsewhere in this thread.


[*] There's an exception or two for oddball targets.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15 11:35                                     ` Markus Armbruster
  0 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 11:35 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Carsten Otte

Avi Kivity <avi@redhat.com> writes:

> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file
> is a long way in our future.  It's also clear lack of stable PCI
> addresses hurts us now.

Correct.

>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>    
>
> pci_addr was never meant to bring world peace, just stable PCI
> addresses.  The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>    
>
> If all devices (including vga, ide) are set up with pci_addr, then
> this is unneeded.  You do need to export available slot numbers from
> qemu.

Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
other slots are available.

The real problem is devices that get implicitly added, like the SCSI
controller.  Those devices get their slots auto-assigned, which can
interfere with slot numbers chosen by the user.  We need a way to avoid
that, as you suggested elsewhere in this thread.


[*] There's an exception or two for oddball targets.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15  9:27                                   ` Avi Kivity
                                                     ` (3 preceding siblings ...)
  (?)
@ 2009-06-15 11:35                                   ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 11:35 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Carsten Otte

Avi Kivity <avi@redhat.com> writes:

> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file
> is a long way in our future.  It's also clear lack of stable PCI
> addresses hurts us now.

Correct.

>> However, the first problem is that it isn't a solution to the guest ABI
>> problem more generally.
>>    
>
> pci_addr was never meant to bring world peace, just stable PCI
> addresses.  The other issues should be addressed separately.
>
>> And the second problem is that for e.g. libvirt to use it, it would have
>> to be possible to query qemu for what PCI slots were assigned to the
>> devices - libvirt would need to be able to parse 'info pci' and match
>> the devices listed with the devices specified on the command line.
>>    
>
> If all devices (including vga, ide) are set up with pci_addr, then
> this is unneeded.  You do need to export available slot numbers from
> qemu.

Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
other slots are available.

The real problem is devices that get implicitly added, like the SCSI
controller.  Those devices get their slots auto-assigned, which can
interfere with slot numbers chosen by the user.  We need a way to avoid
that, as you suggested elsewhere in this thread.


[*] There's an exception or two for oddball targets.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15 11:35                                     ` [Qemu-devel] " Markus Armbruster
@ 2009-06-15 11:43                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:43 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Mark McLoughlin, Carsten Otte, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Anthony Liguori,
	qemu-devel

(adding cc)

On 06/15/2009 02:35 PM, Markus Armbruster wrote:
> Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
> other slots are available.
>    

qemu needs to export these two bits of information: the first free slot 
and the number of slots.

More generally, which slots are open.  We can assume 1:31, but that's 
unlovely.

> The real problem is devices that get implicitly added, like the SCSI
> controller.  Those devices get their slots auto-assigned, which can
> interfere with slot numbers chosen by the user.  We need a way to avoid
> that, as you suggested elsewhere in this thread.
>    

Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and 
-drive pci_addr= (and later, -disk-controller)?  Stalling while waiting 
for the ultimate config file is only generating pain and out-of-tree 
patches.

(I'd be quite happy constructing the entire machine config on the 
command line, but I realize it's just me)

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15 11:43                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:43 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Carsten Otte

(adding cc)

On 06/15/2009 02:35 PM, Markus Armbruster wrote:
> Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
> other slots are available.
>    

qemu needs to export these two bits of information: the first free slot 
and the number of slots.

More generally, which slots are open.  We can assume 1:31, but that's 
unlovely.

> The real problem is devices that get implicitly added, like the SCSI
> controller.  Those devices get their slots auto-assigned, which can
> interfere with slot numbers chosen by the user.  We need a way to avoid
> that, as you suggested elsewhere in this thread.
>    

Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and 
-drive pci_addr= (and later, -disk-controller)?  Stalling while waiting 
for the ultimate config file is only generating pain and out-of-tree 
patches.

(I'd be quite happy constructing the entire machine config on the 
command line, but I realize it's just me)

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15 11:35                                     ` [Qemu-devel] " Markus Armbruster
  (?)
@ 2009-06-15 11:43                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:43 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori, Carsten Otte

(adding cc)

On 06/15/2009 02:35 PM, Markus Armbruster wrote:
> Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
> other slots are available.
>    

qemu needs to export these two bits of information: the first free slot 
and the number of slots.

More generally, which slots are open.  We can assume 1:31, but that's 
unlovely.

> The real problem is devices that get implicitly added, like the SCSI
> controller.  Those devices get their slots auto-assigned, which can
> interfere with slot numbers chosen by the user.  We need a way to avoid
> that, as you suggested elsewhere in this thread.
>    

Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and 
-drive pci_addr= (and later, -disk-controller)?  Stalling while waiting 
for the ultimate config file is only generating pain and out-of-tree 
patches.

(I'd be quite happy constructing the entire machine config on the 
command line, but I realize it's just me)

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:27                                       ` Avi Kivity
@ 2009-06-15 11:48                                         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 02:27:14PM +0300, Avi Kivity wrote:
> On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>>> You do need to export available slot numbers from qemu.
>>>      
>>
>> Why would a slot be unavailable?
>>    
>
> A slot needs to be configured in ACPI,

Can we configure all possible 32 slots?

> and not be taken by onboard chips  
> (piix takes slot 0, for example).

piix is the root complex, isn't it? Are there other examples? If not,
we could teach management about the root complex being special ...


> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:48                                         ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On Mon, Jun 15, 2009 at 02:27:14PM +0300, Avi Kivity wrote:
> On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>>> You do need to export available slot numbers from qemu.
>>>      
>>
>> Why would a slot be unavailable?
>>    
>
> A slot needs to be configured in ACPI,

Can we configure all possible 32 slots?

> and not be taken by onboard chips  
> (piix takes slot 0, for example).

piix is the root complex, isn't it? Are there other examples? If not,
we could teach management about the root complex being special ...


> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:27                                       ` Avi Kivity
  (?)
@ 2009-06-15 11:48                                       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 11:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On Mon, Jun 15, 2009 at 02:27:14PM +0300, Avi Kivity wrote:
> On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
>>> You do need to export available slot numbers from qemu.
>>>      
>>
>> Why would a slot be unavailable?
>>    
>
> A slot needs to be configured in ACPI,

Can we configure all possible 32 slots?

> and not be taken by onboard chips  
> (piix takes slot 0, for example).

piix is the root complex, isn't it? Are there other examples? If not,
we could teach management about the root complex being special ...


> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:48                                         ` Michael S. Tsirkin
@ 2009-06-15 11:56                                           ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>> A slot needs to be configured in ACPI,
>>      
>
> Can we configure all possible 32 slots?
>    

That's what we do.  But one is always taken.  In the future, perhaps more.

>> and not be taken by onboard chips
>> (piix takes slot 0, for example).
>>      
>
> piix is the root complex, isn't it? Are there other examples? If not,
> we could teach management about the root complex being special ...
>    

We should just tell the user which slots are open.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 11:56                                           ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>> A slot needs to be configured in ACPI,
>>      
>
> Can we configure all possible 32 slots?
>    

That's what we do.  But one is always taken.  In the future, perhaps more.

>> and not be taken by onboard chips
>> (piix takes slot 0, for example).
>>      
>
> piix is the root complex, isn't it? Are there other examples? If not,
> we could teach management about the root complex being special ...
>    

We should just tell the user which slots are open.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:48                                         ` Michael S. Tsirkin
  (?)
@ 2009-06-15 11:56                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 11:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>> A slot needs to be configured in ACPI,
>>      
>
> Can we configure all possible 32 slots?
>    

That's what we do.  But one is always taken.  In the future, perhaps more.

>> and not be taken by onboard chips
>> (piix takes slot 0, for example).
>>      
>
> piix is the root complex, isn't it? Are there other examples? If not,
> we could teach management about the root complex being special ...
>    

We should just tell the user which slots are open.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Re: Configuration vs. compat hints
  2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
@ 2009-06-15 11:59                                         ` Stefano Stabellini
  -1 siblings, 0 replies; 457+ messages in thread
From: Stefano Stabellini @ 2009-06-15 11:59 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa, Russell,
	Markus Armbruster, qemu-devel, Blue Swirl, Christian Borntraeger,
	Rusty, Brook, Paul, virtualization, Carsten Otte

Avi Kivity wrote:

> (I'd be quite happy constructing the entire machine config on the 
> command line, but I realize it's just me)
> 


It is not just you.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15 11:59                                         ` Stefano Stabellini
  0 siblings, 0 replies; 457+ messages in thread
From: Stefano Stabellini @ 2009-06-15 11:59 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa, Russell,
	Markus Armbruster, qemu-devel, Blue Swirl, Christian Borntraeger,
	Rusty, Brook, Paul, virtualization, Carsten Otte

Avi Kivity wrote:

> (I'd be quite happy constructing the entire machine config on the 
> command line, but I realize it's just me)
> 


It is not just you.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
  2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
  (?)
@ 2009-06-15 11:59                                       ` Stefano Stabellini
  -1 siblings, 0 replies; 457+ messages in thread
From: Stefano Stabellini @ 2009-06-15 11:59 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, Blue Swirl, Christian Borntraeger, Rusty, Brook,
	Paul, virtualization, Carsten Otte

Avi Kivity wrote:

> (I'd be quite happy constructing the entire machine config on the 
> command line, but I realize it's just me)
> 


It is not just you.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:27                                   ` Avi Kivity
@ 2009-06-15 12:41                                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Michael S. Tsirkin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file 
> is a long way in our future.  It's also clear lack of stable PCI 
> addresses hurts us now.

Is there opposition?  I don't ever recall seeing a patch...

I think it's a perfectly fine idea.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:41                                     ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file 
> is a long way in our future.  It's also clear lack of stable PCI 
> addresses hurts us now.

Is there opposition?  I don't ever recall seeing a patch...

I think it's a perfectly fine idea.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:27                                   ` Avi Kivity
                                                     ` (5 preceding siblings ...)
  (?)
@ 2009-06-15 12:41                                   ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>>      
>>
>> Yep, most people seem to agree that it makes sense to allow this, but
>> some believe it should only be via a machine description file, not the
>> command line.
>>    
>
> I don't understand this opposition.  It's clear a machine config file 
> is a long way in our future.  It's also clear lack of stable PCI 
> addresses hurts us now.

Is there opposition?  I don't ever recall seeing a patch...

I think it's a perfectly fine idea.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
  2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
                                                         ` (3 preceding siblings ...)
  (?)
@ 2009-06-15 12:41                                       ` Markus Armbruster
  2009-06-15 12:50                                           ` Anthony Liguori
  2009-06-15 12:50                                         ` Anthony Liguori
  -1 siblings, 2 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Carsten Otte

Avi Kivity <avi@redhat.com> writes:

> (adding cc)
>
> On 06/15/2009 02:35 PM, Markus Armbruster wrote:
>> Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
>> other slots are available.
>>    
>
> qemu needs to export these two bits of information: the first free
> slot and the number of slots.
>
> More generally, which slots are open.  We can assume 1:31, but that's
> unlovely.

Point.

>> The real problem is devices that get implicitly added, like the SCSI
>> controller.  Those devices get their slots auto-assigned, which can
>> interfere with slot numbers chosen by the user.  We need a way to avoid
>> that, as you suggested elsewhere in this thread.
>>    
>
> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
> and -drive pci_addr= (and later, -disk-controller)?  Stalling while
> waiting for the ultimate config file is only generating pain and
> out-of-tree patches.

Yup.

I got bit-rotten patches for pci_addr=, and I can unrot them if they're
wanted.

> (I'd be quite happy constructing the entire machine config on the
> command line, but I realize it's just me)

Ha, .bash_history as config file...

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
  2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
                                                         ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 12:41                                       ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Carsten Otte

Avi Kivity <avi@redhat.com> writes:

> (adding cc)
>
> On 06/15/2009 02:35 PM, Markus Armbruster wrote:
>> Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
>> other slots are available.
>>    
>
> qemu needs to export these two bits of information: the first free
> slot and the number of slots.
>
> More generally, which slots are open.  We can assume 1:31, but that's
> unlovely.

Point.

>> The real problem is devices that get implicitly added, like the SCSI
>> controller.  Those devices get their slots auto-assigned, which can
>> interfere with slot numbers chosen by the user.  We need a way to avoid
>> that, as you suggested elsewhere in this thread.
>>    
>
> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
> and -drive pci_addr= (and later, -disk-controller)?  Stalling while
> waiting for the ultimate config file is only generating pain and
> out-of-tree patches.

Yup.

I got bit-rotten patches for pci_addr=, and I can unrot them if they're
wanted.

> (I'd be quite happy constructing the entire machine config on the
> command line, but I realize it's just me)

Ha, .bash_history as config file...

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:56                                           ` Avi Kivity
@ 2009-06-15 12:41                                             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 02:56:42PM +0300, Avi Kivity wrote:
> On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>>> A slot needs to be configured in ACPI,
>>>      
>>
>> Can we configure all possible 32 slots?
>>    
>
> That's what we do.  But one is always taken.  In the future, perhaps more.
>
>>> and not be taken by onboard chips
>>> (piix takes slot 0, for example).
>>>      
>>
>> piix is the root complex, isn't it? Are there other examples? If not,
>> we could teach management about the root complex being special ...
>>    
>
> We should just tell the user which slots are open.

This might be tricky if the config is passed in with the command line
flags.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:41                                             ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On Mon, Jun 15, 2009 at 02:56:42PM +0300, Avi Kivity wrote:
> On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>>> A slot needs to be configured in ACPI,
>>>      
>>
>> Can we configure all possible 32 slots?
>>    
>
> That's what we do.  But one is always taken.  In the future, perhaps more.
>
>>> and not be taken by onboard chips
>>> (piix takes slot 0, for example).
>>>      
>>
>> piix is the root complex, isn't it? Are there other examples? If not,
>> we could teach management about the root complex being special ...
>>    
>
> We should just tell the user which slots are open.

This might be tricky if the config is passed in with the command line
flags.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:56                                           ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 12:41                                           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 12:41 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On Mon, Jun 15, 2009 at 02:56:42PM +0300, Avi Kivity wrote:
> On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
>>> A slot needs to be configured in ACPI,
>>>      
>>
>> Can we configure all possible 32 slots?
>>    
>
> That's what we do.  But one is always taken.  In the future, perhaps more.
>
>>> and not be taken by onboard chips
>>> (piix takes slot 0, for example).
>>>      
>>
>> piix is the root complex, isn't it? Are there other examples? If not,
>> we could teach management about the root complex being special ...
>>    
>
> We should just tell the user which slots are open.

This might be tricky if the config is passed in with the command line
flags.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:43                                 ` Avi Kivity
@ 2009-06-15 12:45                                   ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:45 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>   
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added and removed),
>>> we're back to the same problem ... either:
>>>
>>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>>       out what address to use, libvirt would need to query qemu for what
>>>       address was originally allocated to device or it would do all the
>>>       PCI address allocation itself ...
>>>      
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>    
>
> Yes, the user builds the machine using the command line and monitor 
> (or, in 2017, the machine configuration file),

Considering pbrook just posted a machine config for arm, I think it 
would be rather sad if pc wasn't converted to it by 2017...

> then turns on the power.  Command line options are the parts lying 
> around when we start.
>
> btw, -drive needs to be separated:
>
>   -controller type=lsi1234,pci_addr=foobar,name=blah
>   -drive file=foo.img,controller=blah,index=0
>   -drive file=bar.img,controller=blah,index=1
>
> Drives do not have pci addresses.

Drives don't have indexes and buses but we specify it on the -drive 
line.  -drive is convenient syntax.  It stops being convenient when you 
force it to be two options.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:45                                   ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:45 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>   
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added and removed),
>>> we're back to the same problem ... either:
>>>
>>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>>       out what address to use, libvirt would need to query qemu for what
>>>       address was originally allocated to device or it would do all the
>>>       PCI address allocation itself ...
>>>      
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>    
>
> Yes, the user builds the machine using the command line and monitor 
> (or, in 2017, the machine configuration file),

Considering pbrook just posted a machine config for arm, I think it 
would be rather sad if pc wasn't converted to it by 2017...

> then turns on the power.  Command line options are the parts lying 
> around when we start.
>
> btw, -drive needs to be separated:
>
>   -controller type=lsi1234,pci_addr=foobar,name=blah
>   -drive file=foo.img,controller=blah,index=0
>   -drive file=bar.img,controller=blah,index=1
>
> Drives do not have pci addresses.

Drives don't have indexes and buses but we specify it on the -drive 
line.  -drive is convenient syntax.  It stops being convenient when you 
force it to be two options.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15  9:43                                 ` Avi Kivity
                                                   ` (3 preceding siblings ...)
  (?)
@ 2009-06-15 12:45                                 ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:45 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
>> On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
>>   
>>> However, in order to retain compat for that SCSI device (e.g. ensuring
>>> the PCI address doesn't change as other devices are added and removed),
>>> we're back to the same problem ... either:
>>>
>>>    1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
>>>       out what address to use, libvirt would need to query qemu for what
>>>       address was originally allocated to device or it would do all the
>>>       PCI address allocation itself ...
>>>      
>>
>> This last option makes sense to me: in a real world the user has
>> control over where he places the device on the bus, so why
>> not with qemu?
>>    
>
> Yes, the user builds the machine using the command line and monitor 
> (or, in 2017, the machine configuration file),

Considering pbrook just posted a machine config for arm, I think it 
would be rather sad if pc wasn't converted to it by 2017...

> then turns on the power.  Command line options are the parts lying 
> around when we start.
>
> btw, -drive needs to be separated:
>
>   -controller type=lsi1234,pci_addr=foobar,name=blah
>   -drive file=foo.img,controller=blah,index=0
>   -drive file=bar.img,controller=blah,index=1
>
> Drives do not have pci addresses.

Drives don't have indexes and buses but we specify it on the -drive 
line.  -drive is convenient syntax.  It stops being convenient when you 
force it to be two options.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:32                                   ` Avi Kivity
@ 2009-06-15 12:48                                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>>> I think the point is that you don't need version numbers if you have a
>>>>> proper device tree.
>>>>>
>>>>>          
>>>> How do you add a new attribute to the device tree and, when a supplied
>>>> device tree lacking said attribute, distinguish between a device tree
>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>
>>>>        
>>> -baseline 0.10
>>>      
>>
>> That's a version number :-)
>>
>> (I was responding to Anthony's "you don't need a version number")
>>    
>
> If you want to prevent incompatibilities, you need to make everything 
> new (potentially including bugfixes) non-default.  Eventually the 
> default configuration becomes increasingly unusable and you need a new 
> baseline.  You must still be able to fall back to the old baseline for 
> older guests.  I don't think games with configuration files can hide 
> that.

-M pc1
-M pc2

etc.

This is pretty easy to maintain with config files.

Regards,

Anthony Liguori



^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:48                                     ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>>> I think the point is that you don't need version numbers if you have a
>>>>> proper device tree.
>>>>>
>>>>>          
>>>> How do you add a new attribute to the device tree and, when a supplied
>>>> device tree lacking said attribute, distinguish between a device tree
>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>
>>>>        
>>> -baseline 0.10
>>>      
>>
>> That's a version number :-)
>>
>> (I was responding to Anthony's "you don't need a version number")
>>    
>
> If you want to prevent incompatibilities, you need to make everything 
> new (potentially including bugfixes) non-default.  Eventually the 
> default configuration becomes increasingly unusable and you need a new 
> baseline.  You must still be able to fall back to the old baseline for 
> older guests.  I don't think games with configuration files can hide 
> that.

-M pc1
-M pc2

etc.

This is pretty easy to maintain with config files.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 11:32                                   ` Avi Kivity
  (?)
@ 2009-06-15 12:48                                   ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
>>>>> I think the point is that you don't need version numbers if you have a
>>>>> proper device tree.
>>>>>
>>>>>          
>>>> How do you add a new attribute to the device tree and, when a supplied
>>>> device tree lacking said attribute, distinguish between a device tree
>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>
>>>>        
>>> -baseline 0.10
>>>      
>>
>> That's a version number :-)
>>
>> (I was responding to Anthony's "you don't need a version number")
>>    
>
> If you want to prevent incompatibilities, you need to make everything 
> new (potentially including bugfixes) non-default.  Eventually the 
> default configuration becomes increasingly unusable and you need a new 
> baseline.  You must still be able to fall back to the old baseline for 
> older guests.  I don't think games with configuration files can hide 
> that.

-M pc1
-M pc2

etc.

This is pretty easy to maintain with config files.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:41                                             ` Michael S. Tsirkin
@ 2009-06-15 12:50                                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 12:50 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Anthony Liguori, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
> We should just tell the user which slots are open.
>    
>
> This might be tricky if the config is passed in with the command line
> flags.
>    

qemu -show-available-pci-slots

(the qemu equivalent to KVM_CHECK_EXTENSION)

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:50                                               ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 12:50 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
> We should just tell the user which slots are open.
>    
>
> This might be tricky if the config is passed in with the command line
> flags.
>    

qemu -show-available-pci-slots

(the qemu equivalent to KVM_CHECK_EXTENSION)

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:41                                             ` Michael S. Tsirkin
  (?)
@ 2009-06-15 12:50                                             ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 12:50 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
> We should just tell the user which slots are open.
>    
>
> This might be tricky if the config is passed in with the command line
> flags.
>    

qemu -show-available-pci-slots

(the qemu equivalent to KVM_CHECK_EXTENSION)

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
  2009-06-15 12:41                                       ` Markus Armbruster
@ 2009-06-15 12:50                                           ` Anthony Liguori
  2009-06-15 12:50                                         ` Anthony Liguori
  1 sibling, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:50 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Avi Kivity, Mark McLoughlin, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Carsten Otte

Markus Armbruster wrote:
> Avi Kivity <avi@redhat.com> writes:
>
>   
>> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
>> and -drive pci_addr= (and later, -disk-controller)?  Stalling while
>> waiting for the ultimate config file is only generating pain and
>> out-of-tree patches.
>>     
>
> Yup.
>
> I got bit-rotten patches for pci_addr=, and I can unrot them if they're
> wanted.
>   

Yes, would be good to have patches on the list to discuss.  In 
principle, I have no objection to this.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15 12:50                                           ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:50 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Carsten Otte, Paul Brook

Markus Armbruster wrote:
> Avi Kivity <avi@redhat.com> writes:
>
>   
>> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
>> and -drive pci_addr= (and later, -disk-controller)?  Stalling while
>> waiting for the ultimate config file is only generating pain and
>> out-of-tree patches.
>>     
>
> Yup.
>
> I got bit-rotten patches for pci_addr=, and I can unrot them if they're
> wanted.
>   

Yes, would be good to have patches on the list to discuss.  In 
principle, I have no objection to this.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] Re: Configuration vs. compat hints
  2009-06-15 12:41                                       ` Markus Armbruster
  2009-06-15 12:50                                           ` Anthony Liguori
@ 2009-06-15 12:50                                         ` Anthony Liguori
  1 sibling, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:50 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Carsten Otte, Paul Brook

Markus Armbruster wrote:
> Avi Kivity <avi@redhat.com> writes:
>
>   
>> Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
>> and -drive pci_addr= (and later, -disk-controller)?  Stalling while
>> waiting for the ultimate config file is only generating pain and
>> out-of-tree patches.
>>     
>
> Yup.
>
> I got bit-rotten patches for pci_addr=, and I can unrot them if they're
> wanted.
>   

Yes, would be good to have patches on the list to discuss.  In 
principle, I have no objection to this.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:50                                               ` Avi Kivity
@ 2009-06-15 12:52                                                 ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:52 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>> We should just tell the user which slots are open.
>>   
>> This might be tricky if the config is passed in with the command line
>> flags.
>>    
>
> qemu -show-available-pci-slots

Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.

It's not a good idea to have management applications attempt to do PCI 
slot allocation.  For instance, one day we may decide to make virtio 
devices multi-function.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:52                                                 ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:52 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>> We should just tell the user which slots are open.
>>   
>> This might be tricky if the config is passed in with the command line
>> flags.
>>    
>
> qemu -show-available-pci-slots

Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.

It's not a good idea to have management applications attempt to do PCI 
slot allocation.  For instance, one day we may decide to make virtio 
devices multi-function.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:50                                               ` Avi Kivity
  (?)
@ 2009-06-15 12:52                                               ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 12:52 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>> We should just tell the user which slots are open.
>>   
>> This might be tricky if the config is passed in with the command line
>> flags.
>>    
>
> qemu -show-available-pci-slots

Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.

It's not a good idea to have management applications attempt to do PCI 
slot allocation.  For instance, one day we may decide to make virtio 
devices multi-function.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:41                                     ` Anthony Liguori
@ 2009-06-15 12:55                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 12:55 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Michael S. Tsirkin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:41 PM, Anthony Liguori wrote:
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>
>> I don't understand this opposition.  It's clear a machine config file 
>> is a long way in our future.  It's also clear lack of stable PCI 
>> addresses hurts us now.
>
>
> Is there opposition?  I don't ever recall seeing a patch...

Izik Eidus posted a patch (using a different syntax) in November 2007.

>
> I think it's a perfectly fine idea.

Good.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 12:55                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 12:55 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 03:41 PM, Anthony Liguori wrote:
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>
>> I don't understand this opposition.  It's clear a machine config file 
>> is a long way in our future.  It's also clear lack of stable PCI 
>> addresses hurts us now.
>
>
> Is there opposition?  I don't ever recall seeing a patch...

Izik Eidus posted a patch (using a different syntax) in November 2007.

>
> I think it's a perfectly fine idea.

Good.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:41                                     ` Anthony Liguori
  (?)
@ 2009-06-15 12:55                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 12:55 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:41 PM, Anthony Liguori wrote:
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>
>> I don't understand this opposition.  It's clear a machine config file 
>> is a long way in our future.  It's also clear lack of stable PCI 
>> addresses hurts us now.
>
>
> Is there opposition?  I don't ever recall seeing a patch...

Izik Eidus posted a patch (using a different syntax) in November 2007.

>
> I think it's a perfectly fine idea.

Good.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:45                                   ` Anthony Liguori
@ 2009-06-15 13:03                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:03 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>
>> Yes, the user builds the machine using the command line and monitor 
>> (or, in 2017, the machine configuration file),
>
>
> Considering pbrook just posted a machine config for arm, I think it 
> would be rather sad if pc wasn't converted to it by 2017...

I'd be sad too, but not surprised.

>> then turns on the power.  Command line options are the parts lying 
>> around when we start.
>>
>> btw, -drive needs to be separated:
>>
>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>   -drive file=foo.img,controller=blah,index=0
>>   -drive file=bar.img,controller=blah,index=1
>>
>> Drives do not have pci addresses.
>
> Drives don't have indexes and buses but we specify it on the -drive 
> line. 

Drives do have indexes.  On old parallel scsi drives you set the index 
by clicking a button on the back of the drive to cycle through scsi 
addresses 0-7.  An IDE drive's index is determined by the cable 
(master/slave).  A SATA drive's index is determined by which header on 
the motherboard the drive connects to.

If by bus you mean the if= parameter, then drives certainly do have 
buses.  Just try connecting the scsi drive from the previous paragraph 
to a USB port.

> -drive is convenient syntax.  It stops being convenient when you force 
> it to be two options.

controller= defaults to some builtin thing which autoinstantiates when 
necessary, so the old -drive syntax works.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:03                                     ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:03 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>
>> Yes, the user builds the machine using the command line and monitor 
>> (or, in 2017, the machine configuration file),
>
>
> Considering pbrook just posted a machine config for arm, I think it 
> would be rather sad if pc wasn't converted to it by 2017...

I'd be sad too, but not surprised.

>> then turns on the power.  Command line options are the parts lying 
>> around when we start.
>>
>> btw, -drive needs to be separated:
>>
>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>   -drive file=foo.img,controller=blah,index=0
>>   -drive file=bar.img,controller=blah,index=1
>>
>> Drives do not have pci addresses.
>
> Drives don't have indexes and buses but we specify it on the -drive 
> line. 

Drives do have indexes.  On old parallel scsi drives you set the index 
by clicking a button on the back of the drive to cycle through scsi 
addresses 0-7.  An IDE drive's index is determined by the cable 
(master/slave).  A SATA drive's index is determined by which header on 
the motherboard the drive connects to.

If by bus you mean the if= parameter, then drives certainly do have 
buses.  Just try connecting the scsi drive from the previous paragraph 
to a USB port.

> -drive is convenient syntax.  It stops being convenient when you force 
> it to be two options.

controller= defaults to some builtin thing which autoinstantiates when 
necessary, so the old -drive syntax works.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:45                                   ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 13:03                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:03 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>> This last option makes sense to me: in a real world the user has
>>> control over where he places the device on the bus, so why
>>> not with qemu?
>>
>> Yes, the user builds the machine using the command line and monitor 
>> (or, in 2017, the machine configuration file),
>
>
> Considering pbrook just posted a machine config for arm, I think it 
> would be rather sad if pc wasn't converted to it by 2017...

I'd be sad too, but not surprised.

>> then turns on the power.  Command line options are the parts lying 
>> around when we start.
>>
>> btw, -drive needs to be separated:
>>
>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>   -drive file=foo.img,controller=blah,index=0
>>   -drive file=bar.img,controller=blah,index=1
>>
>> Drives do not have pci addresses.
>
> Drives don't have indexes and buses but we specify it on the -drive 
> line. 

Drives do have indexes.  On old parallel scsi drives you set the index 
by clicking a button on the back of the drive to cycle through scsi 
addresses 0-7.  An IDE drive's index is determined by the cable 
(master/slave).  A SATA drive's index is determined by which header on 
the motherboard the drive connects to.

If by bus you mean the if= parameter, then drives certainly do have 
buses.  Just try connecting the scsi drive from the previous paragraph 
to a USB port.

> -drive is convenient syntax.  It stops being convenient when you force 
> it to be two options.

controller= defaults to some builtin thing which autoinstantiates when 
necessary, so the old -drive syntax works.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15 12:41                                     ` Anthony Liguori
@ 2009-06-15 13:04                                       ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 13:04 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Avi Kivity, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Anthony Liguori <anthony@codemonkey.ws> writes:

> Avi Kivity wrote:
>> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>>      
>>>
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>>    
>>
>> I don't understand this opposition.  It's clear a machine config
>> file is a long way in our future.  It's also clear lack of stable
>> PCI addresses hurts us now.
>
> Is there opposition?  I don't ever recall seeing a patch...

http://www.archivum.info/qemu-devel@nongnu.org/2009-01/msg01458.html

> I think it's a perfectly fine idea.

Off to dust off my patch series.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15 13:04                                       ` Markus Armbruster
  0 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 13:04 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Avi Kivity, Paul Brook

Anthony Liguori <anthony@codemonkey.ws> writes:

> Avi Kivity wrote:
>> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>>      
>>>
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>>    
>>
>> I don't understand this opposition.  It's clear a machine config
>> file is a long way in our future.  It's also clear lack of stable
>> PCI addresses hurts us now.
>
> Is there opposition?  I don't ever recall seeing a patch...

http://www.archivum.info/qemu-devel@nongnu.org/2009-01/msg01458.html

> I think it's a perfectly fine idea.

Off to dust off my patch series.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15 12:41                                     ` Anthony Liguori
                                                       ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 13:04                                     ` Markus Armbruster
  -1 siblings, 0 replies; 457+ messages in thread
From: Markus Armbruster @ 2009-06-15 13:04 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Avi Kivity, Paul Brook

Anthony Liguori <anthony@codemonkey.ws> writes:

> Avi Kivity wrote:
>> On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>>      
>>>
>>> Yep, most people seem to agree that it makes sense to allow this, but
>>> some believe it should only be via a machine description file, not the
>>> command line.
>>>    
>>
>> I don't understand this opposition.  It's clear a machine config
>> file is a long way in our future.  It's also clear lack of stable
>> PCI addresses hurts us now.
>
> Is there opposition?  I don't ever recall seeing a patch...

http://www.archivum.info/qemu-devel@nongnu.org/2009-01/msg01458.html

> I think it's a perfectly fine idea.

Off to dust off my patch series.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:52                                                 ` Anthony Liguori
@ 2009-06-15 13:09                                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:09 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:52 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>> We should just tell the user which slots are open.
>>>   This might be tricky if the config is passed in with the command line
>>> flags.
>>
>> qemu -show-available-pci-slots
>
> Why does the user care?
>
> Let QEMU allocate the PCI slot, then query it to see what slot it 
> assigned and remember that.

It's a roundabout way of doing things.

As an example, if you try to fit too many devices into the machine, you 
have to try to add all devices and watch for a qemu error.  If you know 
in advance how many slots you have, you never enter into that situation 
(and you need to show the limit to the user anyway).

>
> It's not a good idea to have management applications attempt to do PCI 
> slot allocation.  For instance, one day we may decide to make virtio 
> devices multi-function.

Non-virtio, as well.  But we can't make that the default, so the user 
will have to specify this anyway.

Given that you can't hotunplug individual functions, the user will have 
to specify exactly how functions are aggregated into devices.  My 
recommendation would be for a GUI to allow the user to select a 'quad 
port virtio NIC' or 'dual port virtio scsi controller' rather than 
trying to do anything automatic.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:09                                                   ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:09 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 03:52 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>> We should just tell the user which slots are open.
>>>   This might be tricky if the config is passed in with the command line
>>> flags.
>>
>> qemu -show-available-pci-slots
>
> Why does the user care?
>
> Let QEMU allocate the PCI slot, then query it to see what slot it 
> assigned and remember that.

It's a roundabout way of doing things.

As an example, if you try to fit too many devices into the machine, you 
have to try to add all devices and watch for a qemu error.  If you know 
in advance how many slots you have, you never enter into that situation 
(and you need to show the limit to the user anyway).

>
> It's not a good idea to have management applications attempt to do PCI 
> slot allocation.  For instance, one day we may decide to make virtio 
> devices multi-function.

Non-virtio, as well.  But we can't make that the default, so the user 
will have to specify this anyway.

Given that you can't hotunplug individual functions, the user will have 
to specify exactly how functions are aggregated into devices.  My 
recommendation would be for a GUI to allow the user to select a 'quad 
port virtio NIC' or 'dual port virtio scsi controller' rather than 
trying to do anything automatic.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:52                                                 ` Anthony Liguori
  (?)
@ 2009-06-15 13:09                                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:09 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:52 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>> We should just tell the user which slots are open.
>>>   This might be tricky if the config is passed in with the command line
>>> flags.
>>
>> qemu -show-available-pci-slots
>
> Why does the user care?
>
> Let QEMU allocate the PCI slot, then query it to see what slot it 
> assigned and remember that.

It's a roundabout way of doing things.

As an example, if you try to fit too many devices into the machine, you 
have to try to add all devices and watch for a qemu error.  If you know 
in advance how many slots you have, you never enter into that situation 
(and you need to show the limit to the user anyway).

>
> It's not a good idea to have management applications attempt to do PCI 
> slot allocation.  For instance, one day we may decide to make virtio 
> devices multi-function.

Non-virtio, as well.  But we can't make that the default, so the user 
will have to specify this anyway.

Given that you can't hotunplug individual functions, the user will have 
to specify exactly how functions are aggregated into devices.  My 
recommendation would be for a GUI to allow the user to select a 'quad 
port virtio NIC' or 'dual port virtio scsi controller' rather than 
trying to do anything automatic.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:48                                     ` Anthony Liguori
@ 2009-06-15 13:12                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:12 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:48 PM, Anthony Liguori wrote:
>>>>> device tree lacking said attribute, distinguish between a device tree
>>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>>
>>>> -baseline 0.10
>>>
>>> That's a version number :-)
>>>
>>> (I was responding to Anthony's "you don't need a version number")
>>
>> If you want to prevent incompatibilities, you need to make everything 
>> new (potentially including bugfixes) non-default.  Eventually the 
>> default configuration becomes increasingly unusable and you need a 
>> new baseline.  You must still be able to fall back to the old 
>> baseline for older guests.  I don't think games with configuration 
>> files can hide that.
> How do you add a new attribute to the device tree and, when a supplied
>
> -M pc1
> -M pc2

Certainly preferable to -baseline.

> This is pretty easy to maintain with config files.

Let's not tie the two together.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:12                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:12 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 03:48 PM, Anthony Liguori wrote:
>>>>> device tree lacking said attribute, distinguish between a device tree
>>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>>
>>>> -baseline 0.10
>>>
>>> That's a version number :-)
>>>
>>> (I was responding to Anthony's "you don't need a version number")
>>
>> If you want to prevent incompatibilities, you need to make everything 
>> new (potentially including bugfixes) non-default.  Eventually the 
>> default configuration becomes increasingly unusable and you need a 
>> new baseline.  You must still be able to fall back to the old 
>> baseline for older guests.  I don't think games with configuration 
>> files can hide that.
> How do you add a new attribute to the device tree and, when a supplied
>
> -M pc1
> -M pc2

Certainly preferable to -baseline.

> This is pretty easy to maintain with config files.

Let's not tie the two together.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:48                                     ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 13:12                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:12 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 03:48 PM, Anthony Liguori wrote:
>>>>> device tree lacking said attribute, distinguish between a device tree
>>>>> from an old version of qemu (i.e. use the old default) and a partial
>>>>> device tree from the VM manager (i.e. use the new default) ?
>>>>>
>>>> -baseline 0.10
>>>
>>> That's a version number :-)
>>>
>>> (I was responding to Anthony's "you don't need a version number")
>>
>> If you want to prevent incompatibilities, you need to make everything 
>> new (potentially including bugfixes) non-default.  Eventually the 
>> default configuration becomes increasingly unusable and you need a 
>> new baseline.  You must still be able to fall back to the old 
>> baseline for older guests.  I don't think games with configuration 
>> files can hide that.
> How do you add a new attribute to the device tree and, when a supplied
>
> -M pc1
> -M pc2

Certainly preferable to -baseline.

> This is pretty easy to maintain with config files.

Let's not tie the two together.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:45                                   ` Anthony Liguori
@ 2009-06-15 13:17                                     ` Gerd Hoffmann
  -1 siblings, 0 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-06-15 13:17 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Avi Kivity, Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier,
	Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

   Hi,

>> Yes, the user build the machine using the command line and monitor
>> (or, in 2017, the machine configuration file),
>
> Considering pbrook just posted a machine config for arm, I think it
> would be rather sad if pc wasn't converted to it by 2017...

It shouldn't last until 2017, but the process isn't that trivial.
Some qemu code / control flow needs serious restructuring until it is in a
state that creating the devices from a fdt can actually work.

> Drivers don't have indexes and buses but we specify it on the -drive
> line. -drive is convenient syntax. It stops being convenient when you
> force it to be two options.

One more issue:  -drive also mixes host and guest configuration.  These 
must be separated too.

cheers,
   Gerd


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:17                                     ` Gerd Hoffmann
  0 siblings, 0 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-06-15 13:17 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

   Hi,

>> Yes, the user build the machine using the command line and monitor
>> (or, in 2017, the machine configuration file),
>
> Considering pbrook just posted a machine config for arm, I think it
> would be rather sad if pc wasn't converted to it by 2017...

It shouldn't last until 2017, but the process isn't that trivial.
Some qemu code / control flow needs serious restructuring until it is in a
state that creating the devices from a fdt can actually work.

> Drivers don't have indexes and buses but we specify it on the -drive
> line. -drive is convenient syntax. It stops being convenient when you
> force it to be two options.

One more issue:  -drive also mixes host and guest configuration.  These 
must be separated too.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:45                                   ` Anthony Liguori
                                                     ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 13:17                                   ` Gerd Hoffmann
  -1 siblings, 0 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-06-15 13:17 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity, Paul Brook

   Hi,

>> Yes, the user build the machine using the command line and monitor
>> (or, in 2017, the machine configuration file),
>
> Considering pbrook just posted a machine config for arm, I think it
> would be rather sad if pc wasn't converted to it by 2017...

It shouldn't last until 2017, but the process isn't that trivial.
Some qemu code / control flow needs serious restructuring until it is in a
state that creating the devices from a fdt can actually work.

> Drivers don't have indexes and buses but we specify it on the -drive
> line. -drive is convenient syntax. It stops being convenient when you
> force it to be two options.

One more issue:  -drive also mixes host and guest configuration.  These 
must be separated too.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:03                                     ` Avi Kivity
@ 2009-06-15 13:20                                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:20 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>
>>> Yes, the user build the machine using the command line and monitor 
>>> (or, in 2017, the machine configuration file),
>>
>>
>> Considering pbrook just posted a machine config for arm, I think it 
>> would be rather sad if pc wasn't converted to it by 2017...
>
> I'd be sad too, but not surprised.
>
>>> then turns on the power.  Command line options are the parts lying 
>>> around when we start.
>>>
>>> btw, -drive needs to be separated:
>>>
>>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>>   -drive file=foo.img,controller=blah,index=0
>>>   -drive file=bar.img,controller=blah,index=1
>>>
>>> Drives do not have pci addresses.
>>
>> Drivers don't have indexes and buses but we specify it on the -drive 
>> line. 
>
> Drives do have indexes.  On old parallel scsi drives you set the index 
> by clicking a button on the back of the drive to cycle through scsi 
> addresses 0-7.  An IDE drive's index is determined by the cable 
> (master/slave).  A SATA drive's index is determined by which header on 
> the motherboard the drive connects to.

It's not at all that simple.   SCSI has a hierarchical address mechanism 
with 0-7 targets but then potentially multiple LUNs per target.  Today, 
we always emulate a single LUN per target but if we ever wanted to 
support more than 7 disks on a SCSI controller, we would have to add 
multiple LUN support too.  So the current linear unit= parameter is 
actually pretty broken for SCSI.

For IDE, it's a combination of bus, slot, and master/slave.  For virtio, 
it's just a PCI address.  What we really need is something that is more 
opaque and controller specific.  For instance, if we were going to do 
controllers...

-controller type=lsi1234,pci_addr=foobar,name=blah
-controller-disk controller=blah,target=0,lun=1,name=sda

-controller type=ide,pci_addr=barfoo,name=ide
-controller-disk controller=ide,slot=secondary,cable=slave,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

And having "-hdd file=foo.img" be short-hand for "-drive 
file=%s,controller-disk=%s".

>
>
> If by bus you mean the if= parameter, then drives certainly do have 
> buses.  Just try connecting the scsi drive from the previous paragraph 
> to a USB port.

No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious what 
it should do to you that's because it isn't at all obvious :-)  It ends 
up skipping a predefined number of locations in the drive table.  This 
is pretty broken fundamentally because it assumes controllers always 
support a fixed number of devices.  Nothing really respects bus_id 
though so in practice, I assume it's almost universally broken.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:20                                       ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:20 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>
>>> Yes, the user build the machine using the command line and monitor 
>>> (or, in 2017, the machine configuration file),
>>
>>
>> Considering pbrook just posted a machine config for arm, I think it 
>> would be rather sad if pc wasn't converted to it by 2017...
>
> I'd be sad too, but not surprised.
>
>>> then turns on the power.  Command line options are the parts lying 
>>> around when we start.
>>>
>>> btw, -drive needs to be separated:
>>>
>>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>>   -drive file=foo.img,controller=blah,index=0
>>>   -drive file=bar.img,controller=blah,index=1
>>>
>>> Drives do not have pci addresses.
>>
>> Drivers don't have indexes and buses but we specify it on the -drive 
>> line. 
>
> Drives do have indexes.  On old parallel scsi drives you set the index 
> by clicking a button on the back of the drive to cycle through scsi 
> addresses 0-7.  An IDE drive's index is determined by the cable 
> (master/slave).  A SATA drive's index is determined by which header on 
> the motherboard the drive connects to.

It's not at all that simple.   SCSI has a hierarchical address mechanism 
with 0-7 targets but then potentially multiple LUNs per target.  Today, 
we always emulate a single LUN per target but if we ever wanted to 
support more than 7 disks on a SCSI controller, we would have to add 
multiple LUN support too.  So the current linear unit= parameter is 
actually pretty broken for SCSI.

For IDE, it's a combination of bus, slot, and master/slave.  For virtio, 
it's just a PCI address.  What we really need is something that is more 
opaque and controller specific.  For instance, if we were going to do 
controllers...

-controller type=lsi1234,pci_addr=foobar,name=blah
-controller-disk controller=blah,target=0,lun=1,name=sda

-controller type=ide,pci_addr=barfoo,name=ide
-controller-disk controller=ide,slot=secondary,cable=slave,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

And having "-hdd file=foo.img" be short-hand for "-drive 
file=%s,controller-disk=%s".

>
>
> If by bus you mean the if= parameter, then drives certainly do have 
> buses.  Just try connecting the scsi drive from the previous paragraph 
> to a USB port.

No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious what 
it should do to you that's because it isn't at all obvious :-)  It ends 
up skipping a predefined number of locations in the drive table.  This 
is pretty broken fundamentally because it assumes controllers always 
support a fixed number of devices.  Nothing really respects bus_id 
though so in practice, I assume it's almost universally broken.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:03                                     ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 13:20                                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:20 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:45 PM, Anthony Liguori wrote:
>>>> This last option makes sense to me: in a real world the user has
>>>> control over where he places the device on the bus, so why
>>>> not with qemu?
>>>
>>> Yes, the user build the machine using the command line and monitor 
>>> (or, in 2017, the machine configuration file),
>>
>>
>> Considering pbrook just posted a machine config for arm, I think it 
>> would be rather sad if pc wasn't converted to it by 2017...
>
> I'd be sad too, but not surprised.
>
>>> then turns on the power.  Command line options are the parts lying 
>>> around when we start.
>>>
>>> btw, -drive needs to be separated:
>>>
>>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>>   -drive file=foo.img,controller=blah,index=0
>>>   -drive file=bar.img,controller=blah,index=1
>>>
>>> Drives do not have pci addresses.
>>
>> Drivers don't have indexes and buses but we specify it on the -drive 
>> line. 
>
> Drives do have indexes.  On old parallel scsi drives you set the index 
> by clicking a button on the back of the drive to cycle through scsi 
> addresses 0-7.  An IDE drive's index is determined by the cable 
> (master/slave).  A SATA drive's index is determined by which header on 
> the motherboard the drive connects to.

It's not at all that simple.   SCSI has a hierarchical address mechanism 
with 0-7 targets but then potentially multiple LUNs per target.  Today, 
we always emulate a single LUN per target but if we ever wanted to 
support more than 7 disks on a SCSI controller, we would have to add 
multiple LUN support too.  So the current linear unit= parameter is 
actually pretty broken for SCSI.

For IDE, it's a combination of bus, slot, and master/slave.  For virtio, 
it's just a PCI address.  What we really need is something that is more 
opaque and controller specific.  For instance, if we were going to do 
controllers...

-controller type=lsi1234,pci_addr=foobar,name=blah
-controller-disk controller=blah,target=0,lun=1,name=sda

-controller type=ide,pci_addr=barfoo,name=ide
-controller-disk controller=ide,slot=secondary,cable=slave,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

And having "-hdd file=foo.img" be short-hand for "-drive 
file=%s,controller-disk=%s".

>
>
> If by bus you mean the if= parameter, then drives certainly do have 
> buses.  Just try connecting the scsi drive from the previous paragraph 
> to a USB port.

No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious what 
it should do to you that's because it isn't at all obvious :-)  It ends 
up skipping a predefined number of locations in the drive table.  This 
is pretty broken fundamentally because it assumes controllers always 
support a fixed number of devices.  Nothing really respects bus_id 
though so in practice, I assume it's almost universally broken.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:09                                                   ` Avi Kivity
@ 2009-06-15 13:23                                                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>> We should just tell the user which slots are open.
>>>>   This might be tricky if the config is passed in with the command 
>>>> line
>>>> flags.
>>>
>>> qemu -show-available-pci-slots
>>
>> Why does the user care?
>>
>> Let QEMU allocate the PCI slot, then query it to see what slot it 
>> assigned and remember that.
>
> It's a roundabout way of doing things.

Having libvirt do PCI slot allocation scares me.  It assumes we can 
return a whitelist of available slots, and then let libvirt just 
randomly assign things.  There's knowledge though in slot assignment 
that's board-specific.  For instance, depending on how many LNK lines 
you have, you may want to put things in slots in such a way to optimize 
interrupt balancing or something like that.

Some platforms have quirks about expecting a particular slot to have a 
particular device.  It's still an optional device but it has to be in 
that slot.  You can't really express that via an available slot list.

> Non-virtio, as well.  But we can't make that the default, so the user 
> will have to specify this anyway.
>
> Given that you can't hotunplug individual functions, the user will 
> have to specify exactly how functions are aggregated into devices.  My 
> recommendation would be for a GUI to allow the user to select a 'quad 
> port virtio NIC' or 'dual port virtio scsi controller' rather than 
> trying to do anything automatic.

Yeah, I haven't thought much about that.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:23                                                     ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>> We should just tell the user which slots are open.
>>>>   This might be tricky if the config is passed in with the command 
>>>> line
>>>> flags.
>>>
>>> qemu -show-available-pci-slots
>>
>> Why does the user care?
>>
>> Let QEMU allocate the PCI slot, then query it to see what slot it 
>> assigned and remember that.
>
> It's a roundabout way of doing things.

Having libvirt do PCI slot allocation scares me.  It assumes we can 
return a whitelist of available slots, and then let libvirt just 
randomly assign things.  There's knowledge though in slot assignment 
that's board-specific.  For instance, depending on how many LNK lines 
you have, you may want to put things in slots in such a way to optimize 
interrupt balancing or something like that.

Some platforms have quirks about expecting a particular slot to have a 
particular device.  It's still an optional device but it has to be in 
that slot.  You can't really express that via an available slot list.

> Non-virtio, as well.  But we can't make that the default, so the user 
> will have to specify this anyway.
>
> Given that you can't hotunplug individual functions, the user will 
> have to specify exactly how functions are aggregated into devices.  My 
> recommendation would be for a GUI to allow the user to select a 'quad 
> port virtio NIC' or 'dual port virtio scsi controller' rather than 
> trying to do anything automatic.

Yeah, I haven't thought much about that.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:09                                                   ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 13:23                                                   ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>> We should just tell the user which slots are open.
>>>>   This might be tricky if the config is passed in with the command 
>>>> line
>>>> flags.
>>>
>>> qemu -show-available-pci-slots
>>
>> Why does the user care?
>>
>> Let QEMU allocate the PCI slot, then query it to see what slot it 
>> assigned and remember that.
>
> It's a roundabout way of doing things.

Having libvirt do PCI slot allocation scares me.  It assumes we can 
return a whitelist of available slots, and then let libvirt just 
randomly assign things.  There's knowledge though in slot assignment 
that's board-specific.  For instance, depending on how many LNK lines 
you have, you may want to put things in slots in such a way to optimize 
interrupt balancing or something like that.

Some platforms have quirks about expecting a particular slot to have a 
particular device.  It's still an optional device but it has to be in 
that slot.  You can't really express that via an available slot list.

> Non-virtio, as well.  But we can't make that the default, so the user 
> will have to specify this anyway.
>
> Given that you can't hotunplug individual functions, the user will 
> have to specify exactly how functions are aggregated into devices.  My 
> recommendation would be for a GUI to allow the user to select a 'quad 
> port virtio NIC' or 'dual port virtio scsi controller' rather than 
> trying to do anything automatic.

Yeah, I haven't thought much about that.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:12                                       ` Avi Kivity
@ 2009-06-15 13:24                                         ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:24 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
>
> Certainly preferable to -baseline.
>
>> This is pretty easy to maintain with config files.
>
> Let's not tie the two together.

I mentioned it because it suggests a good transition.  We at least have 
to think through how things map to the post-config file world regardless 
of whether that's a few months from now or a decade :-)

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:24                                         ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:24 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
>
> Certainly preferable to -baseline.
>
>> This is pretty easy to maintain with config files.
>
> Let's not tie the two together.

I mentioned it because it suggests a good transition.  We at least have 
to think through how things map to the post-config file world regardless 
of whether that's a few months from now or a decade :-)

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:12                                       ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 13:24                                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:24 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
>
> Certainly preferable to -baseline.
>
>> This is pretty easy to maintain with config files.
>
> Let's not tie the two together.

I mentioned it because it suggests a good transition.  We at least have 
to think through how things map to the post-config file world regardless 
of whether that's a few months from now or a decade :-)

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:20                                       ` Anthony Liguori
@ 2009-06-15 13:35                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:35 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>
>>>> then turns on the power.  Command line options are the parts lying 
>>>> around when we start.
>>>>
>>>> btw, -drive needs to be separated:
>>>>
>>>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>>>   -drive file=foo.img,controller=blah,index=0
>>>>   -drive file=bar.img,controller=blah,index=1
>>>>
>>>> Drives do not have pci addresses.
>>>
>>> Drivers don't have indexes and buses but we specify it on the -drive 
>>> line. 
>>
>> Drives do have indexes.  On old parallel scsi drives you set the 
>> index by clicking a button on the back of the drive to cycle through 
>> scsi addresses 0-7.  An IDE drive's index is determined by the cable 
>> (master/slave).  A SATA drive's index is determined by which header 
>> on the motherboard the drive connects to.
>
>
> It's not at all that simple.   SCSI has a hierarchical address 
> mechanism with 0-7 targets but then potentially multiple LUNs per 
> target.  Today, we always emulate a single LUN per target but if we 
> ever wanted to support more than 7 disks on a SCSI controller, we 
> would have to add multiple LUN support too.  So the current linear 
> unit= parameter is actually pretty broken for SCSI.

Well, another level in the hierarchy, but I don't think it materially 
changes things.

>
> For IDE, it's a combination of bus, slot, and master/slave.  For 
> virtio, it's just a PCI address.  What we really need is something 
> that is more opaque and controller specific. 

virtio also has a bus (did you mean the pci bus for IDE?), master/slave 
is the index.  virtio doesn't have index, but IMO that was a mistake and 
we should have designed it as a disk controller in the first place.

> For instance, if we were going to do controllers...
>
> -controller type=lsi1234,pci_addr=foobar,name=blah
> -controller-disk controller=blah,target=0,lun=1,name=sda
>
> -controller type=ide,pci_addr=barfoo,name=ide
> -controller-disk controller=ide,slot=secondary,cable=slave,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> And having "-hdd file=foo.img" be short-hand for "-drive 
> file=%s,controller-disk=%s".

Yeah.

>>
>>
>> If by bus you mean the if= parameter, then drives certainly do have 
>> buses.  Just try connecting the scsi drive from the previous 
>> paragraph to a USB port.
>
> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
> what it should do to you that's because it isn't at all obvious :-)  
> It ends up skipping a predefined number of locations in the drive 
> table.  This is pretty broken fundamentally because it assumes 
> controllers always support a fixed number of devices.  Nothing really 
> respects bus_id though so in practice, I assume it's almost 
> universally broken.

Isn't the drive table something totally internal?  And how does bus= 
relate to it?

Confused.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:35                                         ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:35 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>
>>>> then turns on the power.  Command line options are the parts lying 
>>>> around when we start.
>>>>
>>>> btw, -drive needs to be separated:
>>>>
>>>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>>>   -drive file=foo.img,controller=blah,index=0
>>>>   -drive file=bar.img,controller=blah,index=1
>>>>
>>>> Drives do not have pci addresses.
>>>
>>> Drivers don't have indexes and buses but we specify it on the -drive 
>>> line. 
>>
>> Drives do have indexes.  On old parallel scsi drives you set the 
>> index by clicking a button on the back of the drive to cycle through 
>> scsi addresses 0-7.  An IDE drive's index is determined by the cable 
>> (master/slave).  A SATA drive's index is determined by which header 
>> on the motherboard the drive connects to.
>
>
> It's not at all that simple.   SCSI has a hierarchical address 
> mechanism with 0-7 targets but then potentially multiple LUNs per 
> target.  Today, we always emulate a single LUN per target but if we 
> ever wanted to support more than 7 disks on a SCSI controller, we 
> would have to add multiple LUN support too.  So the current linear 
> unit= parameter is actually pretty broken for SCSI.

Well, another level in the hierarchy, but I don't think it materially 
changes things.

>
> For IDE, it's a combination of bus, slot, and master/slave.  For 
> virtio, it's just a PCI address.  What we really need is something 
> that is more opaque and controller specific. 

virtio also has a bus (did you mean the pci bus for IDE?), master/slave 
is the index.  virtio doesn't have index, but IMO that was a mistake and 
we should have designed it as a disk controller in the first place.

> For instance, if we were going to do controllers...
>
> -controller type=lsi1234,pci_addr=foobar,name=blah
> -controller-disk controller=blah,target=0,lun=1,name=sda
>
> -controller type=ide,pci_addr=barfoo,name=ide
> -controller-disk controller=ide,slot=secondary,cable=slave,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> And having "-hdd file=foo.img" be short-hand for "-drive 
> file=%s,controller-disk=%s".

Yeah.

>>
>>
>> If by bus you mean the if= parameter, then drives certainly do have 
>> buses.  Just try connecting the scsi drive from the previous 
>> paragraph to a USB port.
>
> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
> what it should do to you that's because it isn't at all obvious :-)  
> It ends up skipping a predefined number of locations in the drive 
> table.  This is pretty broken fundamentally because it assumes 
> controllers always support a fixed number of devices.  Nothing really 
> respects bus_id though so in practice, I assume it's almost 
> universally broken.

Isn't the drive table something totally internal?  And how does bus= 
relate to it?

Confused.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:20                                       ` Anthony Liguori
  (?)
@ 2009-06-15 13:35                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:35 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>
>>>> then turns on the power.  Command line options are the parts lying 
>>>> around when we start.
>>>>
>>>> btw, -drive needs to be separated:
>>>>
>>>>   -controller type=lsi1234,pci_addr=foobar,name=blah
>>>>   -drive file=foo.img,controller=blah,index=0
>>>>   -drive file=bar.img,controller=blah,index=1
>>>>
>>>> Drives do not have pci addresses.
>>>
>>> Drives don't have indexes and buses but we specify it on the -drive 
>>> line. 
>>
>> Drives do have indexes.  On old parallel scsi drives you set the 
>> index by clicking a button on the back of the drive to cycle through 
>> scsi addresses 0-7.  An IDE drive's index is determined by the cable 
>> (master/slave).  A SATA drive's index is determined by which header 
>> on the motherboard the drive connects to.
>
>
> It's not at all that simple.   SCSI has a hierarchical address 
> mechanism with 0-7 targets but then potentially multiple LUNs per 
> target.  Today, we always emulate a single LUN per target but if we 
> ever wanted to support more than 7 disks on a SCSI controller, we 
> would have to add multiple LUN support too.  So the current linear 
> unit= parameter is actually pretty broken for SCSI.

Well, another level in the hierarchy, but I don't think it materially 
changes things.

>
> For IDE, it's a combination of bus, slot, and master/slave.  For 
> virtio, it's just a PCI address.  What we really need is something 
> that is more opaque and controller specific. 

virtio also has a bus (did you mean the pci bus for IDE?), master/slave 
is the index.  virtio doesn't have index, but IMO that was a mistake and 
we should have designed it as a disk controller in the first place.

> For instance, if we were going to do controllers...
>
> -controller type=lsi1234,pci_addr=foobar,name=blah
> -controller-disk controller=blah,target=0,lun=1,name=sda
>
> -controller type=ide,pci_addr=barfoo,name=ide
> -controller-disk controller=ide,slot=secondary,cable=slave,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> And having "-hdd file=foo.img" be short-hand for "-drive 
> file=%s,controller-disk=%s".

Yeah.

>>
>>
>> If by bus you mean the if= parameter, then drives certainly do have 
>> buses.  Just try connecting the scsi drive from the previous 
>> paragraph to a USB port.
>
> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
> what it should do to you that's because it isn't at all obvious :-)  
> It ends up skipping a predefined number of locations in the drive 
> table.  This is pretty broken fundamentally because it assumes 
> controllers always support a fixed number of devices.  Nothing really 
> respects bus_id though so in practice, I assume it's almost 
> universally broken.

Isn't the drive table something totally internal?  And how does bus= 
relate to it?

Confused.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:23                                                     ` Anthony Liguori
@ 2009-06-15 13:42                                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:42 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:23 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>>> Avi Kivity wrote:
>>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>>> We should just tell the user which slots are open.
>>>>>   This might be tricky if the config is passed in with the command 
>>>>> line
>>>>> flags.
>>>>
>>>> qemu -show-available-pci-slots
>>>
>>> Why does the user care?
>>>
>>> Let QEMU allocate the PCI slot, then query it to see what slot it 
>>> assigned and remember that.
>>
>> It's a roundabout way of doing things.
>
> Having libvirt do PCI slot allocation scares me.  It assumes we can 
> return a whitelist of available slots, and then let libvirt just 
> randomly assign things.  There's knowledge though in slot assignment 
> that's board-specific.  For instance, depending on how many LNK lines 
> you have, you may want to put things in slots in such a way to 
> optimize interrupt balancing or something like that.

How would qemu know which slots to optimize for?

In practice, I don't see that as a real problem.  We should (a) add an 
ioapic and four more pci links (b) recommend that slots be assigned in 
ascending order, and everything works.

I don't see your concern about libvirt allocating slots.  If a human can 
plug a card into a slot, so can libvirt.  Doing an interactive 
back-and-forth (equivalent to plugging a card while blindfolded, then 
looking to see which slot we hit) is certainly more difficult.

> Some platforms have quirks about expecting a particular slot to have a 
> particular device.  It's still an optional device but it has to be in 
> that slot.  You can't really express that via an available slot list.

I'll be surprised if we ever measure different dma speeds on different 
slots in the qemu virtual pci bus.  If we do, we'll find a way to 
express them:

   $ qemu -print-pci
   slot 0:01: available 33MHz
   slot 0:02: available 33MHz
   slot 0:03: available 66MHz

I feel a little silly typing this.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:42                                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:42 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 04:23 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>>> Avi Kivity wrote:
>>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>>> We should just tell the user which slots are open.
>>>>>   This might be tricky if the config is passed in with the command 
>>>>> line
>>>>> flags.
>>>>
>>>> qemu -show-available-pci-slots
>>>
>>> Why does the user care?
>>>
>>> Let QEMU allocate the PCI slot, then query it to see what slot it 
>>> assigned and remember that.
>>
>> It's a roundabout way of doing things.
>
> Having libvirt do PCI slot allocation scares me.  It assumes we can 
> return a whitelist of available slots, and then let libvirt just 
> randomly assign things.  There's knowledge though in slot assignment 
> that's board-specific.  For instance, depending on how many LNK lines 
> you have, you may want to put things in slots in such a way to 
> optimize interrupt balancing or something like that.

How would qemu know which slots to optimize for?

In practice, I don't see that as a real problem.  We should (a) add an 
ioapic and four more pci links (b) recommend that slots be assigned in 
ascending order, and everything works.

I don't see your concern about libvirt allocating slots.  If a human can 
plug a card into a slot, so can libvirt.  Doing an interactive 
back-and-forth (equivalent to plugging a card while blindfolded, then 
looking to see which slot we hit) is certainly more difficult.

> Some platforms have quirks about expecting a particular slot to have a 
> particular device.  It's still an optional device but it has to be in 
> that slot.  You can't really express that via an available slot list.

I'll be surprised if we ever measure different dma speeds on different 
slots in the qemu virtual pci bus.  If we do, we'll find a way to 
express them:

   $ qemu -print-pci
   slot 0:01: available 33MHz
   slot 0:02: available 33MHz
   slot 0:03: available 66MHz

I feel a little silly typing this.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:23                                                     ` Anthony Liguori
  (?)
@ 2009-06-15 13:42                                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:42 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:23 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 03:52 PM, Anthony Liguori wrote:
>>> Avi Kivity wrote:
>>>> On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:
>>>>> We should just tell the user which slots are open.
>>>>>   This might be tricky if the config is passed in with the command 
>>>>> line
>>>>> flags.
>>>>
>>>> qemu -show-available-pci-slots
>>>
>>> Why does the user care?
>>>
>>> Let QEMU allocate the PCI slot, then query it to see what slot it 
>>> assigned and remember that.
>>
>> It's a roundabout way of doing things.
>
> Having libvirt do PCI slot allocation scares me.  It assumes we can 
> return a whitelist of available slots, and then let libvirt just 
> randomly assign things.  There's knowledge though in slot assignment 
> that's board-specific.  For instance, depending on how many LNK lines 
> you have, you may want to put things in slots in such a way to 
> optimize interrupt balancing or something like that.

How would qemu know which slots to optimize for?

In practice, I don't see that as a real problem.  We should (a) add an 
ioapic and four more pci links (b) recommend that slots be assigned in 
ascending order, and everything works.

I don't see your concern about libvirt allocating slots.  If a human can 
plug a card into a slot, so can libvirt.  Doing an interactive 
back-and-forth (equivalent to plugging a card while blindfolded, then 
looking to see which slot we hit) is certainly more difficult.

> Some platforms have quirks about expecting a particular slot to have a 
> particular device.  It's still an optional device but it has to be in 
> that slot.  You can't really express that via an available slot list.

I'll be surprised if we ever measure different dma speeds on different 
slots in the qemu virtual pci bus.  If we do, we'll find a way to 
express them:

   $ qemu -print-pci
   slot 0:01: available 33MHz
   slot 0:02: available 33MHz
   slot 0:03: available 66MHz

I feel a little silly typing this.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:24                                         ` Anthony Liguori
@ 2009-06-15 13:43                                           ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:43 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Jamie Lokier, Michael S. Tsirkin, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:24 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> Certainly preferable to -baseline.
>>
>>> This is pretty easy to maintain with config files.
>>
>> Let's not tie the two together.
>
> I mentioned it because it suggests a good transition.  We at least 
> have to think through how things map to the post-config file world 
> regardless of whether that's a few months from now or a decade :-)

Sure, it's good both from the transitional point of view and in its own 
right.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:43                                           ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:43 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 04:24 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> Certainly preferable to -baseline.
>>
>>> This is pretty easy to maintain with config files.
>>
>> Let's not tie the two together.
>
> I mentioned it because it suggests a good transition.  We at least 
> have to think through how things map to the post-config file world 
> regardless of whether that's a few months from now or a decade :-)

Sure, it's good both from the transitional point of view and in its own 
right.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:24                                         ` Anthony Liguori
  (?)
@ 2009-06-15 13:43                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:43 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:24 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> Certainly preferable to -baseline.
>>
>>> This is pretty easy to maintain with config files.
>>
>> Let's not tie the two together.
>
> I mentioned it because it suggests a good transition.  We at least 
> have to think through how things map to the post-config file world 
> regardless of whether that's a few months from now or a decade :-)

Sure, it's good both from the transitional point of view and in its own 
right.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:35                                         ` Avi Kivity
@ 2009-06-15 13:45                                           ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:45 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>> It's not at all that simple.   SCSI has a hierarchical address 
>> mechanism with 0-7 targets but then potentially multiple LUNs per 
>> target.  Today, we always emulate a single LUN per target but if we 
>> ever wanted to support more than 7 disks on a SCSI controller, we 
>> would have to add multiple LUN support too.  So the current linear 
>> unit= parameter is actually pretty broken for SCSI.
>
> Well, another level in the hierarchy, but I don't think it materially 
> changes things.

Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
mean the latter, then it's quite conceivable that each target supports 
less than the maximum number of LUNs.  This makes things pretty 
confusing to the user because they have to know that in the current 
implementation, index=0 is valid, index=1 isn't, but index=8 is.

>> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
>> what it should do to you that's because it isn't at all obvious :-)  
>> It ends up skipping a predefined number of locations in the drive 
>> table.  This is pretty broken fundamentally because it assumes 
>> controllers always support a fixed number of devices.  Nothing really 
>> respects bus_id though so in practice, I assume it's almost 
>> universally broken.
>
> Isn't the drive table something totally internal?  And how does bus= 
> relate to it?

The reality of unit=X,bus=Y,if=Z is that they expand to:

drive_table_index=Y*max_devs[Z] + X

Whereas max_devs = {"ide":4, "scsi": 7, *:0}

How drive_table_index is interpreted is "if" specific.  For if=scsi, 
each lsi device gets a base drive table index that starts at bus_index * 
7.  For virtio, the first empty spot in drive_table results in no more 
drives being created.

It's broken by design.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:45                                           ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:45 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>> It's not at all that simple.   SCSI has a hierarchical address 
>> mechanism with 0-7 targets but then potentially multiple LUNs per 
>> target.  Today, we always emulate a single LUN per target but if we 
>> ever wanted to support more than 7 disks on a SCSI controller, we 
>> would have to add multiple LUN support too.  So the current linear 
>> unit= parameter is actually pretty broken for SCSI.
>
> Well, another level in the hierarchy, but I don't think it materially 
> changes things.

Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
mean the latter, then it's quite conceivable that each target supports 
less than the maximum number of LUNs.  This makes things pretty 
confusing to the user because they have to know that in the current 
implementation, index=0 is valid, index=1 isn't, but index=8 is.

>> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
>> what it should do to you that's because it isn't at all obvious :-)  
>> It ends up skipping a predefined number of locations in the drive 
>> table.  This is pretty broken fundamentally because it assumes 
>> controllers always support a fixed number of devices.  Nothing really 
>> respects bus_id though so in practice, I assume it's almost 
>> universally broken.
>
> Isn't the drive table something totally internal?  And how does bus= 
> relate to it?

The reality of unit=X,bus=Y,if=Z is that they expand to:

drive_table_index=Y*max_devs[Z] + X

Whereas max_devs = {"ide":4, "scsi": 7, *:0}

How drive_table_index is interpreted is "if" specific.  For if=scsi, 
each lsi device gets a base drive table index that starts at bus_index * 
7.  For virtio, the first empty spot in drive_table results in no more 
drives being created.

It's broken by design.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:35                                         ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 13:45                                         ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:45 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>> It's not at all that simple.   SCSI has a hierarchical address 
>> mechanism with 0-7 targets but then potentially multiple LUNs per 
>> target.  Today, we always emulate a single LUN per target but if we 
>> ever wanted to support more than 7 disks on a SCSI controller, we 
>> would have to add multiple LUN support too.  So the current linear 
>> unit= parameter is actually pretty broken for SCSI.
>
> Well, another level in the hierarchy, but I don't think it materially 
> changes things.

Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
mean the latter, then it's quite conceivable that each target supports 
less than the maximum number of LUNs.  This makes things pretty 
confusing to the user because they have to know that in the current 
implementation, index=0 is valid, index=1 isn't, but index=8 is.

>> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
>> what it should do to you that's because it isn't at all obvious :-)  
>> It ends up skipping a predefined number of locations in the drive 
>> table.  This is pretty broken fundamentally because it assumes 
>> controllers always support a fixed number of devices.  Nothing really 
>> respects bus_id though so in practice, I assume it's almost 
>> universally broken.
>
> Isn't the drive table something totally internal?  And how does bus= 
> relate to it?

The reality of unit=X,bus=Y,if=Z is that they expand to:

drive_table_index=Y*max_devs[Z] + X

Whereas max_devs = {"ide":4, "scsi": 7, *:0}

How drive_table_index is interpreted is "if" specific.  For if=scsi, 
each lsi device gets a base drive table index that starts at bus_index * 
7.  For virtio, the first empty spot in drive_table results in no more 
drives being created.

It's broken by design.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:42                                                       ` Avi Kivity
@ 2009-06-15 13:51                                                         ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>
> How would qemu know which slots to optimize for?
>
> In practice, I don't see that as a real problem.  We should (a) add an 
> ioapic and four more pci links (b) recommend that slots be assigned in 
> ascending order, and everything works.
>
> I don't see your concern about libvirt allocating slots.  If a human 
> can plug a card into a slot, so can libvirt.  Doing an interactive 
> back-and-forth (equivalent to plugging a card while blindfolded, then 
> looking to see which slot we hit) is certainly more difficult.

Let's take a concrete example because I think you missed my point.  For 
the r2d board, if you have 1 on-board NIC, it has to go in slot 2.  
Additional NICs can go in any slot, but the primary on-board NIC is 
expected to live in slot 2.  It's possible to not have that on-board NIC.

If you let QEMU allocate which PCI slot a device goes in, we can hide 
this detail from libvirt.  If you have libvirt do PCI slot allocation by 
default, it has to know about this restriction in the r2d board unless 
you have a clever way to express this sort of information.

Once QEMU has allocated a device to a slot, libvirt can do a good job 
maintaining that relationship.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:51                                                         ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>
> How would qemu know which slots to optimize for?
>
> In practice, I don't see that as a real problem.  We should (a) add an 
> ioapic and four more pci links (b) recommend that slots be assigned in 
> ascending order, and everything works.
>
> I don't see your concern about libvirt allocating slots.  If a human 
> can plug a card into a slot, so can libvirt.  Doing an interactive 
> back-and-forth (equivalent to plugging a card while blindfolded, then 
> looking to see which slot we hit) is certainly more difficult.

Let's take a concrete example because I think you missed my point.  For 
the r2d board, if you have 1 on-board NIC, it has to go in slot 2.  
Additional NICs can go in any slot, but the primary on-board NIC is 
expected to live in slot 2.  It's possible to not have that on-board NIC.

If you let QEMU allocate which PCI slot a device goes in, we can hide 
this detail from libvirt.  If you have libvirt do PCI slot allocation by 
default, it has to know about this restriction in the r2d board unless 
you have a clever way to express this sort of information.

Once QEMU has allocated a device to a slot, libvirt can do a good job 
maintaining that relationship.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:42                                                       ` Avi Kivity
  (?)
  (?)
@ 2009-06-15 13:51                                                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 13:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>
> How would qemu know which slots to optimize for?
>
> In practice, I don't see that as a real problem.  We should (a) add an 
> ioapic and four more pci links (b) recommend that slots be assigned in 
> ascending order, and everything works.
>
> I don't see your concern about libvirt allocating slots.  If a human 
> can plug a card into a slot, so can libvirt.  Doing an interactive 
> back-and-forth (equivalent to plugging a card while blindfolded, then 
> looking to see which slot we hit) is certainly more difficult.

Let's take a concrete example because I think you missed my point.  For 
the r2d board, if you have 1 on-board NIC, it has to go in slot 2.  
Additional NICs can go in any slot, but the primary on-board NIC is 
expected to live in slot 2.  It's possible to not have that on-board NIC.

If you let QEMU allocate which PCI slot a device goes in, we can hide 
this detail from libvirt.  If you have libvirt do PCI slot allocation by 
default, it has to know about this restriction in the r2d board unless 
you have a clever way to express this sort of information.

Once QEMU has allocated a device to a slot, libvirt can do a good job 
maintaining that relationship.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:45                                           ` Anthony Liguori
@ 2009-06-15 13:54                                             ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:54 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:45 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>> It's not at all that simple.   SCSI has a hierarchical address 
>>> mechanism with 0-7 targets but then potentially multiple LUNs per 
>>> target.  Today, we always emulate a single LUN per target but if we 
>>> ever wanted to support more than 7 disks on a SCSI controller, we 
>>> would have to add multiple LUN support too.  So the current linear 
>>> unit= parameter is actually pretty broken for SCSI.
>>
>> Well, another level in the hierarchy, but I don't think it materially 
>> changes things.
>
> Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
> mean the latter, then it's quite conceivable that each target supports 
> less than the maximum number of LUNs.  This makes things pretty 
> confusing to the user because they have to know that in the current 
> implementation, index=0 is valid, index=1 isn't, but index=8 is.

I'd object to any implicit addressing rules.  If we have to say 
target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
index=8345345235 so be it.

>>> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
>>> what it should do to you that's because it isn't at all obvious :-)  
>>> It ends up skipping a predefined number of locations in the drive 
>>> table.  This is pretty broken fundamentally because it assumes 
>>> controllers always support a fixed number of devices.  Nothing 
>>> really respects bus_id though so in practice, I assume it's almost 
>>> universally broken.
>>
>> Isn't the drive table something totally internal?  And how does bus= 
>> relate to it?
>
> The reality of unit=X,bus=Y,if=Z is that they expand to:
>
> drive_table_index=Y*max_devs[Z] + X
>
> Whereas max_devs = {"ide":4, "scsi": 7, *:0}
>
> How drive_table_index is interpreted is "if" specific.  For if=scsi, 
> each lsi device gets a base drive table index that starts at bus_index 
> * 7.  For virtio, the first empty spot in drive_table results in no 
> more drives being created.
>
> It's broken by design.

Agreed.  Pity that it's exposed to the poor users.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 13:54                                             ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:54 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 04:45 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>> It's not at all that simple.   SCSI has a hierarchical address 
>>> mechanism with 0-7 targets but then potentially multiple LUNs per 
>>> target.  Today, we always emulate a single LUN per target but if we 
>>> ever wanted to support more than 7 disks on a SCSI controller, we 
>>> would have to add multiple LUN support too.  So the current linear 
>>> unit= parameter is actually pretty broken for SCSI.
>>
>> Well, another level in the hierarchy, but I don't think it materially 
>> changes things.
>
> Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
> mean the latter, then it's quite conceivable that each target supports 
> less than the maximum number of LUNs.  This makes things pretty 
> confusing to the user because they have to know that in the current 
> implementation, index=0 is valid, index=1 isn't, but index=8 is.

I'd object to any implicit addressing rules.  If we have to say 
target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
index=8345345235 so be it.

>>> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
>>> what it should do to you that's because it isn't at all obvious :-)  
>>> It ends up skipping a predefined number of locations in the drive 
>>> table.  This is pretty broken fundamentally because it assumes 
>>> controllers always support a fixed number of devices.  Nothing 
>>> really respects bus_id though so in practice, I assume it's almost 
>>> universally broken.
>>
>> Isn't the drive table something totally internal?  And how does bus= 
>> relate to it?
>
> The reality of unit=X,bus=Y,if=Z is that they expand to:
>
> drive_table_index=Y*max_devs[Z] + X
>
> Whereas max_devs = {"ide":4, "scsi": 7, *:0}
>
> How drive_table_index is interpreted is "if" specific.  For if=scsi, 
> each lsi device gets a base drive table index that starts at bus_index 
> * 7.  For virtio, the first empty spot in drive_table results in no 
> more drives being created.
>
> It's broken by design.

Agreed.  Pity that it's exposed to the poor users.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:45                                           ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 13:54                                           ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 13:54 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 04:45 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:20 PM, Anthony Liguori wrote:
>>> It's not at all that simple.   SCSI has a hierarchical address 
>>> mechanism with 0-7 targets but then potentially multiple LUNs per 
>>> target.  Today, we always emulate a single LUN per target but if we 
>>> ever wanted to support more than 7 disks on a SCSI controller, we 
>>> would have to add multiple LUN support too.  So the current linear 
>>> unit= parameter is actually pretty broken for SCSI.
>>
>> Well, another level in the hierarchy, but I don't think it materially 
>> changes things.
>
> Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
> mean the latter, then it's quite conceivable that each target supports 
> less than the maximum number of LUNs.  This makes things pretty 
> confusing to the user because they have to know that in the current 
> implementation, index=0 is valid, index=1 isn't, but index=8 is.

I'd object to any implicit addressing rules.  If we have to say 
target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
index=8345345235 so be it.

>>> No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
>>> what it should do to you that's because it isn't at all obvious :-)  
>>> It ends up skipping a predefined number of locations in the drive 
>>> table.  This is pretty broken fundamentally because it assumes 
>>> controllers always support a fixed number of devices.  Nothing 
>>> really respects bus_id though so in practice, I assume it's almost 
>>> universally broken.
>>
>> Isn't the drive table something totally internal?  And how does bus= 
>> relate to it?
>
> The reality of unit=X,bus=Y,if=Z is that they expand to:
>
> drive_table_index=Y*max_devs[Z] + X
>
> Whereas max_devs = {"ide":4, "scsi": 7, *:0}
>
> How drive_table_index is interpreted is "if" specific.  For if=scsi, 
> each lsi device gets a base drive table index that starts at bus_index 
> * 7.  For virtio, the first empty spot in drive_table results in no 
> more drives being created.
>
> It's broken by design.

Agreed.  Pity that it's exposed to the poor users.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:48                                     ` Anthony Liguori
@ 2009-06-15 14:00                                       ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 14:00 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Avi Kivity, Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
> Avi Kivity wrote:
> > On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
> >>>>> I think the point is that you don't need version numbers if you 
> >>>>> have a
> >>>>> proper device tree.
> >>>>>
> >>>>>          
> >>>> How do you add a new attribute to the device tree and, when a supplied
> >>>> device tree lacking said attribute, distinguish between a device tree
> >>>> from an old version of qemu (i.e. use the old default) and a partial
> >>>> device tree from the VM manager (i.e. use the new default) ?
> >>>>
> >>>>        
> >>> -baseline 0.10
> >>>      
> >>
> >> That's a version number :-)
> >>
> >> (I was responding to Anthony's "you don't need a version number")
> >>    
> >
> > If you want to prevent incompatibilities, you need to make everything 
> > new (potentially including bugfixes) non-default.

No need to punish new guests in order to maintain compatibility for old
guests.

> > Eventually the 
> > default configuration becomes increasingly unusable and you need a new 
> > baseline.  You must still be able to fall back to the old baseline for 
> > older guests.  I don't think games with configuration files can hide 
> > that.
> 
> -M pc1
> -M pc2
> 
> etc.
> 
> This is pretty easy to maintain with config files.

I think this would be reasonable, but it is essentially just a version
number which you objected to on the basis that it would make
cherry-picking harder for distros.

One thing that would be nice with this '-M pc1' thing would be to retain
'-M pc' as a symlink to the latest version. We'd also need a way to read
the symlink too, so that you can query what the current latest version
is and use that in future.

How would this machine type version relate to e.g. changing the default
PCI class of virtio-blk? Would we bump the version number of all machine
types that can use virtio-blk?

A per-device version number is a workable alternative, but only with a
saveabi type file IMHO.

I've tried to summarise the options here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_Guest_ABI

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 14:00                                       ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 14:00 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
> Avi Kivity wrote:
> > On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
> >>>>> I think the point is that you don't need version numbers if you 
> >>>>> have a
> >>>>> proper device tree.
> >>>>>
> >>>>>          
> >>>> How do you add a new attribute to the device tree and, when a supplied
> >>>> device tree lacking said attribute, distinguish between a device tree
> >>>> from an old version of qemu (i.e. use the old default) and a partial
> >>>> device tree from the VM manager (i.e. use the new default) ?
> >>>>
> >>>>        
> >>> -baseline 0.10
> >>>      
> >>
> >> That's a version number :-)
> >>
> >> (I was responding to Anthony's "you don't need a version number")
> >>    
> >
> > If you want to prevent incompatibilities, you need to make everything 
> > new (potentially including bugfixes) non-default.

No need to punish new guests in order to maintain compatibility for old
guests.

> > Eventually the 
> > default configuration becomes increasingly unusable and you need a new 
> > baseline.  You must still be able to fall back to the old baseline for 
> > older guests.  I don't think games with configuration files can hide 
> > that.
> 
> -M pc1
> -M pc2
> 
> etc.
> 
> This is pretty easy to maintain with config files.

I think this would be reasonable, but it is essentially just a version
number which you objected to on the basis that it would make
cherry-picking harder for distros.

One thing that would be nice with this '-M pc1' thing would be to retain
'-M pc' as a symlink to the latest version. We'd also need a way to read
the symlink too, so that you can query what the current latest version
is and use that in future.

How would this machine type version relate to e.g. changing the default
PCI class of virtio-blk? Would we bump the version number of all machine
types that can use virtio-blk?

A per-device version number is a workable alternative, but only with a
saveabi type file IMHO.

I've tried to summarise the options here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_Guest_ABI

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 12:48                                     ` Anthony Liguori
                                                       ` (3 preceding siblings ...)
  (?)
@ 2009-06-15 14:00                                     ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 14:00 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
> Avi Kivity wrote:
> > On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
> >>>>> I think the point is that you don't need version numbers if you 
> >>>>> have a
> >>>>> proper device tree.
> >>>>>
> >>>>>          
> >>>> How do you add a new attribute to the device tree and, when a supplied
> >>>> device tree lacking said attribute, distinguish between a device tree
> >>>> from an old version of qemu (i.e. use the old default) and a partial
> >>>> device tree from the VM manager (i.e. use the new default) ?
> >>>>
> >>>>        
> >>> -baseline 0.10
> >>>      
> >>
> >> That's a version number :-)
> >>
> >> (I was responding to Anthony's "you don't need a version number")
> >>    
> >
> > If you want to prevent incompatibilities, you need to make everything 
> > new (potentially including bugfixes) non-default.

No need to punish new guests in order to maintain compatibility for old
guests.

> > Eventually the 
> > default configuration becomes increasingly unusable and you need a new 
> > baseline.  You must still be able to fall back to the old baseline for 
> > older guests.  I don't think games with configuration files can hide 
> > that.
> 
> -M pc1
> -M pc2
> 
> etc.
> 
> This is pretty easy to maintain with config files.

I think this would be reasonable, but it is essentially just a version
number which you objected to on the basis that it would make
cherry-picking harder for distros.

One thing that would be nice with this '-M pc1' thing would be to retain
'-M pc' as a symlink to the latest version. We'd also need a way to read
the symlink too, so that you can query what the current latest version
is and use that in future.

How would this machine type version relate to e.g. changing the default
PCI class of virtio-blk? Would we bump the version number of all machine
types that can use virtio-blk?

A per-device version number is a workable alternative, but only with a
saveabi type file IMHO.

I've tried to summarise the options here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_Guest_ABI

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:51                                                         ` Anthony Liguori
@ 2009-06-15 14:06                                                           ` Dor Laor
  -1 siblings, 0 replies; 457+ messages in thread
From: Dor Laor @ 2009-06-15 14:06 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Avi Kivity, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>>
>> How would qemu know which slots to optimize for?
>>
>> In practice, I don't see that as a real problem.  We should (a) add 
>> an ioapic and four more pci links (b) recommend that slots be 
>> assigned in ascending order, and everything works.
>>
>> I don't see your concern about libvirt allocating slots.  If a human 
>> can plug a card into a slot, so can libvirt.  Doing an interactive 
>> back-and-forth (equivalent to plugging a card while blindfolded, then 
>> looking to see which slot we hit) is certainly more difficult.
>
> Let's take a concrete example because I think you missed my point.  
> For the r2d board, if you have 1 on-board NIC, it has to go in slot 
> 2.  Additional NICs can go in any slot, but the primary on-board NIC 
> is expected to live in slot 2.  It's possible to not have that 
> on-board NIC.
Libvirt does not support r2d. I hope it won't start to support it.
We can have default values for these types of devices or something like 
pci_addr=auto.

>
> If you let QEMU allocate which PCI slot a device goes in, we can hide 
> this detail from libvirt.  If you have libvirt do PCI slot allocation 
> by default, it has to know about this restriction in the r2d board 
> unless you have a clever way to express this sort of information.
>
> Once QEMU has allocated a device to a slot, libvirt can do a good job 
> maintaining that relationship.
>

The end user should have a mechanism to control device slot positioning. 
For example, if you have several pci devices, some
get a high rate of interrupts and some do not; if you want to optimize your 
guest you should isolate the high rate 'interesting' devices.
This is something the user will need to do. I agree that the default 
behavior might be 'auto'

Also, while moving from one qemu version to another, you'll need to 
represent the older behavior. -qemu-0.10 is not good enough
since there will be multiple versions in the future with multiple 
distributions setting their defaults.

> Regards,
>
> Anthony Liguori
>
>


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 14:06                                                           ` Dor Laor
  0 siblings, 0 replies; 457+ messages in thread
From: Dor Laor @ 2009-06-15 14:06 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Avi Kivity, Paul Brook

Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>>
>> How would qemu know which slots to optimize for?
>>
>> In practice, I don't see that as a real problem.  We should (a) add 
>> an ioapic and four more pci links (b) recommend that slots be 
>> assigned in ascending order, and everything works.
>>
>> I don't see your concern about libvirt allocating slots.  If a human 
>> can plug a card into a slot, so can libvirt.  Doing an interactive 
>> back-and-forth (equivalent to plugging a card while blindfolded, then 
>> looking to see which slot we hit) is certainly more difficult.
>
> Let's take a concrete example because I think you missed my point.  
> For the r2d board, if you have 1 on-board NIC, it has to go in slot 
> 2.  Additional NICs can go in any slot, but the primary on-board NIC 
> is expected to live in slot 2.  It's possible to not have that 
> on-board NIC.
Libvirt does not support r2d. I hope it won't start to support it.
We can have default values for these types of devices or something like 
pci_addr=auto.

>
> If you let QEMU allocate which PCI slot a device goes in, we can hide 
> this detail from libvirt.  If you have libvirt do PCI slot allocation 
> by default, it has to know about this restriction in the r2d board 
> unless you have a clever way to express this sort of information.
>
> Once QEMU has allocated a device to a slot, libvirt can do a good job 
> maintaining that relationship.
>

The end user should have a mechanism to control device slot positioning. 
For example, if you have several pci devices, some
get a high rate of interrupts and some do not; if you want to optimize your 
guest you should isolate the high rate 'interesting' devices.
This is something the user will need to do. I agree that the default 
behavior might be 'auto'

Also, while moving from one qemu version to another, you'll need to 
represent the older behavior. -qemu-0.10 is not good enough
since there will be multiple versions in the future with multiple 
distributions setting their defaults.

> Regards,
>
> Anthony Liguori
>
>

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:51                                                         ` Anthony Liguori
  (?)
@ 2009-06-15 14:06                                                         ` Dor Laor
  -1 siblings, 0 replies; 457+ messages in thread
From: Dor Laor @ 2009-06-15 14:06 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Avi Kivity, Paul Brook

Anthony Liguori wrote:
> Avi Kivity wrote:
>> On 06/15/2009 04:23 PM, Anthony Liguori wrote:
>>
>> How would qemu know which slots to optimize for?
>>
>> In practice, I don't see that as a real problem.  We should (a) add 
>> an ioapic and four more pci links (b) recommend that slots be 
>> assigned in ascending order, and everything works.
>>
>> I don't see your concern about libvirt allocating slots.  If a human 
>> can plug a card into a slot, so can libvirt.  Doing an interactive 
>> back-and-forth (equivalent to plugging a card while blindfolded, then 
>> looking to see which slot we hit) is certainly more difficult.
>
> Let's take a concrete example because I think you missed my point.  
> For the r2d board, if you have 1 on-board NIC, it has to go in slot 
> 2.  Additional NICs can go in any slot, but the primary on-board NIC 
> is expected to live in slot 2.  It's possible to not have that 
> on-board NIC.
Libvirt does not support r2d. I hope it won't start to support it.
We can have default values for these types of devices or something like 
pci_addr=auto.

>
> If you let QEMU allocate which PCI slot a device goes in, we can hide 
> this detail from libvirt.  If you have libvirt do PCI slot allocation 
> by default, it has to know about this restriction in the r2d board 
> unless you have a clever way to express this sort of information.
>
> Once QEMU has allocated a device to a slot, libvirt can do a good job 
> maintaining that relationship.
>

The end user should have a mechanism to control device slot positioning. 
For example, if you have several pci devices, some
get a high rate of interrupts and some do not; if you want to optimize your 
guest you should isolate the high rate 'interesting' devices.
This is something the user will need to do. I agree that the default 
behavior might be 'auto'

Also, while moving from one qemu version to another, you'll need to 
represent the older behavior. -qemu-0.10 is not good enough
since there will be multiple versions in the future with multiple 
distributions setting their defaults.

> Regards,
>
> Anthony Liguori
>
>

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:00                                       ` Mark McLoughlin
@ 2009-06-15 14:20                                         ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:20 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Avi Kivity, Jamie Lokier, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>   
>>> Eventually the 
>>> default configuration becomes increasingly unusable and you need a new 
>>> baseline.  You must still be able to fall back to the old baseline for 
>>> older guests.  I don't think games with configuration files can hide 
>>> that.
>>>       
>> -M pc1
>> -M pc2
>>
>> etc.
>>
>> This is pretty easy to maintain with config files.
>>     
>
> I think this would be reasonable, but it is essentially just a version
> number which you objected to on the basis that it would make
> cherry-picking harder for distros.
>   

It doesn't have to be pc1, pc2.  It could be pc-with-usb or 
pc-with-balloon.  If a distro cherry picks in such a way that their pc 
is not a standard QEMU pc, they would add a new PC type that's specific 
to their distro.

> One thing that would be nice with this '-M pc1' thing would be to retain
> '-M pc' as a symlink to the latest version. We'd also need a way to read
> the symlink too, so that you can query what the current latest version
> is and use that in future.
>   

Another option is an explicit -M default which always uses the default 
machine for the architecture.  Likewise, we would need a way to query 
what the default machine was for an architecture.

> How would this machine type version relate to e.g. changing the default
> PCI class of virtio-blk? Would we bump the version number of all machine
> types can use virtio-blk?
>   
You would introduce a new machine type.  For instance, 
pc-virtio-class-other.  The names don't have to look like that, I'm just 
doing that to make a point.  This may mean that you end up with dozens 
of machine types but you preserve compatibility, which is a good thing.

Of course, the flip side is that you make preserving the machine config 
the duty of the user and we don't maintain compatible machine types.  
This won't work without a proper config file though so for now, we're 
stuck maintaining machine types.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 14:20                                         ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:20 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>   
>>> Eventually the 
>>> default configuration becomes increasingly unusable and you need a new 
>>> baseline.  You must still be able to fall back to the old baseline for 
>>> older guests.  I don't think games with configuration files can hide 
>>> that.
>>>       
>> -M pc1
>> -M pc2
>>
>> etc.
>>
>> This is pretty easy to maintain with config files.
>>     
>
> I think this would be reasonable, but it is essentially just a version
> number which you objected to on the basis that it would make
> cherry-picking harder for distros.
>   

It doesn't have to be pc1, pc2.  It could be pc-with-usb or 
pc-with-balloon.  If a distro cherry picks in such a way that their pc 
is not a standard QEMU pc, they would add a new PC type that's specific 
to their distro.

> One thing that would be nice with this '-M pc1' thing would be to retain
> '-M pc' as a symlink to the latest version. We'd also need a way to read
> the symlink too, so that you can query what the current latest version
> is and use that in future.
>   

Another option is an explicit -M default which always uses the default 
machine for the architecture.  Likewise, we would need a way to query 
what the default machine was for an architecture.

> How would this machine type version relate to e.g. changing the default
> PCI class of virtio-blk? Would we bump the version number of all machine
> types can use virtio-blk?
>   
You would introduce a new machine type.  For instance, 
pc-virtio-class-other.  The names don't have to look like that, I'm just 
doing that to make a point.  This may mean that you end up with dozens 
of machine types but you preserve compatibility, which is a good thing.

Of course, the flip side is that you make preserving the machine config 
the duty of the user and we don't maintain compatible machine types.  
This won't work without a proper config file though so for now, we're 
stuck maintaining machine types.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:00                                       ` Mark McLoughlin
  (?)
  (?)
@ 2009-06-15 14:20                                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:20 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>   
>>> Eventually the 
>>> default configuration becomes increasingly unusable and you need a new 
>>> baseline.  You must still be able to fall back to the old baseline for 
>>> older guests.  I don't think games with configuration files can hide 
>>> that.
>>>       
>> -M pc1
>> -M pc2
>>
>> etc.
>>
>> This is pretty easy to maintain with config files.
>>     
>
> I think this would be reasonable, but it is essentially just a version
> number which you objected to on the basis that it would make
> cherry-picking harder for distros.
>   

It doesn't have to be pc1, pc2.  It could be pc-with-usb or 
pc-with-balloon.  If a distro cherry picks in such a way that their pc 
is not a standard QEMU pc, they would add a new PC type that's specific 
to their distro.

> One thing that would be nice with this '-M pc1' thing would be to retain
> '-M pc' as a symlink to the latest version. We'd also need a way to read
> the symlink too, so that you can query what the current latest version
> is and use that in future.
>   

Another option is an explicit -M default which always uses the default 
machine for the architecture.  Likewise, we would need a way to query 
what the default machine was for an architecture.

> How would this machine type version relate to e.g. changing the default
> PCI class of virtio-blk? Would we bump the version number of all machine
> types can use virtio-blk?
>   
You would introduce a new machine type.  For instance, 
pc-virtio-class-other.  The names don't have to look like that, I'm just 
doing that to make a point.  This may mean that you end up with dozens 
of machine types but you preserve compatibility, which is a good thing.

Of course, the flip side is that you make preserving the machine config 
the duty of the user and we don't maintain compatible machine types.  
This won't work without a proper config file though so for now, we're 
stuck maintaining machine types.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
@ 2009-06-15 14:23                                         ` Javier Guerra
  -1 siblings, 0 replies; 457+ messages in thread
From: Javier Guerra @ 2009-06-15 14:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Markus Armbruster, Mark McLoughlin, Carsten Otte, kvm,
	Michael S. Tsirkin, Glauber Costa, Rusty Russell, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Anthony Liguori,
	qemu-devel

On Mon, Jun 15, 2009 at 6:43 AM, Avi Kivity<avi@redhat.com> wrote:
> (I'd be quite happy constructing the entire machine config on the command
> line, but I realize it's just me)

as a user-only (well, i'm a developer, but don't meddle in kernel
affairs since 0.99pl9); I also like that kvm is totally CLI-managed.

but migration-wise, i think it could be nicer if the 'origin' process
could send the config to the 'target' one.  IOW: the -incoming flag
shouldn't need any other parameter, and the 'migrate' command should
send the whole hardware description before the CPU state, and fail
with a 'can't comply' message if the target complains.

of course, that's a simplification.  for example, the 'target' process
should be able to respect some parameters, mostly the 'external'
descriptions, like storage pathnames, or '-net tap' ones.

-- 
Javier

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] Re: Configuration vs. compat hints
@ 2009-06-15 14:23                                         ` Javier Guerra
  0 siblings, 0 replies; 457+ messages in thread
From: Javier Guerra @ 2009-06-15 14:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, qemu-devel,
	Glauber Costa, Rusty Russell, Markus Armbruster, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Carsten Otte

On Mon, Jun 15, 2009 at 6:43 AM, Avi Kivity<avi@redhat.com> wrote:
> (I'd be quite happy constructing the entire machine config on the command
> line, but I realize it's just me)

as a user-only (well, i'm a developer, but don't meddle in kernel
affairs since 0.99pl9); I also like that kvm is totally CLI-managed.

but migration-wise, i think it could be nicer if the 'origin' process
could send the config to the 'target' one.  IOW: the -incoming flag
shouldn't need any other parameter, and the 'migrate' command should
send the whole hardware description before the CPU state, and fail
with a 'can't comply' message if the target complains.

of course, that's a simplification.  for example, the 'target' process
should be able to respect some parameters, mostly the 'external'
descriptions, like storage pathnames, or '-net tap' ones.

-- 
Javier

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints
  2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
                                                         ` (5 preceding siblings ...)
  (?)
@ 2009-06-15 14:23                                       ` Javier Guerra
  -1 siblings, 0 replies; 457+ messages in thread
From: Javier Guerra @ 2009-06-15 14:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, qemu-devel,
	Glauber Costa, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Anthony Liguori, Carsten Otte

On Mon, Jun 15, 2009 at 6:43 AM, Avi Kivity<avi@redhat.com> wrote:
> (I'd be quite happy constructing the entire machine config on the command
> line, but I realize it's just me)

as a user-only (well, i'm a developer, but don't meddle in kernel
affairs since 0.99pl9); I also like that kvm is totally CLI-managed.

but migration-wise, i think it could be nicer if the 'origin' process
could send the config to the 'target' one.  IOW: the -incoming flag
shouldn't need any other parameter, and the 'migrate' command should
send the whole hardware description before the CPU state, and fail
with a 'can't comply' message if the target complains.

of course, that's a simplification.  for example, the 'target' process
should be able to respect some parameters, mostly the 'external'
descriptions, like storage pathnames, or '-net tap' ones.

-- 
Javier

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:06                                                           ` Dor Laor
@ 2009-06-15 14:24                                                             ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:24 UTC (permalink / raw)
  To: dlaor
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Avi Kivity, Paul Brook

Dor Laor wrote:
> Libvirt does not support r2d. I hope it won't start to support it.

It supports mips, sparc, and ppc machines now.  I don't see why it 
wouldn't support r2d.  For ppcemb, I expect this same problem to occur.  
This sort of restriction is going to be common with embedded boards.

> We can have default values for these types of devices or something 
> like pci_addr=auto.

Why wouldn't libvirt always use pci_addr=auto?  If the only argument for 
having libvirt do pci slot allocation is error messages, can't we find a 
nice way to allow libvirt to create friendly error messages when QEMU fails?

>> If you let QEMU allocate which PCI slot a device goes in, we can hide 
>> this detail from libvirt.  If you have libvirt do PCI slot allocation 
>> by default, it has to know about this restriction in the r2d board 
>> unless you have a clever way to express this sort of information.
>>
>> Once QEMU has allocated a device to a slot, libvirt can do a good job 
>> maintaining that relationship.
>>
>
> The end user should have a mechanism to control device slot 
> positioning. For example, if you have several pci devices, some
> get high rate of interrupts and some not, if you want to optimize your 
> guest you should isolate the high rate 'interesting' devices.
> This is something the user will need to do. I agree that the default 
> behavior might be 'auto'

I'm not at all arguing against pci_addr.  I'm arguing about how libvirt 
should use it with respect to the "genesis" use-case where libvirt has 
no specific reason to choose one PCI slot over another.  In that case, 
I'm merely advocating that we want to let QEMU make the decision.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 14:24                                                             ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 14:24 UTC (permalink / raw)
  To: dlaor
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Avi Kivity, Paul Brook

Dor Laor wrote:
> Libvirt does not support r2d. I hope it won't start to support it.

It supports mips, sparc, and ppc machines now.  I don't see why it 
wouldn't support r2d.  For ppcemb, I expect this same problem to occur.  
This sort of restriction is going to be common with embedded boards.

> We can have default values for these types of devices or something 
> like pci_addr=auto.

Why wouldn't libvirt always use pci_addr=auto?  If the only argument for 
having libvirt do pci slot allocation is error messages, can't we find a 
nice way to allow libvirt to create friendly error messages when QEMU fails?

>> If you let QEMU allocate which PCI slot a device goes in, we can hide 
>> this detail from libvirt.  If you have libvirt do PCI slot allocation 
>> by default, it has to know about this restriction in the r2d board 
>> unless you have a clever way to express this sort of information.
>>
>> Once QEMU has allocated a device to a slot, libvirt can do a good job 
>> maintaining that relationship.
>>
>
> The end user should have a mechanism to control device slot 
> positioning. For example, if you have several pci devices, some
> get high rate of interrupts and some not, if you want to optimize your 
> guest you should isolate the high rate 'interesting' devices.
> This is something the user will need to do. I agree that the default 
> behavior might be 'auto'

I'm not at all arguing against pci_addr.  I'm arguing about how libvirt 
should use it with respect to the "genesis" use-case where libvirt has 
no specific reason to choose one PCI slot over another.  In that case, 
I'm merely advocating that we want to let QEMU make the decision.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:20                                         ` Anthony Liguori
@ 2009-06-15 14:34                                           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:34 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, Avi Kivity, Jamie Lokier, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 09:20:00AM -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
>> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>>   
>>>> Eventually the default configuration becomes increasingly unusable 
>>>> and you need a new baseline.  You must still be able to fall back 
>>>> to the old baseline for older guests.  I don't think games with 
>>>> configuration files can hide that.
>>>>       
>>> -M pc1
>>> -M pc2
>>>
>>> etc.
>>>
>>> This is pretty easy to maintain with config files.
>>>     
>>
>> I think this would be reasonable, but it is essentially just a version
>> number which you objected to on the basis that it would make
>> cherry-picking harder for distros.
>>   
>
> It doesn't have to be pc1, pc2.  It could be pc-with-usb or  
> pc-with-balloon.  If a distro cherry picks in such a way that their pc  
> is not a standard QEMU pc, they would add a new PC type that's specific  
> to their distro.
>
>> One thing that would be nice with this '-M pc1' thing would be to retain
>> '-M pc' as a symlink to the latest version. We'd also need a way to read
>> the symlink too, so that you can query what the current latest version
>> is and use that in future.
>>   
>
> Another option is an explicit -M default which always uses the default  
> machine for the architecture.  Likewise, we would need a way to query  
> what the default machine was for an architecture.
>
>> How would this machine type version relate to e.g. changing the default
>> PCI class of virtio-blk? Would we bump the version number of all machine
>> types can use virtio-blk?
>>   
> You would introduce a new machine type.  For instance,  
> pc-virtio-class-other.  The names don't have to look like that, I'm just  
> doing that to make a point.  This may mean that you end up with dozens  
> of machine types but you preserve compatibility, which is a good thing.

And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
more straightforward to have capability bits which can be switched on
and off independently rather than trying to fit unrelated features into
a machine type?  IMO it only seems more work at first, and QA gets a bit
nervous that they can't exhaustively test all options. But in the long
run it simplifies things as you don't have to set policy and invent
silly names.

> Of course, the flip side is that you make preserving the machine config  
> the duty of the user and we don't maintain compatible machine types.   
> This won't work without a proper config file though so for now, we're  
> stuck maintaining machine types.
>
> Regards,
>
> Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 14:34                                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:34 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 09:20:00AM -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
>> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>>   
>>>> Eventually the default configuration becomes increasingly unusable 
>>>> and you need a new baseline.  You must still be able to fall back 
>>>> to the old baseline for older guests.  I don't think games with 
>>>> configuration files can hide that.
>>>>       
>>> -M pc1
>>> -M pc2
>>>
>>> etc.
>>>
>>> This is pretty easy to maintain with config files.
>>>     
>>
>> I think this would be reasonable, but it is essentially just a version
>> number which you objected to on the basis that it would make
>> cherry-picking harder for distros.
>>   
>
> It doesn't have to be pc1, pc2.  It could be pc-with-usb or  
> pc-with-balloon.  If a distro cherry picks in such a way that their pc  
> is not a standard QEMU pc, they would add a new PC type that's specific  
> to their distro.
>
>> One thing that would be nice with this '-M pc1' thing would be to retain
>> '-M pc' as a symlink to the latest version. We'd also need a way to read
>> the symlink too, so that you can query what the current latest version
>> is and use that in future.
>>   
>
> Another option is an explicit -M default which always uses the default  
> machine for the architecture.  Likewise, we would need a way to query  
> what the default machine was for an architecture.
>
>> How would this machine type version relate to e.g. changing the default
>> PCI class of virtio-blk? Would we bump the version number of all machine
>> types can use virtio-blk?
>>   
> You would introduce a new machine type.  For instance,  
> pc-virtio-class-other.  The names don't have to look like that, I'm just  
> doing that to make a point.  This may mean that you end up with dozens  
> of machine types but you preserve compatibility, which is a good thing.

And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
more straightforward to have capability bits which can be switched on
and off independently rather than trying to fit unrelated features into
a machine type?  IMO it only seems more work at first, and QA gets a bit
nervous that they can't exhaustively test all options. But in the long
run it simplifies things as you don't have to set policy and invent
silly names.

> Of course, the flip side is that you make preserving the machine config  
> the duty of the user and we don't maintain compatible machine types.   
> This won't work without a proper config file though so for now, we're  
> stuck maintaining machine types.
>
> Regards,
>
> Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:20                                         ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 14:34                                         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:34 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 09:20:00AM -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
>> On Mon, 2009-06-15 at 07:48 -0500, Anthony Liguori wrote:
>>   
>>>> Eventually the default configuration becomes increasingly unusable 
>>>> and you need a new baseline.  You must still be able to fall back 
>>>> to the old baseline for older guests.  I don't think games with 
>>>> configuration files can hide that.
>>>>       
>>> -M pc1
>>> -M pc2
>>>
>>> etc.
>>>
>>> This is pretty easy to maintain with config files.
>>>     
>>
>> I think this would be reasonable, but it is essentially just a version
>> number which you objected to on the basis that it would make
>> cherry-picking harder for distros.
>>   
>
> It doesn't have to be pc1, pc2.  It could be pc-with-usb or  
> pc-with-balloon.  If a distro cherry picks in such a way that their pc  
> is not a standard QEMU pc, they would add a new PC type that's specific  
> to their distro.
>
>> One thing that would be nice with this '-M pc1' thing would be to retain
>> '-M pc' as a symlink to the latest version. We'd also need a way to read
>> the symlink too, so that you can query what the current latest version
>> is and use that in future.
>>   
>
> Another option is an explicit -M default which always uses the default  
> machine for the architecture.  Likewise, we would need a way to query  
> what the default machine was for an architecture.
>
>> How would this machine type version relate to e.g. changing the default
>> PCI class of virtio-blk? Would we bump the version number of all machine
>> types can use virtio-blk?
>>   
> You would introduce a new machine type.  For instance,  
> pc-virtio-class-other.  The names don't have to look like that, I'm just  
> doing that to make a point.  This may mean that you end up with dozens  
> of machine types but you preserve compatibility, which is a good thing.

And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
more straightforward to have capability bits which can be switched on
and off independently rather than trying to fit unrelated features into
a machine type?  IMO it only seems more work at first, and QA gets a bit
nervous that they can't exhaustively test all options. But in the long
run it simplifies things as you don't have to set policy and invent
silly names.

> Of course, the flip side is that you make preserving the machine config  
> the duty of the user and we don't maintain compatible machine types.   
> This won't work without a proper config file though so for now, we're  
> stuck maintaining machine types.
>
> Regards,
>
> Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:24                                                             ` Anthony Liguori
@ 2009-06-15 14:37                                                               ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:37 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: dlaor, Avi Kivity, Carsten Otte, Rusty Russell, kvm,
	Mark McLoughlin, Glauber Costa, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 09:24:32AM -0500, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now.  I don't see why it  
> wouldn't support r2d.  For ppcemb, I expect this same problem to occur.   
> This sort of restriction is going to be common with embedded boards.
>
>> We can have default values for these types of devices or something  
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto?  If the only argument for  
> having libvirt do pci slot allocation is error messages, can't we find a  
> nice way to allow libvirt to create friendly error messages when QEMU 
> fails?
>
>>> If you let QEMU allocate which PCI slot a device goes in, we can hide 
>>> this detail from libvirt.  If you have libvirt do PCI slot allocation 
>>> by default, it has to know about this restriction in the r2d board  
>>> unless you have a clever way to express this sort of information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good job 
>>> maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot  
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize your  
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default  
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
> should use it with respect to the "genesis" use-case where libvirt has  
> no specific reason to choose one PCI slot over another.  In that case,  
> I'm merely advocating that we want to let QEMU make the decision.

The allocation code could be moved out into a library, and libvirt could
link with it (ducks).

> Regards,
>
> Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 14:37                                                               ` Michael S. Tsirkin
  0 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:37 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, dlaor, kvm, Mark McLoughlin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

On Mon, Jun 15, 2009 at 09:24:32AM -0500, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now.  I don't see why it  
> wouldn't support r2d.  For ppcemb, I expect this same problem to occur.   
> This sort of restriction is going to be common with embedded boards.
>
>> We can have default values for these types of devices or something  
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto?  If the only argument for  
> having libvirt do pci slot allocation is error messages, can't we find a  
> nice way to allow libvirt to create friendly error messages when QEMU 
> fails?
>
>>> If you let QEMU allocate which PCI slot a device goes in, we can hide 
>>> this detail from libvirt.  If you have libvirt do PCI slot allocation 
>>> by default, it has to know about this restriction in the r2d board  
>>> unless you have a clever way to express this sort of information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good job 
>>> maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot  
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize your  
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default  
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
> should use it with respect to the "genesis" use-case where libvirt has  
> no specific reason to choose one PCI slot over another.  In that case,  
> I'm merely advocating that we want to let QEMU make the decision.

The allocation code could be moved out into a library, and libvirt could
link with it (ducks).

> Regards,
>
> Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:24                                                             ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 14:37                                                             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 457+ messages in thread
From: Michael S. Tsirkin @ 2009-06-15 14:37 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Paul Brook

On Mon, Jun 15, 2009 at 09:24:32AM -0500, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now.  I don't see why it  
> wouldn't support r2d.  For ppcemb, I expect this same problem to occur.   
> This sort of restriction is going to be common with embedded boards.
>
>> We can have default values for these types of devices or something  
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto?  If the only argument for  
> having libvirt do pci slot allocation is error messages, can't we find a  
> nice way to allow libvirt to create friendly error messages when QEMU 
> fails?
>
>>> If you let QEMU allocate which PCI slot a device goes in, we can hide 
>>> this detail from libvirt.  If you have libvirt do PCI slot allocation 
>>> by default, it has to know about this restriction in the r2d board  
>>> unless you have a clever way to express this sort of information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good job 
>>> maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot  
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize your  
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default  
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
> should use it with respect to the "genesis" use-case where libvirt has  
> no specific reason to choose one PCI slot over another.  In that case,  
> I'm merely advocating that we want to let QEMU make the decision.

The allocation code could be moved out into a library, and libvirt could
link with it (ducks).

> Regards,
>
> Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:37                                                               ` Michael S. Tsirkin
@ 2009-06-15 15:03                                                                 ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:03 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: dlaor, Avi Kivity, Carsten Otte, Rusty Russell, kvm,
	Mark McLoughlin, Glauber Costa, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Michael S. Tsirkin wrote:
>> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
>> should use it with respect to the "genesis" use-case where libvirt has  
>> no specific reason to choose one PCI slot over another.  In that case,  
>> I'm merely advocating that we want to let QEMU make the decision.
>>     
>
> The allocation code could be moved out into a library, and libvirt could
> link with it (ducks).
>   

Why does libvirt want to do allocation?

Regards,

Anthony Liguori



^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:03                                                                 ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:03 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, dlaor, kvm, Mark McLoughlin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

Michael S. Tsirkin wrote:
>> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
>> should use it with respect to the "genesis" use-case where libvirt has  
>> no specific reason to choose one PCI slot over another.  In that case,  
>> I'm merely advocating that we want to let QEMU make the decision.
>>     
>
> The allocation code could be moved out into a library, and libvirt could
> link with it (ducks).
>   

Why does libvirt want to do allocation?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:37                                                               ` Michael S. Tsirkin
  (?)
@ 2009-06-15 15:03                                                               ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:03 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Paul Brook

Michael S. Tsirkin wrote:
>> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
>> should use it with respect to the "genesis" use-case where libvirt has  
>> no specific reason to choose one PCI slot over another.  In that case,  
>> I'm merely advocating that we want to let QEMU make the decision.
>>     
>
> The allocation code could be moved out into a library, and libvirt could
> link with it (ducks).
>   

Why does libvirt want to do allocation?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:24                                                             ` Anthony Liguori
@ 2009-06-15 15:05                                                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:05 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: dlaor, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now.  I don't see why it 
> wouldn't support r2d.  For ppcemb, I expect this same problem to 
> occur.  This sort of restriction is going to be common with embedded 
> boards.

I expect these restrictions will have to be known by the management 
application.  Otherwise the users will try invalid configurations only 
to receive errors when they launch them.  GUIs exist to guide users, not 
as an inefficient means of trial-and-error.

>
>> We can have default values for these types of devices or something 
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto?  If the only argument 
> for having libvirt do pci slot allocation is error messages, can't we 
> find a nice way to allow libvirt to create friendly error messages 
> when QEMU fails?

Error messages are not the only argument for pushing slot allocation to 
management.  See my previous messages on the topic.

>>> If you let QEMU allocate which PCI slot a device goes in, we can 
>>> hide this detail from libvirt.  If you have libvirt do PCI slot 
>>> allocation by default, it has to know about this restriction in the 
>>> r2d board unless you have a clever way to express this sort of 
>>> information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good 
>>> job maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot 
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize your 
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default 
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr.  I'm arguing about how 
> libvirt should use it with respect to the "genesis" use-case where 
> libvirt has no specific reason to choose one PCI slot over another.  
> In that case, I'm merely advocating that we want to let QEMU make the 
> decision.


However this may end up, isn't it offtopic?  Whatever we do we have to 
support both pci_addr= and default placement, so we can push this 
discussion to libvirt-devel and bid them godspeed.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:05                                                               ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:05 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, dlaor, kvm, Mark McLoughlin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Michael S. Tsirkin, Paul Brook

On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now.  I don't see why it 
> wouldn't support r2d.  For ppcemb, I expect this same problem to 
> occur.  This sort of restriction is going to be common with embedded 
> boards.

I expect these restrictions will have to be known by the management 
application.  Otherwise the users will try invalid configurations only 
to receive errors when they launch them.  GUIs exist to guide users, not 
as an inefficient means of trial-and-error.

>
>> We can have default values for these types of devices or something 
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto?  If the only argument 
> for having libvirt do pci slot allocation is error messages, can't we 
> find a nice way to allow libvirt to create friendly error messages 
> when QEMU fails?

Error messages are not the only argument for pushing slot allocation to 
management.  See my previous messages on the topic.

>>> If you let QEMU allocate which PCI slot a device goes in, we can 
>>> hide this detail from libvirt.  If you have libvirt do PCI slot 
>>> allocation by default, it has to know about this restriction in the 
>>> r2d board unless you have a clever way to express this sort of 
>>> information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good 
>>> job maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot 
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize your 
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default 
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr.  I'm arguing about how 
> libvirt should use it with respect to the "genesis" use-case where 
> libvirt has no specific reason to choose one PCI slot over another.  
> In that case, I'm merely advocating that we want to let QEMU make the 
> decision.


However this may end up, isn't it offtopic?  Whatever we do we have to 
support both pci_addr= and default placement, so we can push this 
discussion to libvirt-devel and bid them godspeed.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:24                                                             ` Anthony Liguori
                                                                               ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 15:05                                                             ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:05 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Paul Brook

On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> Dor Laor wrote:
>> Libvirt does not support r2d. I hope it won't start to support it.
>
> It supports mips, sparc, and ppc machines now.  I don't see why it 
> wouldn't support r2d.  For ppcemb, I expect this same problem to 
> occur.  This sort of restriction is going to be common with embedded 
> boards.

I expect these restrictions will have to be known by the management 
application.  Otherwise the users will try invalid configurations only 
to receive errors when they launch them.  GUIs exist to guide users, not 
as an inefficient means of trial-and-error.

>
>> We can have default values for these types of devices or something 
>> like pci_addr=auto.
>
> Why wouldn't libvirt always use pci_addr=auto?  If the only argument 
> for having libvirt do pci slot allocation is error messages, can't we 
> find a nice way to allow libvirt to create friendly error messages 
> when QEMU fails?

Error messages are not the only argument for pushing slot allocation to 
management.  See my previous messages on the topic.

>>> If you let QEMU allocate which PCI slot a device goes in, we can 
>>> hide this detail from libvirt.  If you have libvirt do PCI slot 
>>> allocation by default, it has to know about this restriction in the 
>>> r2d board unless you have a clever way to express this sort of 
>>> information.
>>>
>>> Once QEMU has allocated a device to a slot, libvirt can do a good 
>>> job maintaining that relationship.
>>>
>>
>> The end user should have a mechanism to control device slot 
>> positioning. For example, if you have several pci devices, some
>> get high rate of interrupts and some not, if you want to optimize your 
>> guest you should isolate the high rate 'interesting' devices.
>> This is something the user will need to do. I agree that the default 
>> behavior might be 'auto'
>
> I'm not at all arguing against pci_addr.  I'm arguing about how 
> libvirt should use it with respect to the "genesis" use-case where 
> libvirt has no specific reason to choose one PCI slot over another.  
> In that case, I'm merely advocating that we want to let QEMU make the 
> decision.


However this may end up, isn't it offtopic?  Whatever we do we have to 
support both pci_addr= and default placement, so we can push this 
discussion to libvirt-devel and bid them godspeed.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:54                                             ` Avi Kivity
@ 2009-06-15 15:07                                               ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:07 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
>
> I'd object to any implicit addressing rules.  If we have to say 
> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
> index=8345345235 so be it.

The next observation is that while we expand the SCSI addressing, the 
current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).

An alternative would be to either always expand or always flatten 
addressing.  I think the latter has a lot of merit.  Consider:

-controller type=lsi1234,addr=00:01,name=blah
-controller-disk controller=blah,addr=00:01,name=sda

-controller type=ide,addr=00.02,name=ide
-controller-disk controller=ide,addr=3,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

This means that addr's format depends on the parent device node which is 
a bit less explicit than the previous example.  However, it is much more 
consistent and easier to implement.  Basically, when adding a device to 
its parent, you hand the parent the "addr" field and that lets you say 
where you want to sit on the bus.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:07                                               ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:07 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
>
> I'd object to any implicit addressing rules.  If we have to say 
> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
> index=8345345235 so be it.

The next observation is that while we expand the SCSI addressing, the 
current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).

An alternative would be to either always expand or always flatten 
addressing.  I think the latter has a lot of merit.  Consider:

-controller type=lsi1234,addr=00:01,name=blah
-controller-disk controller=blah,addr=00:01,name=sda

-controller type=ide,addr=00.02,name=ide
-controller-disk controller=ide,addr=3,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

This means that addr's format depends on the parent device node which is 
a bit less explicit than the previous example.  However, it is much more 
consistent and easier to implement.  Basically, when adding a device to 
its parent, you hand the parent the "addr" field and that lets you say 
where you want to sit on the bus.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 13:54                                             ` Avi Kivity
  (?)
@ 2009-06-15 15:07                                             ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:07 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
>
> I'd object to any implicit addressing rules.  If we have to say 
> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
> index=8345345235 so be it.

The next observation is that while we expand the SCSI addressing, the 
current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).

An alternative would be to either always expand or always flatten 
addressing.  I think the latter has a lot of merit.  Consider:

-controller type=lsi1234,addr=00:01,name=blah
-controller-disk controller=blah,addr=00:01,name=sda

-controller type=ide,addr=00.02,name=ide
-controller-disk controller=ide,addr=3,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

This means that addr's format depends on the parent device node which is 
a bit less explicit than the previous example.  However, it is much more 
consistent and easier to implement.  Basically, when adding a device to 
its parent, you hand the parent the "addr" field and that lets you say 
where you want to sit on the bus.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:03                                                                 ` Anthony Liguori
@ 2009-06-15 15:08                                                                   ` Daniel P. Berrange
  -1 siblings, 0 replies; 457+ messages in thread
From: Daniel P. Berrange @ 2009-06-15 15:08 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, dlaor, Avi Kivity, Carsten Otte,
	Rusty Russell, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
> Michael S. Tsirkin wrote:
> >>I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
> >>should use it with respect to the "genesis" use-case where libvirt has  
> >>no specific reason to choose one PCI slot over another.  In that case,  
> >>I'm merely advocating that we want to let QEMU make the decision.
> >>    
> >
> >The allocation code could be moved out into a library, and libvirt could
> >link with it (ducks).
> >  
> 
> Why does libvirt want to do allocation?

It doesn't want to. As Mark said, libvirt just wants to be able to ensure
a stable guest ABI, of which stable PCI addresses is one aspect. This does
not imply libvirt wants to allocate the PCI addresses, just that it wants
a way to keep them stable. All else being equal I'd rather libvirt wasn't
in the PCI address allocation business.


Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:08                                                                   ` Daniel P. Berrange
  0 siblings, 0 replies; 457+ messages in thread
From: Daniel P. Berrange @ 2009-06-15 15:08 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Mark McLoughlin, Paul Brook

On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
> Michael S. Tsirkin wrote:
> >>I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
> >>should use it with respect to the "genesis" use-case where libvirt has  
> >>no specific reason to choose one PCI slot over another.  In that case,  
> >>I'm merely advocating that we want to let QEMU make the decision.
> >>    
> >
> >The allocation code could be moved out into a library, and libvirt could
> >link with it (ducks).
> >  
> 
> Why does libvirt want to do allocation?

It doesn't want to. As Mark said, libvirt just wants to be able to ensure
a stable guest ABI, of which stable PCI addresses is one aspect. This does
not imply libvirt wants to allocate the PCI addresses, just that it wants
a way to keep them stable. All else being equal I'd rather libvirt wasn't
in the PCI address allocation business.


Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:03                                                                 ` Anthony Liguori
  (?)
@ 2009-06-15 15:08                                                                 ` Daniel P. Berrange
  -1 siblings, 0 replies; 457+ messages in thread
From: Daniel P. Berrange @ 2009-06-15 15:08 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Mark McLoughlin, Paul Brook

On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
> Michael S. Tsirkin wrote:
> >>I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
> >>should use it with respect to the "genesis" use-case where libvirt has  
> >>no specific reason to choose one PCI slot over another.  In that case,  
> >>I'm merely advocating that we want to let QEMU make the decision.
> >>    
> >
> >The allocation code could be moved out into a library, and libvirt could
> >link with it (ducks).
> >  
> 
> Why does libvirt want to do allocation?

It doesn't want to. As Mark said, libvirt just wants to be able to ensure
a stable guest ABI, of which stable PCI addresses is one aspect. This does
not imply libvirt wants to allocate the PCI addresses, just that it wants
a way to keep them stable. All else being equal I'd rather libvirt wasn't
in the PCI address allocation business.


Regards,
Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:34                                           ` Michael S. Tsirkin
@ 2009-06-15 15:11                                             ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, Avi Kivity, Jamie Lokier, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Michael S. Tsirkin wrote:
> And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
> more straightforward to have capability bits which can be switched on
> and off independently rather than trying to fit unrelated features into
> a machine type?  IMO it only seems more work at first, and QA gets a bit
> nervous that they can't exhaustively test all options. But in the long
> run it simplifies things as you don't have to set policy and invent
> silly names.
>   

We're strictly talking about default machine configs.  That has nothing 
to do with capabilities.  You still need to know what the default set of 
enabled capabilities were and keep track of that.  All that I'm 
suggesting is that we use the machine name to collapse the default set 
of capabilities into something that libvirt can track.

The advantage of using something more opaque like that is that it 
simplifies things for management tools as they don't have to keep track 
of "capabilities" that we're adding.  Heck, you could even do:

pc-00000034

Where "pc-%08x" % (capabilities) :-)

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:11                                             ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

Michael S. Tsirkin wrote:
> And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
> more straightforward to have capability bits which can be switched on
> and off independently rather than trying to fit unrelated features into
> a machine type?  IMO it only seems more work at first, and QA gets a bit
> nervous that they can't exhaustively test all options. But in the long
> run it simplifies things as you don't have to set policy and invent
> silly names.
>   

We're strictly talking about default machine configs.  That has nothing 
to do with capabilities.  You still need to know what the default set of 
enabled capabilities were and keep track of that.  All that I'm 
suggesting is that we use the machine name to collapse the default set 
of capabilities into something that libvirt can track.

The advantage of using something more opaque like that is that it 
simplifies things for management tools as they don't have to keep track 
of "capabilities" that we're adding.  Heck, you could even do:

pc-00000034

Where "pc-%08x" % (capabilities) :-)

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 14:34                                           ` Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-15 15:11                                           ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, Jamie Lokier,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Avi Kivity, Paul Brook

Michael S. Tsirkin wrote:
> And then pc-virtio-class-other-with-balloon-without-usb? Wouldn't it be
> more straightforward to have capability bits which can be switched on
> and off independently rather than trying to fit unrelated features into
> a machine type?  IMO it only seems more work at first, and QA gets a bit
> nervous that they can't exhaustively test all options. But in the long
> run it simplifies things as you don't have to set policy and invent
> silly names.
>   

We're strictly talking about default machine configs.  That has nothing 
to do with capabilities.  You still need to know what the default set of 
enabled capabilities were and keep track of that.  All that I'm 
suggesting is that we use the machine name to collapse the default set 
of capabilities into something that libvirt can track.

The advantage of using something more opaque like that is that it 
simplifies things for management tools as they don't have to keep track 
of "capabilities" that we're adding.  Heck, you could even do:

pc-00000034

Where "pc-%08x" % (capabilities) :-)

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:07                                               ` Anthony Liguori
@ 2009-06-15 15:11                                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 06:07 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> I'd object to any implicit addressing rules.  If we have to say 
>> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
>> index=8345345235 so be it.
>
> The next observation is that while we expand the SCSI addressing, the 
> current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).
>
> An alternative would be to either always expand or always flatten 
> addressing.  I think the latter has a lot of merit.  Consider:
>
> -controller type=lsi1234,addr=00:01,name=blah
> -controller-disk controller=blah,addr=00:01,name=sda
>
> -controller type=ide,addr=00.02,name=ide
> -controller-disk controller=ide,addr=3,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> This means that addr's format depends on the parent device node which 
> is a bit less explicit than the previous example.  However, it is much 
> more consistent and easier to implement.  Basically, when adding a 
> device to its parent, you hand the parent the "addr" field and that 
> lets you say where you want to sit on the bus.

I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
with generic names too.

There's value in sticking to well-understood names and address formats.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:11                                                 ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 06:07 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> I'd object to any implicit addressing rules.  If we have to say 
>> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
>> index=8345345235 so be it.
>
> The next observation is that while we expand the SCSI addressing, the 
> current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).
>
> An alternative would be to either always expand or always flatten 
> addressing.  I think the latter has a lot of merit.  Consider:
>
> -controller type=lsi1234,addr=00:01,name=blah
> -controller-disk controller=blah,addr=00:01,name=sda
>
> -controller type=ide,addr=00.02,name=ide
> -controller-disk controller=ide,addr=3,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> This means that addr's format depends on the parent device node which 
> is a bit less explicit than the previous example.  However, it is much 
> more consistent and easier to implement.  Basically, when adding a 
> device to its parent, you hand the parent the "addr" field and that 
> lets you say where you want to sit on the bus.

I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
with generic names too.

There's value in sticking to well-understood names and address formats.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:07                                               ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 15:11                                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 06:07 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>>
>> I'd object to any implicit addressing rules.  If we have to say 
>> target=2,lun=7,street=8,city=9,state=99,zip=12345 instead of 
>> index=8345345235 so be it.
>
> The next observation is that while we expand the SCSI addressing, the 
> current proposal flattens the PCI hierarchy (i.e. pci_addr=00:01.0).
>
> An alternative would be to either always expand or always flatten 
> addressing.  I think the latter has a lot of merit.  Consider:
>
> -controller type=lsi1234,addr=00:01,name=blah
> -controller-disk controller=blah,addr=00:01,name=sda
>
> -controller type=ide,addr=00.02,name=ide
> -controller-disk controller=ide,addr=3,name=hdd
>
> -drive file=foo.img,controller-disk=sda
> -drive file=bar.img,controller-disk=hdd
>
> This means that addr's format depends on the parent device node which 
> is a bit less explicit than the previous example.  However, it is much 
> more consistent and easier to implement.  Basically, when adding a 
> device to its parent, you hand the parent the "addr" field and that 
> lets you say where you want to sit on the bus.

I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
with generic names too.

There's value in sticking to well-understood names and address formats.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:05                                                               ` Avi Kivity
@ 2009-06-15 15:11                                                                 ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Avi Kivity
  Cc: dlaor, Carsten Otte, Rusty Russell, kvm, Mark McLoughlin,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> However this may end up, isn't it offtopic?  Whatever we do we have to 
> support both pci_addr= and default placement, so we can push this 
> discussion to libvirt-devel and bid them godspeed.

I'm not sure how we got here but yeah, let's table this part of the 
discussion.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:11                                                                 ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, dlaor, kvm, Mark McLoughlin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Michael S. Tsirkin, Paul Brook

Avi Kivity wrote:
> However this may end up, isn't it offtopic?  Whatever we do we have to 
> support both pci_addr= and default placement, so we can push this 
> discussion to libvirt-devel and bid them godspeed.

I'm not sure how we got here but yeah, let's table this part of the 
discussion.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:05                                                               ` Avi Kivity
  (?)
@ 2009-06-15 15:11                                                               ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:11 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Paul Brook

Avi Kivity wrote:
> However this may end up, isn't it offtopic?  Whatever we do we have to 
> support both pci_addr= and default placement, so we can push this 
> discussion to libvirt-devel and bid them godspeed.

I'm not sure how we got here but yeah, let's table this part of the 
discussion.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:08                                                                   ` Daniel P. Berrange
@ 2009-06-15 15:12                                                                     ` Dor Laor
  -1 siblings, 0 replies; 457+ messages in thread
From: Dor Laor @ 2009-06-15 15:12 UTC (permalink / raw)
  To: Daniel P. Berrange
  Cc: Anthony Liguori, Michael S. Tsirkin, Avi Kivity, Carsten Otte,
	Rusty Russell, kvm, Mark McLoughlin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook

Daniel P. Berrange wrote:
> On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
>>>> should use it with respect to the "genesis" use-case where libvirt has  
>>>> no specific reason to choose one PCI slot over another.  In that case,  
>>>> I'm merely advocating that we want to let QEMU make the decision.
>>>>    
>>>>         
>>> The allocation code could be moved out into a library, and libvirt could
>>> link with it (ducks).
>>>  
>>>       
>> Why does libvirt want to do allocation?
>>     
>
> It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> a stable guest ABI, of which stable PCI addresses is one aspect. This does
> not imply libvirt wants to allocate the PCI addresses, just that it wants
> a way to keep them stable. All else being equal I'd rather libvirt wasn't
> in the PCI address allocation business.
>   

It's not about what libvirt wants. It's about what will serve the end 
user the most.
Apart from a stable guest ABI, end users need to have the option to control 
the slot for
their devices, just like they have for physical machines. It's not a 
theoretical discussion;
limiting issues with shared IRQs is one real-life example.

Thanks, dor


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:12                                                                     ` Dor Laor
  0 siblings, 0 replies; 457+ messages in thread
From: Dor Laor @ 2009-06-15 15:12 UTC (permalink / raw)
  To: Daniel P. Berrange
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Mark McLoughlin, Paul Brook

Daniel P. Berrange wrote:
> On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
>>>> should use it with respect to the "genesis" use-case where libvirt has  
>>>> no specific reason to choose one PCI slot over another.  In that case,  
>>>> I'm merely advocating that we want to let QEMU make the decision.
>>>>    
>>>>         
>>> The allocation code could be moved out into a library, and libvirt could
>>> link with it (ducks).
>>>  
>>>       
>> Why does libvirt want to do allocation?
>>     
>
> It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> a stable guest ABI, of which stable PCI addresses is one aspect. This does
> not imply libvirt wants to allocate the PCI addresses, just that it wants
> a way to keep them stable. All else being equal I'd rather libvirt wasn't
> in the PCI address allocation business.
>   

It's not about what libvirt wants. It's about what will serve the end 
user the most.
Apart from a stable guest ABI, end users need to have the option to control 
the slot for
their devices, just like they have for physical machines. It's not a 
theoretical discussion;
limiting issues with shared IRQs is one real-life example.

Thanks, dor

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:08                                                                   ` Daniel P. Berrange
  (?)
@ 2009-06-15 15:12                                                                   ` Dor Laor
  -1 siblings, 0 replies; 457+ messages in thread
From: Dor Laor @ 2009-06-15 15:12 UTC (permalink / raw)
  To: Daniel P. Berrange
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Anthony Liguori, Mark McLoughlin, Paul Brook

Daniel P. Berrange wrote:
> On Mon, Jun 15, 2009 at 10:03:22AM -0500, Anthony Liguori wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>> I'm not at all arguing against pci_addr.  I'm arguing about how libvirt  
>>>> should use it with respect to the "genesis" use-case where libvirt has  
>>>> no specific reason to choose one PCI slot over another.  In that case,  
>>>> I'm merely advocating that we want to let QEMU make the decision.
>>>>    
>>>>         
>>> The allocation code could be moved out into a library, and libvirt could
>>> link with it (ducks).
>>>  
>>>       
>> Why does libvirt want to do allocation?
>>     
>
> It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> a stable guest ABI, of which stable PCI addresses is one aspect. This does
> not imply libvirt wants to allocate the PCI addresses, just that it wants
> a way to keep them stable. All else being equal I'd rather libvirt wasn't
> in the PCI address allocation business.
>   

It's not about what libvirt wants. It's about what will serve the end 
user the most.
Apart from a stable guest ABI, end users need to have the option to control 
the slot for
their devices, just like they have for physical machines. It's not a 
theoretical discussion;
limiting issues with shared IRQs is one real-life example.

Thanks, dor

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:12                                                                     ` Dor Laor
@ 2009-06-15 15:15                                                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:15 UTC (permalink / raw)
  To: dlaor
  Cc: Daniel P. Berrange, Anthony Liguori, Michael S. Tsirkin,
	Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On 06/15/2009 06:12 PM, Dor Laor wrote:
>> It doesn't want to. As Mark said, libvirt just wants to be able to 
>> ensure
>> a stable guest ABI, of which stable PCI addresses is one aspect. This 
>> does
>> not imply libvirt wants to allocate the PCI addresses, just that it 
>> wants
>> a way to keep them stable. All else being equal I'd rather libvirt 
>> wasn't
>> in the PCI address allocation business.
>
>
> It's not about what libvirt wants. It's about what will serve the end 
> user the most.
> Apart from a stable guest ABI, end users need to have the option to 
> control the slot for
> their devices, just like they have for physical machines. It's not a 
> theoretical discussion;
> limiting issues with shared IRQs is one real-life example.
>

Another issue is enumeration.  Guests will present their devices in the 
order they find them on the pci bus (of course enumeration is guest 
specific).  So if I have 2 virtio controllers the only way I can 
distinguish between them is using their pci slots.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:15                                                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:15 UTC (permalink / raw)
  To: dlaor
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

On 06/15/2009 06:12 PM, Dor Laor wrote:
>> It doesn't want to. As Mark said, libvirt just wants to be able to 
>> ensure
>> a stable guest ABI, of which stable PCI addresses is one aspect. This 
>> does
>> not imply libvirt wants to allocate the PCI addresses, just that it 
>> wants
>> a way to keep them stable. All else being equal I'd rather libvirt 
>> wasn't
>> in the PCI address allocation business.
>
>
> It's not about what libvirt wants. It's about what will serve the end 
> user the most.
> Apart from a stable guest ABI, end users need to have the option to 
> control the slot for
> their devices, just like they have for physical machines. It's not a 
> theoretical discussion;
> limiting issues with shared IRQs is one real-life example.
>

Another issue is enumeration.  Guests will present their devices in the 
order they find them on the pci bus (of course enumeration is guest 
specific).  So if I have 2 virtio controllers the only way I can 
distinguish between them is using their pci slots.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:12                                                                     ` Dor Laor
  (?)
@ 2009-06-15 15:15                                                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:15 UTC (permalink / raw)
  To: dlaor
  Cc: Carsten Otte, Daniel P. Berrange, kvm, Michael S. Tsirkin,
	Glauber Costa, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori,
	Mark McLoughlin

On 06/15/2009 06:12 PM, Dor Laor wrote:
>> It doesn't want to. As Mark said, libvirt just wants to be able to 
>> ensure
>> a stable guest ABI, of which stable PCI addresses is one aspect. This 
>> does
>> not imply libvirt wants to allocate the PCI addresses, just that it 
>> wants
>> a way to keep them stable. All else being equal I'd rather libvirt 
>> wasn't
>> in the PCI address allocation business.
>
>
> It's not about what libvirt wants. It's about what will serve the end 
> user the most.
> Apart from a stable guest ABI, end users need to have the option to 
> control the slot for
> their devices, just like they have for physical machines. It's not a 
> theoretical discussion;
> limiting issues with shared IRQs is one real-life example.
>

Another issue is enumeration.  Guests will present their devices in the 
order they find them on the pci bus (of course enumeration is guest 
specific).  So if I have 2 virtio controllers the only way I can 
distinguish between them is using their pci slots.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:11                                                 ` Avi Kivity
@ 2009-06-15 15:20                                                   ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:20 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
> with generic names too.

I think having a generic address has a lot of value in terms of code 
implementation.  Otherwise, the valid options for -drive become 
context-sensitive which is going to be annoying and error-prone.  Some 
sanity could be added by using addressing prefixes like addr=pci:00:01.0 
or addr=scsi:0.3 but I'll leave that up to whoever takes this on.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:20                                                   ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:20 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
> with generic names too.

I think having a generic address has a lot of value in terms of code 
implementation.  Otherwise, the valid options for -drive become 
context-sensitive which is going to be annoying and error-prone.  Some 
sanity could be added by using addressing prefixes like addr=pci:00:01.0 
or addr=scsi:0.3 but I'll leave that up to whoever takes this on.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:11                                                 ` Avi Kivity
  (?)
@ 2009-06-15 15:20                                                 ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 15:20 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
> with generic names too.

I think having a generic address has a lot of value in terms of code 
implementation.  Otherwise, the valid options for -drive become 
context-sensitive which is going to be annoying and error-prone.  Some 
sanity could be added by using addressing prefixes like addr=pci:00:01.0 
or addr=scsi:0.3 but I'll leave that up to whoever takes this on.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:20                                                   ` Anthony Liguori
@ 2009-06-15 15:26                                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Michael S. Tsirkin, Mark McLoughlin, Jamie Lokier, Carsten Otte,
	kvm, Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 06:20 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
>> with generic names too.
>
> I think having a generic address has a lot of value in terms of code 
> implementation.  Otherwise, the valid options for -drive become 
> context-sensitive which is going to be annoying and error-prone.  Some 
> sanity could be added by using addressing prefixes like 
> addr=pci:00:01.0 or addr=scsi:0.3 but I'll leave that up to whoever 
> takes this on.

The code problems are easily solved by adding another level of 
indirection.  User confusion problems are only aggravated by additional 
abstraction, though ("what do I put in addr=, here?").

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 15:26                                                     ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, Rusty Russell, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/15/2009 06:20 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
>> with generic names too.
>
> I think having a generic address has a lot of value in terms of code 
> implementation.  Otherwise, the valid options for -drive become 
> context-sensitive which is going to be annoying and error-prone.  Some 
> sanity could be added by using addressing prefixes like 
> addr=pci:00:01.0 or addr=scsi:0.3 but I'll leave that up to whoever 
> takes this on.

The code problems are easily solved by adding another level of 
indirection.  User confusion problems are only aggravated by additional 
abstraction, though ("what do I put in addr=, here?").

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:20                                                   ` Anthony Liguori
  (?)
@ 2009-06-15 15:26                                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 15:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Mark McLoughlin, Glauber Costa,
	Michael S. Tsirkin, Jamie Lokier, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 06:20 PM, Anthony Liguori wrote:
> Avi Kivity wrote:
>> I would prefer explicit names (pci_addr, lun, etc.) but would be okay 
>> with generic names too.
>
> I think having a generic address has a lot of value in terms of code 
> implementation.  Otherwise, the valid options for -drive become 
> context-sensitive which is going to be annoying and error-prone.  Some 
> sanity could be added by using addressing prefixes like 
> addr=pci:00:01.0 or addr=scsi:0.3 but I'll leave that up to whoever 
> takes this on.

The code problems are easily solved by adding another level of 
indirection.  User confusion problems are only aggravated by additional 
abstraction, though ("what do I put in addr=, here?").

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:05                                                               ` Avi Kivity
@ 2009-06-15 16:27                                                                 ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, 2009-06-15 at 18:05 +0300, Avi Kivity wrote:
> On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> > Dor Laor wrote:
> >> Libvirt does not support r2d. I hope it won't start to support it.
> >
> > It supports mips, sparc, and ppc machines now.  I don't see why it 
> > wouldn't support r2d.  For ppcemb, I expect this same problem to 
> > occur.  This sort of restriction is going to be common with embedded 
> > boards.
> 
> I expect these restrictions will have to be known by the management 
> application.  Otherwise the users will try invalid configurations only 
> to receive errors when they launch them.  GUIs exist to guide users, not 
> as an inefficient means of trial-and-error.

So long as the restrictions would be known to the management app via
some "what slots are available" mechanism in qemu, that sounds fine.

> > I'm not at all arguing against pci_addr.  I'm arguing about how 
> > libvirt should use it with respect to the "genesis" use-case where 
> > libvirt has no specific reason to choose one PCI slot over another.  
> > In that case, I'm merely advocating that we want to let QEMU make the 
> > decision.
> 
> However this may end up, isn't it offtopic?  Whatever we do we have to 
> support both pci_addr= and default placement, so we can push this 
> discussion to libvirt-devel and bid them godspeed.

Presumably you're not proposing that qemu-devel completely ignore the
typical requirements of management apps?

You can push the discussion to libvirt-devel, and the conclusion would
most likely be:

  "We can do slot allocation if you provide us with a way to query free 
   slots, or we can use qemu's default allocation if you provide us a
   way to query the allocation.

   We'd prefer the default allocation problem, but we don't really 
   care. Both require about the same amount of work for us."

libvirt was only mentioned in this thread as a concrete example of how
the suggested solutions would actually be used by management apps.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 16:27                                                                 ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On Mon, 2009-06-15 at 18:05 +0300, Avi Kivity wrote:
> On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> > Dor Laor wrote:
> >> Libvirt does not support r2d. I hope it won't start to support it.
> >
> > It supports mips, sparc, and ppc machines now.  I don't see why it 
> > wouldn't support r2d.  For ppcemb, I expect this same problem to 
> > occur.  This sort of restriction is going to be common with embedded 
> > boards.
> 
> I expect these restrictions will have to be known by the management 
> application.  Otherwise the users will try invalid configurations only 
> to receive errors when they launch them.  GUIs exist to guide users, not 
> as an inefficient means of trial-and-error.

So long as the restrictions would be known to the management app via
some "what slots are available" mechanism in qemu, that sounds fine.

> > I'm not at all arguing against pci_addr.  I'm arguing about how 
> > libvirt should use it with respect to the "genesis" use-case where 
> > libvirt has no specific reason to choose one PCI slot over another.  
> > In that case, I'm merely advocating that we want to let QEMU make the 
> > decision.
> 
> However this may end up, isn't it offtopic?  Whatever we do we have to 
> support both pci_addr= and default placement, so we can push this 
> discussion to libvirt-devel and bid them godspeed.

Presumably you're not proposing that qemu-devel completely ignore the
typical requirements of management apps?

You can push the discussion to libvirt-devel, and the conclusion would
most likely be:

  "We can do slot allocation if you provide us with a way to query free 
   slots, or we can use qemu's default allocation if you provide us a
   way to query the allocation.

   We'd prefer the default allocation problem, but we don't really 
   care. Both require about the same amount of work for us."

libvirt was only mentioned in this thread as a concrete example of how
the suggested solutions would actually be used by management apps.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:05                                                               ` Avi Kivity
                                                                                 ` (3 preceding siblings ...)
  (?)
@ 2009-06-15 16:27                                                               ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori

On Mon, 2009-06-15 at 18:05 +0300, Avi Kivity wrote:
> On 06/15/2009 05:24 PM, Anthony Liguori wrote:
> > Dor Laor wrote:
> >> Libvirt does not support r2d. I hope it won't start to support it.
> >
> > It supports mips, sparc, and ppc machines now.  I don't see why it 
> > wouldn't support r2d.  For ppcemb, I expect this same problem to 
> > occur.  This sort of restriction is going to be common with embedded 
> > boards.
> 
> I expect these restrictions will have to be known by the management 
> application.  Otherwise the users will try invalid configurations only 
> to receive errors when they launch them.  GUIs exist to guide users, not 
> as an inefficient means of trial-and-error.

So long as the restrictions would be known to the management app via
some "what slots are available" mechanism in qemu, that sounds fine.

> > I'm not at all arguing against pci_addr.  I'm arguing about how 
> > libvirt should use it with respect to the "genesis" use-case where 
> > libvirt has no specific reason to choose one PCI slot over another.  
> > In that case, I'm merely advocating that we want to let QEMU make the 
> > decision.
> 
> However this may end up, isn't it offtopic?  Whatever we do we have to 
> support both pci_addr= and default placement, so we can push this 
> discussion to libvirt-devel and bid them godspeed.

Presumably you're not proposing that qemu-devel completely ignore the
typical requirements of management apps?

You can push the discussion to libvirt-devel, and the conclusion would
most likely be:

  "We can do slot allocation if you provide us with a way to query free 
   slots, or we can use qemu's default allocation if you provide us a
   way to query the allocation.

   We'd prefer the default allocation problem, but we don't really 
   care. Both require about the same amount of work for us."

libvirt was only mentioned in this thread as a concrete example of how
the suggested solutions would actually be used by management apps.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:12                                                                     ` Dor Laor
@ 2009-06-15 16:27                                                                       ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
  To: dlaor
  Cc: Daniel P. Berrange, Anthony Liguori, Michael S. Tsirkin,
	Avi Kivity, Carsten Otte, Rusty Russell, kvm, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook

On Mon, 2009-06-15 at 18:12 +0300, Dor Laor wrote:
> > It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> > a stable guest ABI, of which stable PCI addresses is one aspect. This does
> > not imply libvirt wants to allocate the PCI addresses, just that it wants
> > a way to keep them stable. All else being equal I'd rather libvirt wasn't
> > in the PCI address allocation business.
> >   
> 
> It's not about what libvirt wants. It's about what will serve the end 
> user the most.

Absolutely. And not just about what most helps end users of libvirt
based management apps, but also any app managing qemu.

> Apart from a stable guest ABI, end users need to have the option to
> control the slot for their devices, just like they have for physical
> machines. It's not a theoretical discussion; limiting issues with shared
> IRQs is one real-life example.

Providing end users with the *option* to choose PCI slots sounds like a
fine feature request for any management app.

Requiring all management apps to force end users to explicitly choose
PCI slots in order for slots to be stable is not so reasonable.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 16:27                                                                       ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
  To: dlaor
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

On Mon, 2009-06-15 at 18:12 +0300, Dor Laor wrote:
> > It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> > a stable guest ABI, of which stable PCI addresses is one aspect. This does
> > not imply libvirt wants to allocate the PCI addresses, just that it wants
> > a way to keep them stable. All else being equal I'd rather libvirt wasn't
> > in the PCI address allocation business.
> >   
> 
> It's not about what libvirt wants. It's about what will serve the end 
> user the most.

Absolutely. And not just about what most helps end users of libvirt
based management apps, but also any app managing qemu.

> Apart from a stable guest ABI, end users need to have the option to
> control the slot for their devices, just like they have for physical
> machines. It's not a theoretical discussion; limiting issues with shared
> IRQs is one real-life example.

Providing end users with the *option* to choose PCI slots sounds like a
fine feature request for any management app.

Requiring all management apps to force end users to explicitly choose
PCI slots in order for slots to be stable is not so reasonable.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:12                                                                     ` Dor Laor
                                                                                       ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 16:27                                                                     ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-15 16:27 UTC (permalink / raw)
  To: dlaor
  Cc: Carsten Otte, Daniel P. Berrange, kvm, Michael S. Tsirkin,
	Glauber Costa, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Anthony Liguori, Paul Brook

On Mon, 2009-06-15 at 18:12 +0300, Dor Laor wrote:
> > It doesn't want to. As Mark said, libvirt just wants to be able to ensure
> > a stable guest ABI, of which stable PCI addresses is one aspect. This does
> > not imply libvirt wants to allocate the PCI addresses, just that it wants
> > a way to keep them stable. All else being equal I'd rather libvirt wasn't
> > in the PCI address allocation business.
> >   
> 
> It's not about what libvirt wants. It's about what will serve the end 
> user the most.

Absolutely. And not just about what most helps end users of libvirt
based management apps, but also any app managing qemu.

> Apart from a stable guest ABI, end users need to have the option to
> control the slot for their devices, just like they have for physical
> machines. It's not a theoretical discussion; limiting issues with shared
> IRQs is one real-life example.

Providing end users with the *option* to choose PCI slots sounds like a
fine feature request for any management app.

Requiring all management apps to force end users to explicitly choose
PCI slots in order for slots to be stable is not so reasonable.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 16:27                                                                 ` Mark McLoughlin
@ 2009-06-15 17:09                                                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 17:09 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori


[-- Attachment #1.1: Type: text/plain, Size: 1669 bytes --]

On 06/15/2009 07:27 PM, Mark McLoughlin wrote:
>> However this may end up, isn't it offtopic?  Whatever we do we have to
>> support both pci_addr= and default placement, so we can push this
>> discussion to libvirt-devel and bid them godspeed.
>>      
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>    

We propose to allow both qemu-allocated slots and user-allocated slots, 
so we're only ignoring the actual decision by the management tool 
providers, not their requirements.

> You can push the discussion to libvirt-devel, and the conclusion would
> most likely be:
>
>    "We can do slot allocation if you provide us with a way to query free
>     slots, or we can use qemu's default allocation if you provide us a
>     way to query the allocation.
>
>     We'd prefer the default allocation problem, but we don't really
>     care. Both require about the same amount of work for us."
>    

Well, they'll find out if they try default allocation.  It's traditional 
to try all the complicated solutions before trying the simplest one, so 
I guess we'll just have to let them.

> libvirt was only mentioned in this thread as a concrete example of how
> the suggested solutions would actually be used by management apps.
>    

True, others will wind up doing things differently.  In fact, I'm a 
little surprised that libvirt is involved, since the place to do 
inventory is in the management app itself (it's true that libvirt also 
maintains its own database, so the line is blurred).

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


[-- Attachment #1.2: Type: text/html, Size: 2316 bytes --]

[-- Attachment #2: Type: text/plain, Size: 184 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 17:09                                                                   ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 17:09 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

[-- Attachment #1: Type: text/plain, Size: 1669 bytes --]

On 06/15/2009 07:27 PM, Mark McLoughlin wrote:
>> However this may end up, isn't it offtopic?  Whatever we do we have to
>> support both pci_addr= and default placement, so we can push this
>> discussion to libvirt-devel and bid them godspeed.
>>      
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>    

We propose to allow both qemu-allocated slots and user-allocated slots, 
so we're only ignoring the actual decision by the management tool 
providers, not their requirements.

> You can push the discussion to libvirt-devel, and the conclusion would
> most likely be:
>
>    "We can do slot allocation if you provide us with a way to query free
>     slots, or we can use qemu's default allocation if you provide us a
>     way to query the allocation.
>
>     We'd prefer the default allocation problem, but we don't really
>     care. Both require about the same amount of work for us."
>    

Well, they'll find out if they try default allocation.  It's traditional 
to try all the complicated solutions before trying the simplest one, so 
I guess we'll just have to let them.

> libvirt was only mentioned in this thread as a concrete example of how
> the suggested solutions would actually be used by management apps.
>    

True, others will wind up doing things differently.  In fact, I'm a 
little surprised that libvirt is involved, since the place to do 
inventory is in the management app itself (it's true that libvirt also 
maintains its own database, so the line is blurred).

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


[-- Attachment #2: Type: text/html, Size: 2316 bytes --]

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 16:27                                                                       ` Mark McLoughlin
@ 2009-06-15 17:13                                                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 17:13 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, Daniel P. Berrange, kvm, Michael S. Tsirkin,
	Glauber Costa, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Anthony Liguori


[-- Attachment #1.1: Type: text/plain, Size: 749 bytes --]

On 06/15/2009 07:27 PM, Mark McLoughlin wrote:
>
> Providing end users with the *option* to choose PCI slots sounds like a
> fine feature request for any management app.
>
> Requiring all management apps to force end users to explicitly choose
> PCI slots in order for slots to be stable is not so reasonable.
>    

Think any installer's partitioning utility.  It will provide a default 
placement and try to hide it from you.  If you ask, it will let you 
place the partitions yourself.

The management app is the end user's agent.  When we push something 
there, we allow it to choose something, or push the decision further up 
to the user.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


[-- Attachment #1.2: Type: text/html, Size: 1135 bytes --]

[-- Attachment #2: Type: text/plain, Size: 184 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 17:13                                                                         ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 17:13 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

[-- Attachment #1: Type: text/plain, Size: 749 bytes --]

On 06/15/2009 07:27 PM, Mark McLoughlin wrote:
>
> Providing end users with the *option* to choose PCI slots sounds like a
> fine feature request for any management app.
>
> Requiring all management apps to force end users to explicitly choose
> PCI slots in order for slots to be stable is not so reasonable.
>    

Think any installer's partitioning utility.  It will provide a default 
placement and try to hide it from you.  If you ask, it will let you 
place the partitions yourself.

The management app is the end user's agent.  When we push something 
there, we allow it to choose something, or push the decision further up 
to the user.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


[-- Attachment #2: Type: text/html, Size: 1135 bytes --]

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 16:27                                                                 ` Mark McLoughlin
@ 2009-06-15 18:12                                                                   ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:12 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Avi Kivity, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Mark McLoughlin wrote:
> So long as the restrictions would be known to the management app via
> some "what slots are available" mechanism in qemu, that sounds fine.
>   

I'm not sure a "what slots are available" mechanism is as straight 
forward as has been claimed.  It doesn't matter though because it's 
orthogonal to the current proposal.

>>> I'm not at all arguing against pci_addr.  I'm arguing about how 
>>> libvirt should use it with respect to the "genesis" use-case where 
>>> libvirt has no specific reason to choose one PCI slot over another.  
>>> In that case, I'm merely advocating that we want to let QEMU make the 
>>> decision.
>>>       
>> However this may end up, isn't it offtopic?  Whatever we do we have to 
>> support both pci_addr= and default placement, so we can push this 
>> discussion to libvirt-devel and bid them godspeed.
>>     
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>   

This is a happy case where the current proposals allow both usages to 
occur.  Which one libvirt chooses is up to it.

To summarize, I think we have:

1) Introduce addressing to all host device configurations
  - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1" 
or in flattened form "addr=bus:dev.fn or addr=target.lun".  I prefer the 
latter form but I think either would be acceptable.

2) Whenever the default machine type changes in a guest-visible way, 
introduce a new machine type
  - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
names pc-with-usb
  - Easily transitions to device config files

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 18:12                                                                   ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:12 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

Mark McLoughlin wrote:
> So long as the restrictions would be known to the management app via
> some "what slots are available" mechanism in qemu, that sounds fine.
>   

I'm not sure a "what slots are available" mechanism is as straight 
forward as has been claimed.  It doesn't matter though because it's 
orthogonal to the current proposal.

>>> I'm not at all arguing against pci_addr.  I'm arguing about how 
>>> libvirt should use it with respect to the "genesis" use-case where 
>>> libvirt has no specific reason to choose one PCI slot over another.  
>>> In that case, I'm merely advocating that we want to let QEMU make the 
>>> decision.
>>>       
>> However this may end up, isn't it offtopic?  Whatever we do we have to 
>> support both pci_addr= and default placement, so we can push this 
>> discussion to libvirt-devel and bid them godspeed.
>>     
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>   

This is a happy case where the current proposals allow both usages to 
occur.  Which one libvirt chooses is up to it.

To summarize, I think we have:

1) Introduce addressing to all host device configurations
  - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1" 
or in flattened form "addr=bus:dev.fn or addr=target.lun".  I prefer the 
latter form but I think either would be acceptable.

2) Whenever the default machine type changes in a guest-visible way, 
introduce a new machine type
  - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
names pc-with-usb
  - Easily transitions to device config files

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 16:27                                                                 ` Mark McLoughlin
                                                                                   ` (2 preceding siblings ...)
  (?)
@ 2009-06-15 18:12                                                                 ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:12 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Paul Brook

Mark McLoughlin wrote:
> So long as the restrictions would be known to the management app via
> some "what slots are available" mechanism in qemu, that sounds fine.
>   

I'm not sure a "what slots are available" mechanism is as straight 
forward as has been claimed.  It doesn't matter though because it's 
orthogonal to the current proposal.

>>> I'm not at all arguing against pci_addr.  I'm arguing about how 
>>> libvirt should use it with respect to the "genesis" use-case where 
>>> libvirt has no specific reason to choose one PCI slot over another.  
>>> In that case, I'm merely advocating that we want to let QEMU make the 
>>> decision.
>>>       
>> However this may end up, isn't it offtopic?  Whatever we do we have to 
>> support both pci_addr= and default placement, so we can push this 
>> discussion to libvirt-devel and bid them godspeed.
>>     
>
> Presumably you're not proposing that qemu-devel completely ignore the
> typical requirements of management apps?
>   

This is a happy case where the current proposals allow both usages to 
occur.  Which one libvirt chooses is up to it.

To summarize, I think we have:

1) Introduce addressing to all host device configurations
  - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1" 
or in flattened form "addr=bus:dev.fn or addr=target.lun".  I prefer the 
latter form but I think either would be acceptable.

2) Whenever the default machine type changes in a guest-visible way, 
introduce a new machine type
  - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
names pc-with-usb
  - Easily transitions to device config files

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:12                                                                   ` Anthony Liguori
@ 2009-06-15 18:21                                                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 18:21 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type

s/whenever/qemu stable release/

>  - Use explicit versions in name: pc-v1, pc-v2

pc-qemu-0.10?

This is similar to a hardware vendor's model number (though they tend to 
change components without changing model numbers, those naughty vendors)

> or use more descriptive names pc-with-usb
>  - Easily transitions to device config files

Combinatorial explosion.  Just use -usb.


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 18:21                                                                     ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 18:21 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, dlaor, kvm, Carsten Otte, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Michael S. Tsirkin, Paul Brook

On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type

s/whenever/qemu stable release/

>  - Use explicit versions in name: pc-v1, pc-v2

pc-qemu-0.10?

This is similar to a hardware vendor's model number (though they tend to 
change components without changing model numbers, those naughty vendors)

> or use more descriptive names pc-with-usb
>  - Easily transitions to device config files

Combinatorial explosion.  Just use -usb.


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:12                                                                   ` Anthony Liguori
  (?)
  (?)
@ 2009-06-15 18:21                                                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-15 18:21 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Paul Brook

On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type

s/whenever/qemu stable release/

>  - Use explicit versions in name: pc-v1, pc-v2

pc-qemu-0.10?

This is similar to a hardware vendor's model number (though they tend to 
change components without changing model numbers, those naughty vendors)

> or use more descriptive names pc-with-usb
>  - Easily transitions to device config files

Combinatorial explosion.  Just use -usb.


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:21                                                                     ` Avi Kivity
@ 2009-06-15 18:24                                                                       ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:24 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>>
>> 2) Whenever the default machine type changes in a guest-visible way, 
>> introduce a new machine type
>
> s/whenever/qemu stable release/
>
>>  - Use explicit versions in name: pc-v1, pc-v2
>
> pc-qemu-0.10?
>
> This is similar to a hardware vendor's model number (though they tend 
> to change components without changing model numbers, those naughty 
> vendors)

Yup, that makes a whole lot of sense.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 18:24                                                                       ` Anthony Liguori
  0 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:24 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, dlaor, kvm, Carsten Otte, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Michael S. Tsirkin, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>>
>> 2) Whenever the default machine type changes in a guest-visible way, 
>> introduce a new machine type
>
> s/whenever/qemu stable release/
>
>>  - Use explicit versions in name: pc-v1, pc-v2
>
> pc-qemu-0.10?
>
> This is similar to a hardware vendor's model number (though they tend 
> to change components without changing model numbers, those naughty 
> vendors)

Yup, that makes a whole lot of sense.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:21                                                                     ` Avi Kivity
  (?)
@ 2009-06-15 18:24                                                                     ` Anthony Liguori
  -1 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-06-15 18:24 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger,
	Michael S. Tsirkin, Paul Brook

Avi Kivity wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>>
>> 2) Whenever the default machine type changes in a guest-visible way, 
>> introduce a new machine type
>
> s/whenever/qemu stable release/
>
>>  - Use explicit versions in name: pc-v1, pc-v2
>
> pc-qemu-0.10?
>
> This is similar to a hardware vendor's model number (though they tend 
> to change components without changing model numbers, those naughty 
> vendors)

Yup, that makes a whole lot of sense.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:21                                                                     ` Avi Kivity
@ 2009-06-15 18:44                                                                       ` Blue Swirl
  -1 siblings, 0 replies; 457+ messages in thread
From: Blue Swirl @ 2009-06-15 18:44 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, qemu-devel,
	virtualization, Christian Borntraeger, Michael S. Tsirkin,
	Paul Brook, Anthony Liguori

On 6/15/09, Avi Kivity <avi@redhat.com> wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> >
> > 2) Whenever the default machine type changes in a guest-visible way,
> introduce a new machine type
> >
>
>  s/whenever/qemu stable release/
>
>
> >  - Use explicit versions in name: pc-v1, pc-v2
> >
>
>  pc-qemu-0.10?

pc-2009.06? Or given the hardware, should that be pc-1997?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-15 18:44                                                                       ` Blue Swirl
  0 siblings, 0 replies; 457+ messages in thread
From: Blue Swirl @ 2009-06-15 18:44 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, dlaor, kvm, Carsten Otte, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Christian Borntraeger,
	Michael S. Tsirkin, Paul Brook

On 6/15/09, Avi Kivity <avi@redhat.com> wrote:
> On 06/15/2009 09:12 PM, Anthony Liguori wrote:
>
> >
> > 2) Whenever the default machine type changes in a guest-visible way,
> introduce a new machine type
> >
>
>  s/whenever/qemu stable release/
>
>
> >  - Use explicit versions in name: pc-v1, pc-v2
> >
>
>  pc-qemu-0.10?

pc-2009.06? Or given the hardware, should that be pc-1997?

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:44                                                                       ` Blue Swirl
@ 2009-06-16  8:56                                                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16  8:56 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Anthony Liguori, Mark McLoughlin, dlaor, Carsten Otte,
	Rusty Russell, kvm, Glauber Costa, Michael S. Tsirkin,
	qemu-devel, virtualization, Christian Borntraeger, Paul Brook

On 06/15/2009 09:44 PM, Blue Swirl wrote:
>>   pc-qemu-0.10?
>>      
>
> pc-2009.06? Or given the hardware, should that be pc-1997?
>    

pc-qemu-0.10 has the obvious benefit of allowing people to immediately 
know what's the oldest version of qemu that supports it.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16  8:56                                                                         ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16  8:56 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Mark McLoughlin, dlaor, kvm, Carsten Otte, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Christian Borntraeger,
	Michael S. Tsirkin, Paul Brook

On 06/15/2009 09:44 PM, Blue Swirl wrote:
>>   pc-qemu-0.10?
>>      
>
> pc-2009.06? Or given the hardware, should that be pc-1997?
>    

pc-qemu-0.10 has the obvious benefit of allowing people to immediately 
know what's the oldest version of qemu that supports it.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:44                                                                       ` Blue Swirl
  (?)
  (?)
@ 2009-06-16  8:56                                                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16  8:56 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Mark McLoughlin, kvm, Carsten Otte, Glauber Costa, qemu-devel,
	virtualization, Christian Borntraeger, Michael S. Tsirkin,
	Paul Brook, Anthony Liguori

On 06/15/2009 09:44 PM, Blue Swirl wrote:
>>   pc-qemu-0.10?
>>      
>
> pc-2009.06? Or given the hardware, should that be pc-1997?
>    

pc-qemu-0.10 has the obvious benefit of allowing people to immediately 
know what's the oldest version of qemu that supports it.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:12                                                                   ` Anthony Liguori
@ 2009-06-16 12:14                                                                     ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:14 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Avi Kivity, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So long as the restrictions would be known to the management app via
> > some "what slots are available" mechanism in qemu, that sounds fine.
> >   
> 
> I'm not sure a "what slots are available" mechanism is as straight 
> forward as has been claimed.

If qemu can't provide that information, then the management app does not
have sufficient information to do the slot allocation itself. In which
case, it must leave it up to qemu to do it.

> It doesn't matter though because it's orthogonal to the current proposal.

It is not orthogonal to solving the actual problem at hand, though -
i.e. how to allow management apps to provide stable PCI addresses.

> >>> I'm not at all arguing against pci_addr.  I'm arguing about how 
> >>> libvirt should use it with respect to the "genesis" use-case where 
> >>> libvirt has no specific reason to choose one PCI slot over another.  
> >>> In that case, I'm merely advocating that we want to let QEMU make the 
> >>> decision.
> >>>       
> >> However this may end up, isn't it offtopic?  Whatever we do we have to 
> >> support both pci_addr= and default placement, so we can push this 
> >> discussion to libvirt-devel and bid them godspeed.
> >>     
> >
> > Presumably you're not proposing that qemu-devel completely ignore the
> > typical requirements of management apps?
> >   
> 
> This is a happy case where the current proposals allow both usages to 
> occur.  Which one libvirt chooses is up to it.
> 
> To summarize, I think we have:
> 
> 1) Introduce addressing to all host device configurations
>   - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1" 
> or in flattened form "addr=bus:dev.fn or addr=target.lun".  I prefer the 
> latter form but I think either would be acceptable.

That helps, but it's not enough on its own.

The management app needs to figure out what addresses to pass either by:

   a) Initially allowing qemu to do the address allocation, and 
      thereafter using those addresses - this requires some way to query
      the addresses of devices

or b) Doing the initial address allocation itself - this requires some 
      way to query what slots are available.

> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type
>   - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
> names pc-with-usb
>   - Easily transitions to device config files

To be clear - you're not proposing this is a solution to the "stable PCI
addresses" problem, are you? The main requirement is for the addresses
to stay stable even if the user adds/removes other devices.

This is a fine solution to the "stable guest ABI" problem ... assuming
there's some way of querying the current default machine type.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 12:14                                                                     ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:14 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So long as the restrictions would be known to the management app via
> > some "what slots are available" mechanism in qemu, that sounds fine.
> >   
> 
> I'm not sure a "what slots are available" mechanism is as straight 
> forward as has been claimed.

If qemu can't provide that information, then the management app does not
have sufficient information to do the slot allocation itself. In which
case, it must leave it up to qemu to do it.

> It doesn't matter though because it's orthogonal to the current proposal.

It is not orthogonal to solving the actual problem at hand, though -
i.e. how to allow management apps to provide stable PCI addresses.

> >>> I'm not at all arguing against pci_addr.  I'm arguing about how 
> >>> libvirt should use it with respect to the "genesis" use-case where 
> >>> libvirt has no specific reason to choose one PCI slot over another.  
> >>> In that case, I'm merely advocating that we want to let QEMU make the 
> >>> decision.
> >>>       
> >> However this may end up, isn't it offtopic?  Whatever we do we have to 
> >> support both pci_addr= and default placement, so we can push this 
> >> discussion to libvirt-devel and bid them godspeed.
> >>     
> >
> > Presumably you're not proposing that qemu-devel completely ignore the
> > typical requirements of management apps?
> >   
> 
> This is a happy case where the current proposals allow both usages to 
> occur.  Which one libvirt chooses is up to it.
> 
> To summarize, I think we have:
> 
> 1) Introduce addressing to all host device configurations
>   - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1" 
> or in flattened form "addr=bus:dev.fn or addr=target.lun".  I prefer the 
> latter form but I think either would be acceptable.

That helps, but it's not enough on its own.

The management app needs to figure out what addresses to pass either by:

   a) Initially allowing qemu to do the address allocation, and 
      thereafter using those addresses - this requires some way to query
      the addresses of devices

or b) Doing the initial address allocation itself - this requires some 
      way to query what slots are available.

> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type
>   - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
> names pc-with-usb
>   - Easily transitions to device config files

To be clear - you're not proposing this is a solution to the "stable PCI
addresses" problem, are you? The main requirement is for the addresses
to stay stable even if the user adds/removes other devices.

This is a fine solution to the "stable guest ABI" problem ... assuming
there's some way of querying the current default machine type.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 18:12                                                                   ` Anthony Liguori
                                                                                     ` (2 preceding siblings ...)
  (?)
@ 2009-06-16 12:14                                                                   ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:14 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Paul Brook

On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > So long as the restrictions would be known to the management app via
> > some "what slots are available" mechanism in qemu, that sounds fine.
> >   
> 
> I'm not sure a "what slots are available" mechanism is as straight 
> forward as has been claimed.

If qemu can't provide that information, then the management app does not
have sufficient information to do the slot allocation itself. In which
case, it must leave it up to qemu to do it.

> It doesn't matter though because it's orthogonal to the current proposal.

It is not orthogonal to solving the actual problem at hand, though -
i.e. how to allow management apps to provide stable PCI addresses.

> >>> I'm not at all arguing against pci_addr.  I'm arguing about how 
> >>> libvirt should use it with respect to the "genesis" use-case where 
> >>> libvirt has no specific reason to choose one PCI slot over another.  
> >>> In that case, I'm merely advocating that we want to let QEMU make the 
> >>> decision.
> >>>       
> >> However this may end up, isn't it offtopic?  Whatever we do we have to 
> >> support both pci_addr= and default placement, so we can push this 
> >> discussion to libvirt-devel and bid them godspeed.
> >>     
> >
> > Presumably you're not proposing that qemu-devel completely ignore the
> > typical requirements of management apps?
> >   
> 
> This is a happy case where the current proposals allow both usages to 
> occur.  Which one libvirt chooses is up to it.
> 
> To summarize, I think we have:
> 
> 1) Introduce addressing to all host device configurations
>   - Either in the canonical form "pci_addr=bus:dev.fn or target=3,lun=1" 
> or in flattened form "addr=bus:dev.fn or addr=target.lun".  I prefer the 
> latter form but I think either would be acceptable.

That helps, but it's not enough on its own.

The management app needs to figure out what addresses to pass either by:

   a) Initially allowing qemu to do the address allocation, and 
      thereafter using those addresses - this requires some way to query
      the addresses of devices

or b) Doing the initial address allocation itself - this requires some 
      way to query what slots are available.

> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type
>   - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
> names pc-with-usb
>   - Easily transitions to device config files

To be clear - you're not proposing this is a solution to the "stable PCI
addresses" problem, are you? The main requirement is for the addresses
to stay stable even if the user adds/removes other devices.

This is a fine solution to the "stable guest ABI" problem ... assuming
there's some way of querying the current default machine type.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:14                                                                     ` Mark McLoughlin
@ 2009-06-16 12:28                                                                       ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16 12:28 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
>    
>> Mark McLoughlin wrote:
>>      
>>> So long as the restrictions would be known to the management app via
>>> some "what slots are available" mechanism in qemu, that sounds fine.
>>>
>>>        
>> I'm not sure a "what slots are available" mechanism is as straight
>> forward as has been claimed.
>>      
>
> If qemu can't provide that information, then the management app does not
> have sufficient information to do the slot allocation itself. In which
> case, it must leave it up to qemu to do it.
>    

A given -M machine will have well-known open slots (since it's an ABI), 
same as it has rtl8139 and ne2000 cards.  Worst case we hardcode those 
numbers (gasp, faint).

>> It doesn't matter though because it's orthogonal to the current proposal.
>>      
>
> It is not orthogonal to solving the actual problem at hand, though -
> i.e. how to allow management apps to provide stable PCI addresses.
>    

It's part of the solution, but hardly the most difficult part.

> This is a fine solution to the "stable guest ABI" problem ... assuming
> there's some way of querying the current default machine type.
>    

     $ qemu -print-default-machine

or maybe

     $ qemu -show default-machine
     $ qemu -show pci-bus
     $ qemu -show me a way out

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 12:28                                                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16 12:28 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
>    
>> Mark McLoughlin wrote:
>>      
>>> So long as the restrictions would be known to the management app via
>>> some "what slots are available" mechanism in qemu, that sounds fine.
>>>
>>>        
>> I'm not sure a "what slots are available" mechanism is as straight
>> forward as has been claimed.
>>      
>
> If qemu can't provide that information, then the management app does not
> have sufficient information to do the slot allocation itself. In which
> case, it must leave it up to qemu to do it.
>    

A given -M machine will have well-known open slots (since it's an ABI), 
same as it has rtl8139 and ne2000 cards.  Worst case we hardcode those 
numbers (gasp, faint).

>> It doesn't matter though because it's orthogonal to the current proposal.
>>      
>
> It is not orthogonal to solving the actual problem at hand, though -
> i.e. how to allow management apps to provide stable PCI addresses.
>    

It's part of the solution, but hardly the most difficult part.

> This is a fine solution to the "stable guest ABI" problem ... assuming
> there's some way of querying the current default machine type.
>    

     $ qemu -print-default-machine

or maybe

     $ qemu -show default-machine
     $ qemu -show pci-bus
     $ qemu -show me a way out

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:14                                                                     ` Mark McLoughlin
  (?)
  (?)
@ 2009-06-16 12:28                                                                     ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16 12:28 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori

On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
>    
>> Mark McLoughlin wrote:
>>      
>>> So long as the restrictions would be known to the management app via
>>> some "what slots are available" mechanism in qemu, that sounds fine.
>>>
>>>        
>> I'm not sure a "what slots are available" mechanism is as straight
>> forward as has been claimed.
>>      
>
> If qemu can't provide that information, then the management app does not
> have sufficient information to do the slot allocation itself. In which
> case, it must leave it up to qemu to do it.
>    

A given -M machine will have well-known open slots (since it's an ABI), 
same as it has rtl8139 and ne2000 cards.  Worst case we hardcode those 
numbers (gasp, faint).

>> It doesn't matter though because it's orthogonal to the current proposal.
>>      
>
> It is not orthogonal to solving the actual problem at hand, though -
> i.e. how to allow management apps to provide stable PCI addresses.
>    

It's part of the solution, but hardly the most difficult part.

> This is a fine solution to the "stable guest ABI" problem ... assuming
> there's some way of querying the current default machine type.
>    

     $ qemu -print-default-machine

or maybe

     $ qemu -show default-machine
     $ qemu -show pci-bus
     $ qemu -show me a way out

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:28                                                                       ` Avi Kivity
@ 2009-06-16 12:39                                                                         ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:39 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Tue, 2009-06-16 at 15:28 +0300, Avi Kivity wrote:
> On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> > On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> >    
> >> Mark McLoughlin wrote:
> >>      
> >>> So long as the restrictions would be known to the management app via
> >>> some "what slots are available" mechanism in qemu, that sounds fine.
> >>>
> >>>        
> >> I'm not sure a "what slots are available" mechanism is as straight
> >> forward as has been claimed.
> >>      
> >
> > If qemu can't provide that information, then the management app does not
> > have sufficient information to do the slot allocation itself. In which
> > case, it must leave it up to qemu to do it.
> >    
> 
> A given -M machine will have well-known open slots (since it's an ABI), 
> same as it has rtl8139 and ne2000 cards.

If they're so obviously well-known, I don't see how the query mechanism
would not be straightforward, which is the comment I was replying to.

> Worst case we hardcode those numbers (gasp, faint).

Maybe we can just add the open slots to the -help output. That'd be nice
and clean.

> >> It doesn't matter though because it's orthogonal to the current proposal.
> >>      
> >
> > It is not orthogonal to solving the actual problem at hand, though -
> > i.e. how to allow management apps to provide stable PCI addresses.
> >    
> 
> It's part of the solution, but hardly the most difficult part.

Agree.

> > This is a fine solution to the "stable guest ABI" problem ... assuming
> > there's some way of querying the current default machine type.
> >    
> 
>      $ qemu -print-default-machine

Or:

  $ readlink /usr/share/qemu/machine-types/pc.dt

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 12:39                                                                         ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:39 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On Tue, 2009-06-16 at 15:28 +0300, Avi Kivity wrote:
> On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> > On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> >    
> >> Mark McLoughlin wrote:
> >>      
> >>> So long as the restrictions would be known to the management app via
> >>> some "what slots are available" mechanism in qemu, that sounds fine.
> >>>
> >>>        
> >> I'm not sure a "what slots are available" mechanism is as straight
> >> forward as has been claimed.
> >>      
> >
> > If qemu can't provide that information, then the management app does not
> > have sufficient information to do the slot allocation itself. In which
> > case, it must leave it up to qemu to do it.
> >    
> 
> A given -M machine will have well-known open slots (since it's an ABI), 
> same as it has rtl8139 and ne2000 cards.

If they're so obviously well-known, I don't see how the query mechanism
would not be straightforward, which is the comment I was replying to.

> Worst case we hardcode those numbers (gasp, faint).

Maybe we can just add the open slots to the -help output. That'd be nice
and clean.

> >> It doesn't matter though because it's orthogonal to the current proposal.
> >>      
> >
> > It is not orthogonal to solving the actual problem at hand, though -
> > i.e. how to allow management apps to provide stable PCI addresses.
> >    
> 
> It's part of the solution, but hardly the most difficult part.

Agree.

> > This is a fine solution to the "stable guest ABI" problem ... assuming
> > there's some way of querying the current default machine type.
> >    
> 
>      $ qemu -print-default-machine

Or:

  $ readlink /usr/share/qemu/machine-types/pc.dt

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:28                                                                       ` Avi Kivity
  (?)
@ 2009-06-16 12:39                                                                       ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-16 12:39 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori

On Tue, 2009-06-16 at 15:28 +0300, Avi Kivity wrote:
> On 06/16/2009 03:14 PM, Mark McLoughlin wrote:
> > On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:
> >    
> >> Mark McLoughlin wrote:
> >>      
> >>> So long as the restrictions would be known to the management app via
> >>> some "what slots are available" mechanism in qemu, that sounds fine.
> >>>
> >>>        
> >> I'm not sure a "what slots are available" mechanism is as straight
> >> forward as has been claimed.
> >>      
> >
> > If qemu can't provide that information, then the management app does not
> > have sufficient information to do the slot allocation itself. In which
> > case, it must leave it up to qemu to do it.
> >    
> 
> A given -M machine will have well-known open slots (since it's an ABI), 
> same as it has rtl8139 and ne2000 cards.

If they're so obviously well-known, I don't see how the query mechanism
would not be straightforward, which is the comment I was replying to.

> Worst case we hardcode those numbers (gasp, faint).

Maybe we can just add the open slots to the -help output. That'd be nice
and clean.

> >> It doesn't matter though because it's orthogonal to the current proposal.
> >>      
> >
> > It is not orthogonal to solving the actual problem at hand, though -
> > i.e. how to allow management apps to provide stable PCI addresses.
> >    
> 
> It's part of the solution, but hardly the most difficult part.

Agree.

> > This is a fine solution to the "stable guest ABI" problem ... assuming
> > there's some way of querying the current default machine type.
> >    
> 
>      $ qemu -print-default-machine

Or:

  $ readlink /usr/share/qemu/machine-types/pc.dt

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:39                                                                         ` Mark McLoughlin
@ 2009-06-16 12:51                                                                           ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16 12:51 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, dlaor, Carsten Otte, Rusty Russell, kvm,
	Glauber Costa, Michael S. Tsirkin, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/16/2009 03:39 PM, Mark McLoughlin wrote:

>> Worst case we hardcode those numbers (gasp, faint).
>>      
>
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.
>    

Yeah, there's precedent too.

> Or:
>
>    $ readlink /usr/share/qemu/machine-types/pc.dt
>
>    

That works if you have exactly one qemu installed.  It's best if qemu 
itself is the entry point (qemu -print-device-tree).

Though I wouldn't want to inflict it upon the management application 
writers.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 12:51                                                                           ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16 12:51 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, dlaor, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/16/2009 03:39 PM, Mark McLoughlin wrote:

>> Worst case we hardcode those numbers (gasp, faint).
>>      
>
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.
>    

Yeah, there's precedent too.

> Or:
>
>    $ readlink /usr/share/qemu/machine-types/pc.dt
>
>    

That works if you have exactly one qemu installed.  It's best if qemu 
itself is the entry point (qemu -print-device-tree).

Though I wouldn't want to inflict it upon the management application 
writers.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:39                                                                         ` Mark McLoughlin
  (?)
@ 2009-06-16 12:51                                                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-16 12:51 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori

On 06/16/2009 03:39 PM, Mark McLoughlin wrote:

>> Worst case we hardcode those numbers (gasp, faint).
>>      
>
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.
>    

Yeah, there's precedent too.

> Or:
>
>    $ readlink /usr/share/qemu/machine-types/pc.dt
>
>    

That works if you have exactly one qemu installed.  It's best if qemu 
itself is the entry point (qemu -print-device-tree).

Though I wouldn't want to inflict it upon the management application 
writers.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:15                                                                       ` Avi Kivity
@ 2009-06-16 18:32                                                                         ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: dlaor, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

Avi Kivity wrote:
> Another issue is enumeration.  Guests will present their devices in the 
> order they find them on the pci bus (of course enumeration is guest 
> specific).  So if I have 2 virtio controllers the only way I can 
> distinguish between them is using their pci slots.

virtio controllers really should have a user-suppliable string or UUID
to identify them to the guest.  Don't they?

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 18:32                                                                         ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

Avi Kivity wrote:
> Another issue is enumeration.  Guests will present their devices in the 
> order they find them on the pci bus (of course enumeration is guest 
> specific).  So if I have 2 virtio controllers the only way I can 
> distinguish between them is using their pci slots.

virtio controllers really should have a user-suppliable string or UUID
to identify them to the guest.  Don't they?

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-15 15:15                                                                       ` Avi Kivity
  (?)
  (?)
@ 2009-06-16 18:32                                                                       ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Mark McLoughlin

Avi Kivity wrote:
> Another issue is enumeration.  Guests will present their devices in the 
> order they find them on the pci bus (of course enumeration is guest 
> specific).  So if I have 2 virtio controllers the only way I can 
> distinguish between them is using their pci slots.

virtio controllers really should have a user-suppliable string or UUID
to identify them to the guest.  Don't they?

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:31                                 ` Mark McLoughlin
@ 2009-06-16 18:38                                   ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:38 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Anthony Liguori, Michael S. Tsirkin, Carsten Otte, kvm,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> > After libvirt has done -drive file=foo... it should dump the machine 
> > config and use that from then on.
> 
> Right - libvirt then wouldn't be able to avoid the complexity of merging
> any future changes into the dumped machine config.

As long as qemu can accept a machine config _and_ -drive file=foo (and
monitor commands to add/remove devices), libvirt could merge by simply
calling qemu with whatever additional command line options or monitor
commands modify the config, then dump the new config.

That way, virtio would not have to deal with that complexity.  It
would be written in one place: qemu.

Or better, a utility: qemu-machine-config.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 18:38                                   ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:38 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Avi Kivity

Mark McLoughlin wrote:
> > After libvirt has done -drive file=foo... it should dump the machine 
> > config and use that from then on.
> 
> Right - libvirt then wouldn't be able to avoid the complexity of merging
> any future changes into the dumped machine config.

As long as qemu can accept a machine config _and_ -drive file=foo (and
monitor commands to add/remove devices), libvirt could merge by simply
calling qemu with whatever additional command line options or monitor
commands modify the config, then dump the new config.

That way, virtio would not have to deal with that complexity.  It
would be written in one place: qemu.

Or better, a utility: qemu-machine-config.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-12 17:31                                 ` Mark McLoughlin
                                                   ` (3 preceding siblings ...)
  (?)
@ 2009-06-16 18:38                                 ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:38 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Anthony Liguori, Avi Kivity

Mark McLoughlin wrote:
> > After libvirt has done -drive file=foo... it should dump the machine 
> > config and use that from then on.
> 
> Right - libvirt then wouldn't be able to avoid the complexity of merging
> any future changes into the dumped machine config.

As long as qemu can accept a machine config _and_ -drive file=foo (and
monitor commands to add/remove devices), libvirt could merge by simply
calling qemu with whatever additional command line options or monitor
commands modify the config, then dump the new config.

That way, virtio would not have to deal with that complexity.  It
would be written in one place: qemu.

Or better, a utility: qemu-machine-config.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:39                                                                         ` Mark McLoughlin
@ 2009-06-16 18:44                                                                           ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:44 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Avi Kivity, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Mark McLoughlin wrote:
> > Worst case we hardcode those numbers (gasp, faint).
> 
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.

Make them part of the machine configuration.

After all, they are part of the machine configuration, and ACPI, BIOS
etc. need to know about all the machine slots anyway.

Having said that, I prefer the idea that slot allocation is handled
either in Qemu, or in a separate utility called qemu-machine-config
(for working with machine configs), or in a library
libqemu-machine-config.so.

I particularly don't like the idea of arcane machine-dependent slot
allocation knowledge living in libvirt, because it needs to be in Qemu
anyway for non-libvirt users.  No point in having two implementations
of something tricky and likely to have machine quirks, if one will do.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-16 18:44                                                                           ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:44 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

Mark McLoughlin wrote:
> > Worst case we hardcode those numbers (gasp, faint).
> 
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.

Make them part of the machine configuration.

After all, they are part of the machine configuration, and ACPI, BIOS
etc. need to know about all the machine slots anyway.

Having said that, I prefer the idea that slot allocation is handled
either in Qemu, or in a separate utility called qemu-machine-config
(for working with machine configs), or in a library
libqemu-machine-config.so.

I particularly don't like the idea of arcane machine-dependent slot
allocation knowledge living in libvirt, because it needs to be in Qemu
anyway for non-libvirt users.  No point in having two implementations
of something tricky and likely to have machine quirks, if one will do.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:39                                                                         ` Mark McLoughlin
                                                                                           ` (3 preceding siblings ...)
  (?)
@ 2009-06-16 18:44                                                                         ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-16 18:44 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Paul Brook

Mark McLoughlin wrote:
> > Worst case we hardcode those numbers (gasp, faint).
> 
> Maybe we can just add the open slots to the -help output. That'd be nice
> and clean.

Make them part of the machine configuration.

After all, they are part of the machine configuration, and ACPI, BIOS
etc. need to know about all the machine slots anyway.

Having said that, I prefer the idea that slot allocation is handled
either in Qemu, or in a separate utility called qemu-machine-config
(for working with machine configs), or in a library
libqemu-machine-config.so.

I particularly don't like the idea of arcane machine-dependent slot
allocation knowledge living in libvirt, because it needs to be in Qemu
anyway for non-libvirt users.  No point in having two implementations
of something tricky and likely to have machine quirks, if one will do.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 18:32                                                                         ` Jamie Lokier
@ 2009-06-17  6:38                                                                           ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  6:38 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: dlaor, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> Avi Kivity wrote:
>    
>> Another issue is enumeration.  Guests will present their devices in the
>> order they find them on the pci bus (of course enumeration is guest
>> specific).  So if I have 2 virtio controllers the only way I can
>> distinguish between them is using their pci slots.
>>      
>
> virtio controllers really should have a user-suppliable string or UUID
> to identify them to the guest.  Don't they?
>    

virtio controllers don't exist.  When they do, they may have a UUID or 
not, but in either case guest infrastructure is in place for reporting 
the PCI slot, not the UUID.

virtio disks do have a UUID.  I don't think older versions of Windows 
will use it though, so if you reorder your slots you'll see your drive 
letters change.  Same with Linux if you don't use udev by-uuid rules.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17  6:38                                                                           ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  6:38 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> Avi Kivity wrote:
>    
>> Another issue is enumeration.  Guests will present their devices in the
>> order they find them on the pci bus (of course enumeration is guest
>> specific).  So if I have 2 virtio controllers the only way I can
>> distinguish between them is using their pci slots.
>>      
>
> virtio controllers really should have a user-suppliable string or UUID
> to identify them to the guest.  Don't they?
>    

virtio controllers don't exist.  When they do, they may have a UUID or 
not, but in either case guest infrastructure is in place for reporting 
the PCI slot, not the UUID.

virtio disks do have a UUID.  I don't think older versions of Windows 
will use it though, so if you reorder your slots you'll see your drive 
letters change.  Same with Linux if you don't use udev by-uuid rules.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 18:32                                                                         ` Jamie Lokier
  (?)
  (?)
@ 2009-06-17  6:38                                                                         ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  6:38 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Mark McLoughlin

On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> Avi Kivity wrote:
>    
>> Another issue is enumeration.  Guests will present their devices in the
>> order they find them on the pci bus (of course enumeration is guest
>> specific).  So if I have 2 virtio controllers the only way I can
>> distinguish between them is using their pci slots.
>>      
>
> virtio controllers really should have a user-suppliable string or UUID
> to identify them to the guest.  Don't they?
>    

virtio controllers don't exist.  When they do, they may have a UUID or 
not, but in either case guest infrastructure is in place for reporting 
the PCI slot, not the UUID.

virtio disks do have a UUID.  I don't think older versions of Windows 
will use it though, so if you reorder your slots you'll see your drive 
letters change.  Same with Linux if you don't use udev by-uuid rules.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 18:44                                                                           ` Jamie Lokier
@ 2009-06-17  8:33                                                                             ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-17  8:33 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Avi Kivity, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Tue, 2009-06-16 at 19:44 +0100, Jamie Lokier wrote:
> Mark McLoughlin wrote:
> > > Worst case we hardcode those numbers (gasp, faint).
> > 
> > Maybe we can just add the open slots to the -help output. That'd be nice
> > and clean.

I was being sarcastic - libvirt currently must parse qemu -help, and
even has some test infrastructure to check that it works with various
versions of qemu. Extending this would not be nice and clean :-)

> I particularly don't like the idea of arcane machine-dependent slot
> allocation knowledge living in libvirt, because it needs to be in Qemu
> anyway for non-libvirt users.  No point in having two implementations
> of something tricky and likely to have machine quirks, if one will do.

Indeed.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17  8:33                                                                             ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-17  8:33 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Avi Kivity, Paul Brook

On Tue, 2009-06-16 at 19:44 +0100, Jamie Lokier wrote:
> Mark McLoughlin wrote:
> > > Worst case we hardcode those numbers (gasp, faint).
> > 
> > Maybe we can just add the open slots to the -help output. That'd be nice
> > and clean.

I was being sarcastic - libvirt currently must parse qemu -help, and
even has some test infrastructure to check that it works with various
versions of qemu. Extending this would not be nice and clean :-)

> I particularly don't like the idea of arcane machine-dependent slot
> allocation knowledge living in libvirt, because it needs to be in Qemu
> anyway for non-libvirt users.  No point in having two implementations
> of something tricky and likely to have machine quirks, if one will do.

Indeed.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 18:44                                                                           ` Jamie Lokier
  (?)
  (?)
@ 2009-06-17  8:33                                                                           ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-17  8:33 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity,
	Paul Brook

On Tue, 2009-06-16 at 19:44 +0100, Jamie Lokier wrote:
> Mark McLoughlin wrote:
> > > Worst case we hardcode those numbers (gasp, faint).
> > 
> > Maybe we can just add the open slots to the -help output. That'd be nice
> > and clean.

I was being sarcastic - libvirt currently must parse qemu -help, and
even has some test infrastructure to check that it works with various
versions of qemu. Extending this would not be nice and clean :-)

> I particularly don't like the idea of arcane machine-dependent slot
> allocation knowledge living in libvirt, because it needs to be in Qemu
> anyway for non-libvirt users.  No point in having two implementations
> of something tricky and likely to have machine quirks, if one will do.

Indeed.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  8:33                                                                             ` Mark McLoughlin
@ 2009-06-17  9:03                                                                               ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  9:03 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>> I particularly don't like the idea of arcane machine-dependent slot
>> allocation knowledge living in libvirt, because it needs to be in Qemu
>> anyway for non-libvirt users.  No point in having two implementations
>> of something tricky and likely to have machine quirks, if one will do.
>>      
>
> Indeed.
>    

I don't understand this.  Management already has to allocate MAC 
addresses, UUIDs, IDE interface and master/slave role, SCSI 
LUNs/targets/whatever.  It has to understand NUMA (if not do actual 
allocation).  Even if it doesn't allocate the slots, it has to be able 
to query them so it can tell the user which NIC or controller is 
connected where, or to do hotunplug.  It has to understand that there is 
a limitation on the number of slots, and know what that limitation is 
(unless it feels that launching an overcommitted guest and showing an 
error to the user is preferable to not allowing the user to overcommit 
in the first place).

If you'll review my patent application for pci slot allocation, you'll 
see the following line:

   slot_nr = nb_allocated_slots++; /* Allocate pci slot */

while there is a lot of complicated setup code before that (see the 
prior art section as well), I believe licensees could well implement the 
algorithm in two short months, including testing.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17  9:03                                                                               ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  9:03 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>> I particularly don't like the idea of arcane machine-dependent slot
>> allocation knowledge living in libvirt, because it needs to be in Qemu
>> anyway for non-libvirt users.  No point in having two implementations
>> of something tricky and likely to have machine quirks, if one will do.
>>      
>
> Indeed.
>    

I don't understand this.  Management already has to allocate MAC 
addresses, UUIDs, IDE interface and master/slave role, SCSI 
LUNs/targets/whatever.  It has to understand NUMA (if not do actual 
allocation).  Even if it doesn't allocate the slots, it has to be able 
to query them so it can tell the user which NIC or controller is 
connected where, or to do hotunplug.  It has to understand that there is 
a limitation on the number of slots, and know what that limitation is 
(unless it feels that launching an overcommitted guest and showing an 
error to the user is preferable to not allowing the user to overcommit 
in the first place).

If you'll review my patent application for pci slot allocation, you'll 
see the following line:

   slot_nr = nb_allocated_slots++; /* Allocate pci slot */

while there is a lot of complicated setup code before that (see the 
prior art section as well), I believe licensees could well implement the 
algorithm in two short months, including testing.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  8:33                                                                             ` Mark McLoughlin
  (?)
@ 2009-06-17  9:03                                                                             ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  9:03 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>> I particularly don't like the idea of arcane machine-dependent slot
>> allocation knowledge living in libvirt, because it needs to be in Qemu
>> anyway for non-libvirt users.  No point in having two implementations
>> of something tricky and likely to have machine quirks, if one will do.
>>      
>
> Indeed.
>    

I don't understand this.  Management already has to allocate MAC 
addresses, UUIDs, IDE interface and master/slave role, SCSI 
LUNs/targets/whatever.  It has to understand NUMA (if not do actual 
allocation).  Even if it doesn't allocate the slots, it has to be able 
to query them so it can tell the user which NIC or controller is 
connected where, or to do hotunplug.  It has to understand that there is 
a limitation on the number of slots, and know what that limitation is 
(unless it feels that launching an overcommitted guest and showing an 
error to the user is preferable to not allowing the user to overcommit 
in the first place).

If you'll review my patent application for pci slot allocation, you'll 
see the following line:

   slot_nr = nb_allocated_slots++; /* Allocate pci slot */

while there is a lot of complicated setup code before that (see the 
prior art section as well), I believe licensees could well implement the 
algorithm in two short months, including testing.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  9:03                                                                               ` Avi Kivity
@ 2009-06-17  9:18                                                                                 ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-17  9:18 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Jamie Lokier, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
> >> I particularly don't like the idea of arcane machine-dependent slot
> >> allocation knowledge living in libvirt, because it needs to be in Qemu
> >> anyway for non-libvirt users.  No point in having two implementations
> >> of something tricky and likely to have machine quirks, if one will do.
> >
> > Indeed.
> 
> I don't understand this.

Take note of the "arcane machine-dependent slot allocation knowledge"
bit.

If the algorithm for management apps is as simple as "query qemu for
available slots and sequentially allocate slots", then that's perfectly
fine.

If management apps need to hard-code which slots are available on
different targets and different qemu versions, or restrictions on which
devices can use which slots, or knowledge that some devices can be
multi-function, or ... anything like that is just lame.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17  9:18                                                                                 ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-17  9:18 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
> >> I particularly don't like the idea of arcane machine-dependent slot
> >> allocation knowledge living in libvirt, because it needs to be in Qemu
> >> anyway for non-libvirt users.  No point in having two implementations
> >> of something tricky and likely to have machine quirks, if one will do.
> >
> > Indeed.
> 
> I don't understand this.

Take note of the "arcane machine-dependent slot allocation knowledge"
bit.

If the algorithm for management apps is as simple as "query qemu for
available slots and sequentially allocate slots", then that's perfectly
fine.

If management apps need to hard-code which slots are available on
different targets and different qemu versions, or restrictions on which
devices can use which slots, or knowledge that some devices can be
multi-function, or ... anything like that is just lame.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  9:03                                                                               ` Avi Kivity
  (?)
  (?)
@ 2009-06-17  9:18                                                                               ` Mark McLoughlin
  -1 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-06-17  9:18 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
> >> I particularly don't like the idea of arcane machine-dependent slot
> >> allocation knowledge living in libvirt, because it needs to be in Qemu
> >> anyway for non-libvirt users.  No point in having two implementations
> >> of something tricky and likely to have machine quirks, if one will do.
> >
> > Indeed.
> 
> I don't understand this.

Take note of the "arcane machine-dependent slot allocation knowledge"
bit.

If the algorithm for management apps is as simple as "query qemu for
available slots and sequentially allocate slots", then that's perfectly
fine.

If management apps need to hard-code which slots are available on
different targets and different qemu versions, or restrictions on which
devices can use which slots, or knowledge that some devices can be
multi-function, or ... anything like that is just lame.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  9:18                                                                                 ` Mark McLoughlin
@ 2009-06-17  9:26                                                                                   ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  9:26 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Jamie Lokier, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

On 06/17/2009 12:18 PM, Mark McLoughlin wrote:
> On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
>    
>> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>>      
>>>> I particularly don't like the idea of arcane machine-dependent slot
>>>> allocation knowledge living in libvirt, because it needs to be in Qemu
>>>> anyway for non-libvirt users.  No point in having two implementations
>>>> of something tricky and likely to have machine quirks, if one will do.
>>>>          
>>> Indeed.
>>>        
>> I don't understand this.
>>      
>
> Take note of the "arcane machine-dependent slot allocation knowledge"
> bit.
>
> If the algorithm for management apps is as simple as "query qemu for
> available slots and sequentially allocate slots", then that's perfectly
> fine.
>    

That's the thinking.

> If management apps need to hard-code which slots are available on
> different targets and different qemu versions, or restrictions on which
> devices can use which slots, or knowledge that some devices can be
> multi-function, or ... anything like that is just lame.
>    

You can't abstract these things away.  If you can't put a NIC in slot 4, 
and you have 7 slots, then you cannot have 7 NICs.  Having qemu allocate 
the slot numbers does not absolve management from knowing this 
limitation and preventing the user from creating a machine with 7 slots.

Likewise, management will have to know which devices are multi-function, 
since that affects their hotpluggability.  Ditto if some slot is faster 
than others, if you want to make use of this information you have to let 
the upper layers know.

It could be done using an elaborate machine description that qemu 
exposes to management coupled with a constraint solver that optimizes 
the machine layout according to user specifications and hardware 
limitations.  Or we could take the view that real life is not perfect 
(especially where computers are involved), add some machine specific 
knowledge, and spend the rest of the summer at the beach.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17  9:26                                                                                   ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  9:26 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/17/2009 12:18 PM, Mark McLoughlin wrote:
> On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
>    
>> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>>      
>>>> I particularly don't like the idea of arcane machine-dependent slot
>>>> allocation knowledge living in libvirt, because it needs to be in Qemu
>>>> anyway for non-libvirt users.  No point in having two implementations
>>>> of something tricky and likely to have machine quirks, if one will do.
>>>>          
>>> Indeed.
>>>        
>> I don't understand this.
>>      
>
> Take note of the "arcane machine-dependent slot allocation knowledge"
> bit.
>
> If the algorithm for management apps is as simple as "query qemu for
> available slots and sequentially allocate slots", then that's perfectly
> fine.
>    

That's the thinking.

> If management apps need to hard-code which slots are available on
> different targets and different qemu versions, or restrictions on which
> devices can use which slots, or knowledge that some devices can be
> multi-function, or ... anything like that is just lame.
>    

You can't abstract these things away.  If you can't put a NIC in slot 4, 
and you have 7 slots, then you cannot have 7 NICs.  Having qemu allocate 
the slot numbers does not absolve management from knowing this 
limitation and preventing the user from creating a machine with 7 slots.

Likewise, management will have to know which devices are multi-function, 
since that affects their hotpluggability.  Ditto if some slot is faster 
than others, if you want to make use of this information you have to let 
the upper layers know.

It could be done using an elaborate machine description that qemu 
exposes to management coupled with a constraint solver that optimizes 
the machine layout according to user specifications and hardware 
limitations.  Or we could take the view that real life is not perfect 
(especially where computers are involved), add some machine specific 
knowledge, and spend the rest of the summer at the beach.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  9:18                                                                                 ` Mark McLoughlin
  (?)
  (?)
@ 2009-06-17  9:26                                                                                 ` Avi Kivity
  -1 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-06-17  9:26 UTC (permalink / raw)
  To: Mark McLoughlin
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Jamie Lokier, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook

On 06/17/2009 12:18 PM, Mark McLoughlin wrote:
> On Wed, 2009-06-17 at 12:03 +0300, Avi Kivity wrote:
>    
>> On 06/17/2009 11:33 AM, Mark McLoughlin wrote:
>>      
>>>> I particularly don't like the idea of arcane machine-dependent slot
>>>> allocation knowledge living in libvirt, because it needs to be in Qemu
>>>> anyway for non-libvirt users.  No point in having two implementations
>>>> of something tricky and likely to have machine quirks, if one will do.
>>>>          
>>> Indeed.
>>>        
>> I don't understand this.
>>      
>
> Take note of the "arcane machine-dependent slot allocation knowledge"
> bit.
>
> If the algorithm for management apps is as simple as "query qemu for
> available slots and sequentially allocate slots", then that's perfectly
> fine.
>    

That's the thinking.

> If management apps need to hard-code which slots are available on
> different targets and different qemu versions, or restrictions on which
> devices can use which slots, or knowledge that some devices can be
> multi-function, or ... anything like that is just lame.
>    

You can't abstract these things away.  If you can't put a NIC in slot 4, 
and you have 7 slots, then you cannot have 7 NICs.  Having qemu allocate 
the slot numbers does not absolve management from knowing this 
limitation and preventing the user from creating a machine with 7 slots.

Likewise, management will have to know which devices are multi-function, 
since that affects their hotpluggability.  Ditto if some slot is faster 
than others, if you want to make use of this information you have to let 
the upper layers know.

It could be done using an elaborate machine description that qemu 
exposes to management coupled with a constraint solver that optimizes 
the machine layout according to user specifications and hardware 
limitations.  Or we could take the view that real life is not perfect 
(especially where computers are involved), add some machine specific 
knowledge, and spend the rest of the summer at the beach.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  6:38                                                                           ` Avi Kivity
@ 2009-06-17 11:51                                                                             ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: dlaor, Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa,
	Rusty Russell, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

Avi Kivity wrote:
> On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> >Avi Kivity wrote:
> >   
> >>Another issue is enumeration.  Guests will present their devices in the
> >>order they find them on the pci bus (of course enumeration is guest
> >>specific).  So if I have 2 virtio controllers the only way I can
> >>distinguish between them is using their pci slots.
> >
> >virtio controllers really should have a user-suppliable string or UUID
> >to identify them to the guest.  Don't they?
> 
> virtio controllers don't exist.  When they do, they may have a UUID or 
> not, but in either case guest infrastructure is in place for reporting 
> the PCI slot, not the UUID.
> 
> virtio disks do have a UUID.  I don't think older versions of Windows 
> will use it though, so if you reorder your slots you'll see your drive 
> letters change.  Same with Linux if you don't use udev by-uuid rules.

I guess I meant virtio disks, so that's ok.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17 11:51                                                                             ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Mark McLoughlin

Avi Kivity wrote:
> On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> >Avi Kivity wrote:
> >   
> >>Another issue is enumeration.  Guests will present their devices in the
> >>order they find them on the pci bus (of course enumeration is guest
> >>specific).  So if I have 2 virtio controllers the only way I can
> >>distinguish between them is using their pci slots.
> >
> >virtio controllers really should have a user-suppliable string or UUID
> >to identify them to the guest.  Don't they?
> 
> virtio controllers don't exist.  When they do, they may have a UUID or 
> not, but in either case guest infrastructure is in place for reporting 
> the PCI slot, not the UUID.
> 
> virtio disks do have a UUID.  I don't think older versions of Windows 
> will use it though, so if you reorder your slots you'll see your drive 
> letters change.  Same with Linux if you don't use udev by-uuid rules.

I guess I meant virtio disks, so that's ok.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  6:38                                                                           ` Avi Kivity
  (?)
  (?)
@ 2009-06-17 11:51                                                                           ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:51 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Glauber Costa, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Mark McLoughlin

Avi Kivity wrote:
> On 06/16/2009 09:32 PM, Jamie Lokier wrote:
> >Avi Kivity wrote:
> >   
> >>Another issue is enumeration.  Guests will present their devices in the
> >>order they find them on the pci bus (of course enumeration is guest
> >>specific).  So if I have 2 virtio controllers the only way I can
> >>distinguish between them is using their pci slots.
> >
> >virtio controllers really should have a user-suppliable string or UUID
> >to identify them to the guest.  Don't they?
> 
> virtio controllers don't exist.  When they do, they may have a UUID or 
> not, but in either case guest infrastructure is in place for reporting 
> the PCI slot, not the UUID.
> 
> virtio disks do have a UUID.  I don't think older versions of Windows 
> will use it though, so if you reorder your slots you'll see your drive 
> letters change.  Same with Linux if you don't use udev by-uuid rules.

I guess I meant virtio disks, so that's ok.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  9:26                                                                                   ` Avi Kivity
@ 2009-06-17 11:58                                                                                     ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:58 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Carsten Otte, dlaor, kvm, Michael S. Tsirkin,
	Glauber Costa, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook

Avi Kivity wrote:
> >If management apps need to hard-code which slots are available on
> >different targets and different qemu versions, or restrictions on which
> >devices can use which slots, or knowledge that some devices can be
> >multi-function, or ... anything like that is just lame.
> >   
> 
> You can't abstract these things away.  If you can't put a NIC in slot 4, 
> and you have 7 slots, then you cannot have 7 NICs.  Having qemu allocate 
> the slot numbers does not absolve management from knowing this 
> limitation and preventing the user from creating a machine with 7 slots.
> 
> Likewise, management will have to know which devices are multi-function, 
> since that affects their hotpluggability.  Ditto if some slot is faster 
> than others, if you want to make use of this information you have to let 
> the upper layers know.
> 
> It could be done using an elaborate machine description that qemu 
> exposes to management coupled with a constraint solver that optimizes 
> the machine layout according to user specifications and hardware 
> limitations.  Or we could take the view that real life is not perfect 
> (especially where computers are involved), add some machine specific 
> knowledge, and spend the rest of the summer at the beach.

To be honest, an elaborate machine description is probably fine...

A fancy constraint solver is not required.  A simple one strikes me as
about as simple as what you'd hard-code anyway, but with fewer special
cases.

Note that the result can fail due to things like insufficient address
space for all the device BARs even when they _are_ in the right slots.
Especially if there are lots of slots, or bridges which can provide
unlimited slots.

That is arcane: device-dependent, CPU-dependent, machine-dependent,
RAM-size dependent (in a non-linear way), device-option-dependent and
probably QEMU-version-dependent too.

It would be nice if libvirt (et al) would prevent the user from
creating a VM with insufficient BAR space for that machine, but I'm
not sure how to do it sanely, without arcane knowledge getting about.

Maybe that idea of a .so shared by qemu and libvirt, to manipulate
device configurations, is a sane one after all.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
@ 2009-06-17 11:58                                                                                     ` Jamie Lokier
  0 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:58 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, Rusty Russell, kvm, Michael S. Tsirkin,
	Glauber Costa, dlaor, qemu-devel, virtualization, Blue Swirl,
	Christian Borntraeger, Paul Brook, Carsten Otte

Avi Kivity wrote:
> >If management apps need to hard-code which slots are available on
> >different targets and different qemu versions, or restrictions on which
> >devices can use which slots, or knowledge that some devices can be
> >multi-function, or ... anything like that is just lame.
> >   
> 
> You can't abstract these things away.  If you can't put a NIC in slot 4, 
> and you have 7 slots, then you cannot have 7 NICs.  Having qemu allocate 
> the slot numbers does not absolve management from knowing this 
> limitation and preventing the user from creating a machine with 7 slots.
> 
> Likewise, management will have to know which devices are multi-function, 
> since that affects their hotpluggability.  Ditto if some slot is faster 
> than others, if you want to make use of this information you have to let 
> the upper layers know.
> 
> It could be done using an elaborate machine description that qemu 
> exposes to management coupled with a constraint solver that optimizes 
> the machine layout according to user specifications and hardware 
> limitations.  Or we could take the view that real life is not perfect 
> (especially where computers are involved), add some machine specific 
> knowledge, and spend the rest of the summer at the beach.

To be honest, an elaborate machine description is probably fine...

A fancy constraint solver is not required.  A simple one strikes me as
about as simple as what you'd hard-code anyway, but with fewer special
cases.

Note that the result can fail due to things like insufficient address
space for all the device BARs even when they _are_ in the right slots.
Especially if there are lots of slots, or bridges which can provide
unlimited slots.

That is arcane: device-dependent, CPU-dependent, machine-dependent,
RAM-size dependent (in a non-linear way), device-option-dependent and
probably QEMU-version-dependent too.

It would be nice if libvirt (et al) would prevent the user from
creating a VM with insufficient BAR space for that machine, but I'm
not sure how to do it sanely, without arcane knowledge getting about.

Maybe that idea of a .so shared by qemu and libvirt, to manipulate
device configurations, is a sane one after all.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-17  9:26                                                                                   ` Avi Kivity
  (?)
@ 2009-06-17 11:58                                                                                   ` Jamie Lokier
  -1 siblings, 0 replies; 457+ messages in thread
From: Jamie Lokier @ 2009-06-17 11:58 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Mark McLoughlin, kvm, Michael S. Tsirkin, Glauber Costa,
	qemu-devel, virtualization, Blue Swirl, Christian Borntraeger,
	Paul Brook, Carsten Otte

Avi Kivity wrote:
> >If management apps need to hard-code which slots are available on
> >different targets and different qemu versions, or restrictions on which
> >devices can use which slots, or knowledge that some devices can be
> >multi-function, or ... anything like that is just lame.
> >   
> 
> You can't abstract these things away.  If you can't put a NIC in slot 4, 
> and you have 7 slots, then you cannot have 7 NICs.  Having qemu allocate 
> the slot numbers does not absolve management from knowing this 
> limitation and preventing the user from creating a machine with 7 slots.
> 
> Likewise, management will have to know which devices are multi-function, 
> since that affects their hotpluggability.  Ditto if some slot is faster 
> than others, if you want to make use of this information you have to let 
> the upper layers know.
> 
> It could be done using an elaborate machine description that qemu 
> exposes to management coupled with a constraint solver that optimizes 
> the machine layout according to user specifications and hardware 
> limitations.  Or we could take the view that real life is not perfect 
> (especially where computers are involved), add some machine specific 
> knowledge, and spend the rest of the summer at the beach.

To be honest, an elaborate machine description is probably fine...

A fancy constraint solver is not required.  A simple one strikes me as
about as simple as what you'd hard-code anyway, but with fewer special
cases.

Note that the result can fail due to things like insufficient address
space for all the device BARs even when they _are_ in the right slots.
Especially if there are lots of slots, or bridges which can provide
unlimited slots.

That is arcane: device-dependent, CPU-dependent, machine-dependent,
RAM-size dependent (in a non-linear way), device-option-dependent and
probably QEMU-version-dependent too.

It would be nice if libvirt (et al) would prevent the user from
creating a VM with insufficient BAR space for that machine, but I'm
not sure how to do it sanely, without arcane knowledge getting about.

Maybe that idea of a .so shared by qemu and libvirt, to manipulate
device configurations, is a sane one after all.

-- Jamie

^ permalink raw reply	[flat|nested] 457+ messages in thread

* RE: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
  2009-06-16 12:28                                                                       ` Avi Kivity
                                                                                         ` (2 preceding siblings ...)
  (?)
@ 2009-06-24  8:04                                                                       ` Dietmar Maurer
  -1 siblings, 0 replies; 457+ messages in thread
From: Dietmar Maurer @ 2009-06-24  8:04 UTC (permalink / raw)
  To: qemu-devel

Hi all,

On real hardware, I usually do not care about PCI address - instead I
use slot numbers. A standard main board has about 5 pci slots where you
can plug in cards. So if a card is damaged, I usually say something like
'please replace the card in the first slot'.

Can't we simply define a 'slot to pci address' mapping for each
platform? For example, we reserve some pci addresses for standard
hardware like ide and scsi controller, and use the rest for the slot
mapping:

pci addr 1 -> reserved 
pci addr 2 -> reserved for ide
pci addr 3 -> reserved for scsi
pci addr 4 -> reserved for usb hub
...

pci addr 8 -> slot 7
pci addr 9 -> slot 6
pci addr 10 -> slot 5
pci addr 11 -> slot 4
pci addr 12 -> slot 3
pci addr 13 -> slot 2
pci addr 14 -> slot 1
pci addr 15 -> slot 0

The management application can then use those slot numbers to place a
device into a specific bus position.

 - net nic,model=e1000,slot=0

Or is that too inflexible?

- Dietmar

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 0/3] Change virtio blk/console PCI classes and introduce compat machine type [was Re: Configuration vs. compat hints]
  2009-06-15 18:12                                                                   ` Anthony Liguori
                                                                                     ` (4 preceding siblings ...)
  (?)
@ 2009-07-07 11:08                                                                   ` Mark McLoughlin
  2009-07-07 11:09                                                                     ` [Qemu-devel] [PATCH 1/3] Change default PCI class of virtio-blk to PCI_CLASS_STORAGE_SCSI Mark McLoughlin
  -1 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-07 11:08 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel

On Mon, 2009-06-15 at 13:12 -0500, Anthony Liguori wrote:

> 2) Whenever the default machine type changes in a guest-visible way, 
> introduce a new machine type
>   - Use explicit versions in name: pc-v1, pc-v2 or use more descriptive 
> names pc-with-usb
>   - Easily transitions to device config files

Okay, here's a small series of patches which changes the virtio block
and console PCI classes and introduces a pc-0-10 machine type which
uses the old PCI classes.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 1/3] Change default PCI class of virtio-blk to PCI_CLASS_STORAGE_SCSI
  2009-07-07 11:08                                                                   ` [Qemu-devel] [PATCH 0/3] Change virtio blk/console PCI classes and introduce compat machine type [was Re: Configuration vs. compat hints] Mark McLoughlin
@ 2009-07-07 11:09                                                                     ` Mark McLoughlin
  2009-07-07 11:09                                                                       ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Mark McLoughlin
  0 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-07 11:09 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Dor Laor, qemu-devel

Windows virtio driver cannot pass DTM (certification) tests while the
storage class is PCI_CLASS_STORAGE_OTHER.

A new qdev type is introduced to allow devices using the old class
to be created for compatibility with qemu-0.10.x.

Reported-by: Dor Laor <dlaor@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/virtio-pci.c |   20 +++++++++++++++++---
 1 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index f7da503..93ee5e1 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -414,7 +414,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
-static void virtio_blk_init_pci(PCIDevice *pci_dev)
+static void virtio_blk_init_pci_with_class(PCIDevice *pci_dev,
+                                           uint16_t class_code)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
@@ -423,8 +424,17 @@ static void virtio_blk_init_pci(PCIDevice *pci_dev)
     virtio_init_pci(proxy, vdev,
                     PCI_VENDOR_ID_REDHAT_QUMRANET,
                     PCI_DEVICE_ID_VIRTIO_BLOCK,
-                    PCI_CLASS_STORAGE_OTHER,
-                    0x00);
+                    class_code, 0x00);
+}
+
+static void virtio_blk_init_pci(PCIDevice *pci_dev)
+{
+    virtio_blk_init_pci_with_class(pci_dev, PCI_CLASS_STORAGE_SCSI);
+}
+
+static void virtio_blk_init_pci_0_10(PCIDevice *pci_dev)
+{
+    virtio_blk_init_pci_with_class(pci_dev, PCI_CLASS_STORAGE_OTHER);
 }
 
 static void virtio_console_init_pci(PCIDevice *pci_dev)
@@ -476,6 +486,10 @@ static void virtio_pci_register_devices(void)
                       virtio_console_init_pci);
     pci_qdev_register("virtio-balloon-pci", sizeof(VirtIOPCIProxy),
                       virtio_balloon_init_pci);
+
+    /* For compatibility with 0.10 */
+    pci_qdev_register("virtio-blk-pci-0-10", sizeof(VirtIOPCIProxy),
+                      virtio_blk_init_pci_0_10);
 }
 
 device_init(virtio_pci_register_devices)
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER
  2009-07-07 11:09                                                                     ` [Qemu-devel] [PATCH 1/3] Change default PCI class of virtio-blk to PCI_CLASS_STORAGE_SCSI Mark McLoughlin
@ 2009-07-07 11:09                                                                       ` Mark McLoughlin
  2009-07-07 11:10                                                                         ` [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x Mark McLoughlin
  2009-07-15 11:27                                                                         ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Amit Shah
  0 siblings, 2 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-07 11:09 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Adam Jackson, qemu-devel

We're using PCI_CLASS_DISPLAY_OTHER now, but qemu-kvm.git is using
PCI_CLASS_OTHERS because:

  "As a PCI_CLASS_DISPLAY_OTHER, it reduces primary display somehow on
   Windows XP (possibly Windows disables acceleration since it fails
   to find a driver)."

While this is valid, many versions of X will get confused by it.
Class major number of 0 gets treated as a possibly prehistoric VGA
device, and then the autoconfig logic gets confused trying to figure
out whether the virtio console or the pv vga device are the real VGA.

We should really set a proper class ID. 0x0780 (serial / other) seems
most appropriate. This shouldn't require any kernel changes, the
modalias for virtio looks like:

  alias:          pci:v00001AF4d*sv*sd*bc*sc*i*

so won't care what the base class or subclass are.

It shows up in the guest as:

  00:05.0 Communication controller: Qumranet, Inc. Virtio console

A new qdev type is introduced to allow devices using the old class
to be created for compatibility with qemu-0.10.x.

Reported-by: Adam Jackson <ajax@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/pci_ids.h    |    3 +++
 hw/virtio-pci.c |   18 +++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index 3afe674..2fe60ee 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -35,6 +35,9 @@
 #define PCI_CLASS_BRIDGE_PCI             0x0604
 #define PCI_CLASS_BRIDGE_OTHER           0x0680
 
+#define PCI_CLASS_SERIAL_OTHER           0x0780
+
+#define PCI_CLASS_PROCESSOR_CO           0x0b40
 #define PCI_CLASS_COMMUNICATION_OTHER    0x0780
 
 #define PCI_CLASS_PROCESSOR_CO           0x0b40
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 93ee5e1..6d3420d 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -437,7 +437,8 @@ static void virtio_blk_init_pci_0_10(PCIDevice *pci_dev)
     virtio_blk_init_pci_with_class(pci_dev, PCI_CLASS_STORAGE_OTHER);
 }
 
-static void virtio_console_init_pci(PCIDevice *pci_dev)
+static void virtio_console_init_pci_with_class(PCIDevice *pci_dev,
+                                               uint16_t class_code)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
@@ -446,8 +447,17 @@ static void virtio_console_init_pci(PCIDevice *pci_dev)
     virtio_init_pci(proxy, vdev,
                     PCI_VENDOR_ID_REDHAT_QUMRANET,
                     PCI_DEVICE_ID_VIRTIO_CONSOLE,
-                    PCI_CLASS_DISPLAY_OTHER,
-                    0x00);
+                    class_code, 0x00);
+}
+
+static void virtio_console_init_pci(PCIDevice *pci_dev)
+{
+    virtio_console_init_pci_with_class(pci_dev, PCI_CLASS_SERIAL_OTHER);
+}
+
+static void virtio_console_init_pci_0_10(PCIDevice *pci_dev)
+{
+    virtio_console_init_pci_with_class(pci_dev, PCI_CLASS_DISPLAY_OTHER);
 }
 
 static void virtio_net_init_pci(PCIDevice *pci_dev)
@@ -490,6 +500,8 @@ static void virtio_pci_register_devices(void)
     /* For compatibility with 0.10 */
     pci_qdev_register("virtio-blk-pci-0-10", sizeof(VirtIOPCIProxy),
                       virtio_blk_init_pci_0_10);
+    pci_qdev_register("virtio-console-pci-0-10", sizeof(VirtIOPCIProxy),
+                      virtio_console_init_pci_0_10);
 }
 
 device_init(virtio_pci_register_devices)
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-07 11:09                                                                       ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Mark McLoughlin
@ 2009-07-07 11:10                                                                         ` Mark McLoughlin
  2009-07-07 12:01                                                                           ` Avi Kivity
  2009-07-15 11:27                                                                         ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Amit Shah
  1 sibling, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-07 11:10 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel

Add a pc-0-10 machine type to allow a pc machine to be created with
virtio block and console devices compatible with qemu-0.10.x.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/pc.c |   46 ++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 553ba5c..9025f78 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1084,9 +1084,12 @@ static CPUState *pc_new_cpu(const char *cpu_model)
 /* PC hardware initialisation */
 static void pc_init1(ram_addr_t ram_size,
                      const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
+                     const char *kernel_filename,
+                     const char *kernel_cmdline,
                      const char *initrd_filename,
-                     int pci_enabled, const char *cpu_model)
+                     const char *cpu_model,
+                     int pci_enabled,
+                     int compat_0_10)
 {
     char *filename;
     int ret, linux_boot, i;
@@ -1104,6 +1107,7 @@ static void pc_init1(ram_addr_t ram_size,
     BlockDriverState *fd[MAX_FD];
     int using_vga = cirrus_vga_enabled || std_vga_enabled || vmsvga_enabled;
     void *fw_cfg;
+    const char *virtio_blk_name, *virtio_console_name;
 
     if (ram_size >= 0xe0000000 ) {
         above_4g_mem_size = ram_size - 0xe0000000;
@@ -1394,13 +1398,21 @@ static void pc_init1(ram_addr_t ram_size,
         }
     }
 
+    virtio_blk_name = "virtio-blk-pci";
+    virtio_console_name = "virtio-console-pci";
+
+    if (compat_0_10) {
+        virtio_blk_name = "virtio-blk-pci-0-10";
+        virtio_console_name = "virtio-console-pci-0-10";
+    }
+
     /* Add virtio block devices */
     if (pci_enabled) {
         int index;
         int unit_id = 0;
 
         while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
-            pci_dev = pci_create("virtio-blk-pci",
+            pci_dev = pci_create(virtio_blk_name,
                                  drives_table[index].devaddr);
             qdev_init(&pci_dev->qdev);
             unit_id++;
@@ -1417,7 +1429,7 @@ static void pc_init1(ram_addr_t ram_size,
     if (pci_enabled) {
         for(i = 0; i < MAX_VIRTIO_CONSOLES; i++) {
             if (virtcon_hds[i]) {
-                pci_create_simple(pci_bus, -1, "virtio-console-pci");
+                pci_create_simple(pci_bus, -1, virtio_console_name);
             }
         }
     }
@@ -1432,7 +1444,7 @@ static void pc_init_pci(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, 1, cpu_model);
+             initrd_filename, cpu_model, 1, 0);
 }
 
 static void pc_init_isa(ram_addr_t ram_size,
@@ -1444,7 +1456,19 @@ static void pc_init_isa(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, 0, cpu_model);
+             initrd_filename, cpu_model, 0, 0);
+}
+
+static void pc_init_pci_0_10(ram_addr_t ram_size,
+                             const char *boot_device,
+                             const char *kernel_filename,
+                             const char *kernel_cmdline,
+                             const char *initrd_filename,
+                             const char *cpu_model)
+{
+    pc_init1(ram_size, boot_device,
+             kernel_filename, kernel_cmdline,
+             initrd_filename, cpu_model, 1, 1);
 }
 
 /* set CMOS shutdown status register (index 0xF) as S3_resume(0xFE)
@@ -1470,10 +1494,20 @@ static QEMUMachine isapc_machine = {
     .max_cpus = 1,
 };
 
+static QEMUMachine pc_0_10_machine = {
+    .name = "pc-0-10",
+    .desc = "Standard PC compatibile with qemu 0.10.x",
+    .init = pc_init_pci_0_10,
+    .max_cpus = 255,
+};
+
 static void pc_machine_init(void)
 {
     qemu_register_machine(&pc_machine);
     qemu_register_machine(&isapc_machine);
+
+    /* For compatibility with 0.10.x */
+    qemu_register_machine(&pc_0_10_machine);
 }
 
 machine_init(pc_machine_init);
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-07 11:10                                                                         ` [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x Mark McLoughlin
@ 2009-07-07 12:01                                                                           ` Avi Kivity
  2009-07-08 10:46                                                                             ` Mark McLoughlin
  0 siblings, 1 reply; 457+ messages in thread
From: Avi Kivity @ 2009-07-07 12:01 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: qemu-devel

On 07/07/2009 02:10 PM, Mark McLoughlin wrote:
> Add a pc-0-10 machine type to allow a pc machine to be created with
> virtio block and console devices compatible with qemu-0.10.x.
>
> Signed-off-by: Mark McLoughlin<markmc@redhat.com>
> ---
>   hw/pc.c |   46 ++++++++++++++++++++++++++++++++++++++++------
>   1 files changed, 40 insertions(+), 6 deletions(-)
>
> diff --git a/hw/pc.c b/hw/pc.c
> index 553ba5c..9025f78 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -1084,9 +1084,12 @@ static CPUState *pc_new_cpu(const char *cpu_model)
>   /* PC hardware initialisation */
>   static void pc_init1(ram_addr_t ram_size,
>                        const char *boot_device,
> -                     const char *kernel_filename, const char *kernel_cmdline,
> +                     const char *kernel_filename,
> +                     const char *kernel_cmdline,
>                        const char *initrd_filename,
> -                     int pci_enabled, const char *cpu_model)
> +                     const char *cpu_model,
> +                     int pci_enabled,
> +                     int compat_0_10)
>    


compat_level ( == COMPAT_DEFAULT, COMPAT_0_10 ).

> +static QEMUMachine pc_0_10_machine = {
> +    .name = "pc-0-10",
> +    .desc = "Standard PC compatibile with qemu 0.10.x",
>    

"compatible"

> +    .init = pc_init_pci_0_10,
> +    .max_cpus = 255,
>    

.compat_level = COMPAT_0_10,

> +};
> +
>    

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-07 12:01                                                                           ` Avi Kivity
@ 2009-07-08 10:46                                                                             ` Mark McLoughlin
  2009-07-08 10:48                                                                               ` [Qemu-devel] [PATCH 3/3 v2] " Mark McLoughlin
  0 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-08 10:46 UTC (permalink / raw)
  To: Avi Kivity; +Cc: qemu-devel

On Tue, 2009-07-07 at 15:01 +0300, Avi Kivity wrote:
> On 07/07/2009 02:10 PM, Mark McLoughlin wrote:
> > Add a pc-0-10 machine type to allow a pc machine to be created with
> > virtio block and console devices compatibilty with qemu-0.10.x.
> >
> > Signed-off-by: Mark McLoughlin<markmc@redhat.com>
> > ---
> >   hw/pc.c |   46 ++++++++++++++++++++++++++++++++++++++++------
> >   1 files changed, 40 insertions(+), 6 deletions(-)
> >
> > diff --git a/hw/pc.c b/hw/pc.c
> > index 553ba5c..9025f78 100644
> > --- a/hw/pc.c
> > +++ b/hw/pc.c
> > @@ -1084,9 +1084,12 @@ static CPUState *pc_new_cpu(const char *cpu_model)
> >   /* PC hardware initialisation */
> >   static void pc_init1(ram_addr_t ram_size,
> >                        const char *boot_device,
> > -                     const char *kernel_filename, const char *kernel_cmdline,
> > +                     const char *kernel_filename,
> > +                     const char *kernel_cmdline,
> >                        const char *initrd_filename,
> > -                     int pci_enabled, const char *cpu_model)
> > +                     const char *cpu_model,
> > +                     int pci_enabled,
> > +                     int compat_0_10)
> >    
> 
> 
> compat_level ( == COMPAT_DEFAULT, COMPAT_0_10 ).
> 
> > +static QEMUMachine pc_0_10_machine = {
> > +    .name = "pc-0-10",
> > +    .desc = "Standard PC compatibile with qemu 0.10.x",
> >    
> 
> "compatible"
> 
> > +    .init = pc_init_pci_0_10,
> > +    .max_cpus = 255,
> >    
> 
> .compat_level = COMPAT_0_10,

Yep, this would be nice.

To do it, I'd add a machine_compat_level enum member to QEMUMachine and
pass a QEMUMachine pointer to QEMUMachineInitFunc.

Happy to do that, and fix up the >50 affected machine types, but before
going to that bother I'd prefer to first get some indication that the
general approach would be accepted :-)

Following up with a slightly better patch, but still confined to hw/pc.c

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 10:46                                                                             ` Mark McLoughlin
@ 2009-07-08 10:48                                                                               ` Mark McLoughlin
  2009-07-08 13:00                                                                                 ` Gerd Hoffmann
  0 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-08 10:48 UTC (permalink / raw)
  To: Avi Kivity; +Cc: qemu-devel

Add a pc-0-10 machine type to allow a pc machine to be created with
virtio block and console devices compatible with qemu-0.10.x.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/pc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 553ba5c..de23fa7 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1081,12 +1081,20 @@ static CPUState *pc_new_cpu(const char *cpu_model)
     return env;
 }
 
+enum {
+    COMPAT_DEFAULT = 0,
+    COMPAT_0_10, /* compatible with qemu 0.10.x */
+};
+
 /* PC hardware initialisation */
 static void pc_init1(ram_addr_t ram_size,
                      const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
+                     const char *kernel_filename,
+                     const char *kernel_cmdline,
                      const char *initrd_filename,
-                     int pci_enabled, const char *cpu_model)
+                     const char *cpu_model,
+                     int pci_enabled,
+                     int compat_level)
 {
     char *filename;
     int ret, linux_boot, i;
@@ -1104,6 +1112,7 @@ static void pc_init1(ram_addr_t ram_size,
     BlockDriverState *fd[MAX_FD];
     int using_vga = cirrus_vga_enabled || std_vga_enabled || vmsvga_enabled;
     void *fw_cfg;
+    const char *virtio_blk_name, *virtio_console_name;
 
     if (ram_size >= 0xe0000000 ) {
         above_4g_mem_size = ram_size - 0xe0000000;
@@ -1394,13 +1403,26 @@ static void pc_init1(ram_addr_t ram_size,
         }
     }
 
+    switch (compat_level) {
+    case COMPAT_DEFAULT:
+    default:
+        virtio_blk_name = "virtio-blk-pci";
+        virtio_console_name = "virtio-console-pci";
+        break;
+
+    case COMPAT_0_10:
+        virtio_blk_name = "virtio-blk-pci-0-10";
+        virtio_console_name = "virtio-console-pci-0-10";
+        break;
+    }
+
     /* Add virtio block devices */
     if (pci_enabled) {
         int index;
         int unit_id = 0;
 
         while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
-            pci_dev = pci_create("virtio-blk-pci",
+            pci_dev = pci_create(virtio_blk_name,
                                  drives_table[index].devaddr);
             qdev_init(&pci_dev->qdev);
             unit_id++;
@@ -1417,7 +1439,7 @@ static void pc_init1(ram_addr_t ram_size,
     if (pci_enabled) {
         for(i = 0; i < MAX_VIRTIO_CONSOLES; i++) {
             if (virtcon_hds[i]) {
-                pci_create_simple(pci_bus, -1, "virtio-console-pci");
+                pci_create_simple(pci_bus, -1, virtio_console_name);
             }
         }
     }
@@ -1432,7 +1454,8 @@ static void pc_init_pci(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, 1, cpu_model);
+             initrd_filename, cpu_model,
+             1, COMPAT_DEFAULT);
 }
 
 static void pc_init_isa(ram_addr_t ram_size,
@@ -1444,7 +1467,21 @@ static void pc_init_isa(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, 0, cpu_model);
+             initrd_filename, cpu_model,
+             0, COMPAT_DEFAULT);
+}
+
+static void pc_init_pci_0_10(ram_addr_t ram_size,
+                             const char *boot_device,
+                             const char *kernel_filename,
+                             const char *kernel_cmdline,
+                             const char *initrd_filename,
+                             const char *cpu_model)
+{
+    pc_init1(ram_size, boot_device,
+             kernel_filename, kernel_cmdline,
+             initrd_filename, cpu_model,
+             1, COMPAT_0_10);
 }
 
 /* set CMOS shutdown status register (index 0xF) as S3_resume(0xFE)
@@ -1470,10 +1507,20 @@ static QEMUMachine isapc_machine = {
     .max_cpus = 1,
 };
 
+static QEMUMachine pc_0_10_machine = {
+    .name = "pc-0-10",
+    .desc = "Standard PC compatible with qemu 0.10.x",
+    .init = pc_init_pci_0_10,
+    .max_cpus = 255,
+};
+
 static void pc_machine_init(void)
 {
     qemu_register_machine(&pc_machine);
     qemu_register_machine(&isapc_machine);
+
+    /* For compatibility with 0.10.x */
+    qemu_register_machine(&pc_0_10_machine);
 }
 
 machine_init(pc_machine_init);
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 10:48                                                                               ` [Qemu-devel] [PATCH 3/3 v2] " Mark McLoughlin
@ 2009-07-08 13:00                                                                                 ` Gerd Hoffmann
  2009-07-08 13:44                                                                                   ` Anthony Liguori
  0 siblings, 1 reply; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-08 13:00 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Avi Kivity, qemu-devel

On 07/08/09 12:48, Mark McLoughlin wrote:
> Add a pc-0-10 machine type to allow a pc machine to be created with
> virtio block and console devices compatibility with qemu-0.10.x.

> +enum {
> +    COMPAT_DEFAULT = 0,
> +    COMPAT_0_10, /* compatible with qemu 0.10.x */
> +};

Hmm.  Just dug into the virtio blk code.  Noticed some ppc machines
have virtio block devices too.  So linking it to a machine type looks 
wrong to me.

Maybe it would be better to add a global '-compat' switch and a global
compat enum that every driver can just look at?

Another question:  Is migrating from 0.11 in 0.10-compat mode to 0.10 
supposed to work?  If so, we had better make virtio-net turn off msi then.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 13:00                                                                                 ` Gerd Hoffmann
@ 2009-07-08 13:44                                                                                   ` Anthony Liguori
  2009-07-08 14:09                                                                                     ` Gerd Hoffmann
  0 siblings, 1 reply; 457+ messages in thread
From: Anthony Liguori @ 2009-07-08 13:44 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

Gerd Hoffmann wrote:
> On 07/08/09 12:48, Mark McLoughlin wrote:
>> Add a pc-0-10 machine type to allow a pc machine to be created with
>> virtio block and console devices compatibility with qemu-0.10.x.
>
>> +enum {
>> +    COMPAT_DEFAULT = 0,
>> +    COMPAT_0_10, /* compatible with qemu 0.10.x */
>> +};
>
> Hmm.  Just digged in the virtio blk code.  Noticed some ppc machines 
> have virtio block devices too.  So linking it to a machine type looks 
> wrong to me.

No, it's right.  A machine type is nothing more than a default device 
tree.  If we want to change what the default device tree is and wish to 
maintain compatibility, we need to introduce a new device tree so that 
the old one can be still accessed.

So basically, if we had device config files, it would look like:

/usr/share/qemu/configs/pc-0-10.dts
/usr/share/qemu/configs/pc-0-11.dts
/usr/share/qemu/configs/pc -> /usr/share/qemu/configs/pc-0-11.dts

For other board types, they would also need to version their device 
trees too.

> Another question:  Is migrating from 0.11 in 0.10-compat mode to 0.10 
> supposed to work?  If so we make virtio-net better turn off msi then.

If you mean live migration, no.  However, live migration from 0.10 to 
0.11 in 0.10-compat mode should work.

But from the guest perspective, 0.10-compat mode should look identical 
to 0.10 which means we do want to turn off msi.

Regards,

Anthony Liguori

> cheers,
>   Gerd
>
>
>

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 13:44                                                                                   ` Anthony Liguori
@ 2009-07-08 14:09                                                                                     ` Gerd Hoffmann
  2009-07-08 15:08                                                                                       ` Mark McLoughlin
  0 siblings, 1 reply; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-08 14:09 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

On 07/08/09 15:44, Anthony Liguori wrote:
> Gerd Hoffmann wrote:
>>> +enum {
>>> + COMPAT_DEFAULT = 0,
>>> + COMPAT_0_10, /* compatible with qemu 0.10.x */
>>> +};
>>
>> Hmm. Just digged in the virtio blk code. Noticed some ppc machines
>> have virtio block devices too. So linking it to a machine type looks
>> wrong to me.
>
> No, it's right. A machine type is nothing more than a default device
> tree.

Plain "-M pc" has no virtio blk device.

> So basically, if we had device config files, it would look like:
>
> /usr/share/qemu/configs/pc-0-10.dts
> /usr/share/qemu/configs/pc-0-11.dts
> /usr/share/qemu/configs/pc -> /usr/share/qemu/configs/pc-0-11.dts

Doesn't work.  Your virtio blk device isn't in there.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 14:09                                                                                     ` Gerd Hoffmann
@ 2009-07-08 15:08                                                                                       ` Mark McLoughlin
  2009-07-08 19:07                                                                                         ` Gerd Hoffmann
  0 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-08 15:08 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Avi Kivity, qemu-devel

On Wed, 2009-07-08 at 16:09 +0200, Gerd Hoffmann wrote:
> On 07/08/09 15:44, Anthony Liguori wrote:
> > Gerd Hoffmann wrote:
> >>> +enum {
> >>> + COMPAT_DEFAULT = 0,
> >>> + COMPAT_0_10, /* compatible with qemu 0.10.x */
> >>> +};
> >>
> >> Hmm. Just digged in the virtio blk code. Noticed some ppc machines
> >> have virtio block devices too. So linking it to a machine type looks
> >> wrong to me.
> >
> > No, it's right. A machine type is nothing more than a default device
> > tree.
> 
> Plain "-M pc" has no virtio blk device.
> 
> > So basically, if we had device config files, it would look like:
> >
> > /usr/share/qemu/configs/pc-0-10.dts
> > /usr/share/qemu/configs/pc-0-11.dts
> > /usr/share/qemu/configs/pc -> /usr/share/qemu/configs/pc-0-11.dts
> 
> Doesn't work.  Your virtio blk device isn't in there.

Right - I had assumed Anthony meant that such a config file would also
have details on the defaults used for devices which can be added to that
machine.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 15:08                                                                                       ` Mark McLoughlin
@ 2009-07-08 19:07                                                                                         ` Gerd Hoffmann
  2009-07-08 21:45                                                                                           ` Anthony Liguori
  0 siblings, 1 reply; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-08 19:07 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Avi Kivity, qemu-devel

On 07/08/09 17:08, Mark McLoughlin wrote:
> On Wed, 2009-07-08 at 16:09 +0200, Gerd Hoffmann wrote:
>>> /usr/share/qemu/configs/pc-0-10.dts
>>> /usr/share/qemu/configs/pc-0-11.dts
>>> /usr/share/qemu/configs/pc ->  /usr/share/qemu/configs/pc-0-11.dts
>> Doesn't work.  Your virtio blk device isn't in there.
>
> Right - I had assumed Anthony meant that such a config file would also
> have details on the defaults used for devices which can be added to that
> machine.

Could probably be done that way, but that wouldn't be different from a
global compat variable that the virtio-block-pci driver looks at.

We have different versions of devices.  We should model them that way. 
I'd start with a global compat_level.  We might switch to a per-device 
compat level later if there is a need.  With qdev it should be easy once 
the devices are converted.  I think it will be much more
maintainable than adding new devices because there is a standardized way
to switch a driver to an older version instead of having to know "oh, 
that driver has these compat versions, but this one hasn't".

virtio blk+console will switch class for compat_level < 0.11.  virtio 
net will disable msi for compat_level < 0.11.  ide might adapt the 
firmware version for the virtual disk/cdrom to the compat level. 
Everybody else is free to ignore it.

Note that different machine config versions might make sense 
nevertheless in case the set of (default) devices changes.  i.e. if we 
turn on usb by default we might add a compat machine type without usb.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 19:07                                                                                         ` Gerd Hoffmann
@ 2009-07-08 21:45                                                                                           ` Anthony Liguori
  2009-07-09  7:56                                                                                             ` Gerd Hoffmann
  2009-07-09  8:00                                                                                             ` [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x Avi Kivity
  0 siblings, 2 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-07-08 21:45 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

Gerd Hoffmann wrote:
> On 07/08/09 17:08, Mark McLoughlin wrote:
>> On Wed, 2009-07-08 at 16:09 +0200, Gerd Hoffmann wrote:
>>>> /usr/share/qemu/configs/pc-0-10.dts
>>>> /usr/share/qemu/configs/pc-0-11.dts
>>>> /usr/share/qemu/configs/pc ->  /usr/share/qemu/configs/pc-0-11.dts
>>> Doesn't work.  Your virtio blk device isn't in there.
>>
>> Right - I had assumed Anthony meant that such a config file would also
>> have details on the defaults used for devices which can be added to that
>> machine.
>
> Could probably be done that way, but that wouldn't be different from a 
> global compat variable where the virtio-block-pci driver looks at.
> We have different versions of devices.

We have options that we can tweak for devices.  That doesn't mean 
they're versioned.  The default set of options that is turned on
for any given device is determined by the machine type (since the
machine init is what creates the devices to start with).

Linear versioning doesn't work because there are too many forks of QEMU 
(each distro release that includes backported features is a fork).

Regards.

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 21:45                                                                                           ` Anthony Liguori
@ 2009-07-09  7:56                                                                                             ` Gerd Hoffmann
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
  2009-07-09  8:00                                                                                             ` [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x Avi Kivity
  1 sibling, 1 reply; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-09  7:56 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

On 07/08/09 23:45, Anthony Liguori wrote:
> Gerd Hoffmann wrote:
>> On 07/08/09 17:08, Mark McLoughlin wrote:
>>> On Wed, 2009-07-08 at 16:09 +0200, Gerd Hoffmann wrote:
>>>>> /usr/share/qemu/configs/pc-0-10.dts
>>>>> /usr/share/qemu/configs/pc-0-11.dts
>>>>> /usr/share/qemu/configs/pc -> /usr/share/qemu/configs/pc-0-11.dts
>>>> Doesn't work. Your virtio blk device isn't in there.
>>>
>>> Right - I had assumed Anthony meant that such a config file would also
>>> have details on the defaults used for devices which can be added to that
>>> machine.
>>
>> Could probably be done that way, but that wouldn't be different from a
>> global compat variable where the virtio-block-pci driver looks at.
>> We have different versions of devices.
>
> We have options that we can tweak for devices. That doesn't mean they're
> versioned. What the default set of options that is turned on for any
> given device is determined by the machine type (since the machine init
> is what creates the devices to start with).

The machine init doesn't create all devices.  USB devices are created 
outside machine init today, and that will become more and more common.

I expect long-term we will end up with one of these two models:

   (1) - machine init creates just the bare devices, i.e. what is there
         when you specify just '-M pc'.  Either hardcoded or from
         machine config file.
       - generic code adds devices added by cmd line switches.

   (2) - command line switches insert additional devices into the bare
         (-M pc) device tree.
       - The virtual machine is created from that modified device tree.

In both cases it is not clear how the machine init / machine config will
handle the versioning of the devices added by command line switches.

We could have each machine type register a list of default options. 
Using qdev properties that should be doable in a fairly generic way, 
like this:

virtio-blk-pci and virtio-console-pci get a "class" property. 
virtio-net-pci gets a "msi" property.  ide-disk+cdrom gets a 
"fw-version" property (well, not yet, when being converted to qdev).

pc-0.10 could then register a list of default properties, i.e. something 
like

   "virtio-blk-pci"     => "class=0x??"
   "virtio-console-pci" => "class=0x??"
   "virtio-net-pci"     => "msi=0"
   "ide-disk"           => "fw-version=0.10.0"

When creating devices qdev would apply them.

I can prototype that.

comments?

   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-08 21:45                                                                                           ` Anthony Liguori
  2009-07-09  7:56                                                                                             ` Gerd Hoffmann
@ 2009-07-09  8:00                                                                                             ` Avi Kivity
  1 sibling, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-07-09  8:00 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, Gerd Hoffmann, qemu-devel

On 07/09/2009 12:45 AM, Anthony Liguori wrote:
> Linear versioning doesn't work because there are too many forks of 
> QEMU (each distro release that includes backported features is a fork).

So long as distro backports are non-default it should work out.  If we 
can set things up so a backport naturally winds up non-default, maybe we 
can restore some sanity.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  7:56                                                                                             ` Gerd Hoffmann
@ 2009-07-09  8:39                                                                                               ` Mark McLoughlin
  2009-07-09  8:50                                                                                                 ` Avi Kivity
                                                                                                                   ` (6 more replies)
  0 siblings, 7 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-09  8:39 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Avi Kivity, qemu-devel

On Thu, 2009-07-09 at 09:56 +0200, Gerd Hoffmann wrote:
> We could have each machine type register a list of default options. 
> Using qdev properties that should be doable in a fairly generic way, 
> like this:
> 
> virtio-blk-pci and virtio-console-pci get a "class" property. 
> virtio-net-pci gets a "msi" property.  ide-disk+cdrom gets a 
> "fw-version" property (well, not yet, when being converted to qdev).
> 
> pc-0.10 could then register a list of default properties, i.e.
> something 
> like
> 
>    "virtio-blk-pci"     => "class=0x??"
>    "virtio-console-pci" => "class=0x??"
>    "virtio-net-pci      => "msi=0"
>    "ide-disk"           => "fw-version=0.10.0"
> 
> When creating devices qdev would apply them.
> 
> I can prototype that.
> 
> comments?

I think this would be a very nice way of making what I did more generic,
but with the same external interface.

Time is running short for 0.11, though, so I'd suggest it could be an
incremental cleanup on top of the patches I posted.

If we're settled on adding '-M pc-0-10', I next want to:

  1) add '-M pc-0-11' and make it the default

  2) make '-M pc' a symlink to the latest machine type

i.e. 'qemu-system-x86_64 -M ?' would give:

  Supported machines are:
  pc         Standard PC with latest features
  pc-0-11    Standard PC compatible with qemu 0.11.x (default)
  pc-0-10    Standard PC compatible with qemu 0.10.x
  isapc      ISA-only PC

and libvirt, when creating a guest for the first time would parse this
output for the default value and store it in the XML config:

  <os>
    <type arch='x86_64' machine='pc-0-11'>hvm</type>
  </os>

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
@ 2009-07-09  8:50                                                                                                 ` Avi Kivity
  2009-07-09  8:57                                                                                                   ` Mark McLoughlin
  2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
                                                                                                                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 457+ messages in thread
From: Avi Kivity @ 2009-07-09  8:50 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Gerd Hoffmann, qemu-devel

On 07/09/2009 11:39 AM, Mark McLoughlin wrote:
> On Thu, 2009-07-09 at 09:56 +0200, Gerd Hoffmann wrote:
>    
>> We could have each machine type register a list of default options.
>> Using qdev properties that should be doable in a fairly generic way,
>> like this:
>>
>> virtio-blk-pci and virtio-console-pci get a "class" property.
>> virtio-net-pci gets a "msi" property.  ide-disk+cdrom gets a
>> "fw-version" property (well, not yet, when being converted to qdev).
>>
>> pc-0.10 could then register a list of default properties, i.e.
>> something
>> like
>>
>>     "virtio-blk-pci"     =>  "class=0x??"
>>     "virtio-console-pci" =>  "class=0x??"
>>     "virtio-net-pci      =>  "msi=0"
>>     "ide-disk"           =>  "fw-version=0.10.0"
>>
>> When creating devices qdev would apply them.
>>
>> I can prototype that.
>>
>> comments?
>>      
>
> I think this would be a very nice way of making what I did more generic,
> but with the same external interface.
>
> Time is running short for 0.11, though, so I'd suggest it could be an
> incremental cleanup on top of the patches I posted.
>
> If we're settled on adding '-M pc-0-10', I next want to:
>
>    1) add '-M pc-0-11' and make it the default
>
>    2) make '-M pc' a symlink to the latest machine type
>
> i.e. 'qemu-system-x86_64 -M ?' would give:
>
>    Supported machines are:
>    pc         Standard PC with latest features
>    pc-0-11    Standard PC compatible with qemu 0.11.x (default)
>    pc-0-10    Standard PC compatible with qemu 0.10.x
>    isapc      ISA-only PC
>
> and libvirt, when creating a guest for the first time would parse this
> output for the default value and store it in the XML config:
>
>    <os>
>      <type arch='x86_64' machine='pc-0-11'>hvm</type>
>    </os>
>
>    

Looks good.  But why 0-11 and not 0.11?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  8:50                                                                                                 ` Avi Kivity
@ 2009-07-09  8:57                                                                                                   ` Mark McLoughlin
  2009-07-09  9:04                                                                                                     ` Avi Kivity
  0 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-09  8:57 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gerd Hoffmann, qemu-devel

On Thu, 2009-07-09 at 11:50 +0300, Avi Kivity wrote:
> On 07/09/2009 11:39 AM, Mark McLoughlin wrote:
> > On Thu, 2009-07-09 at 09:56 +0200, Gerd Hoffmann wrote:
> >    
> >> We could have each machine type register a list of default options.
> >> Using qdev properties that should be doable in a fairly generic way,
> >> like this:
> >>
> >> virtio-blk-pci and virtio-console-pci get a "class" property.
> >> virtio-net-pci gets a "msi" property.  ide-disk+cdrom gets a
> >> "fw-version" property (well, not yet, when being converted to qdev).
> >>
> >> pc-0.10 could then register a list of default properties, i.e.
> >> something
> >> like
> >>
> >>     "virtio-blk-pci"     =>  "class=0x??"
> >>     "virtio-console-pci" =>  "class=0x??"
> >>     "virtio-net-pci      =>  "msi=0"
> >>     "ide-disk"           =>  "fw-version=0.10.0"
> >>
> >> When creating devices qdev would apply them.
> >>
> >> I can prototype that.
> >>
> >> comments?
> >>      
> >
> > I think this would be a very nice way of making what I did more generic,
> > but with the same external interface.
> >
> > Time is running short for 0.11, though, so I'd suggest it could be an
> > incremental cleanup on top of the patches I posted.
> >
> > If we're settled on adding '-M pc-0-10', I next want to:
> >
> >    1) add '-M pc-0-11' and make it the default
> >
> >    2) make '-M pc' a symlink to the latest machine type
> >
> > i.e. 'qemu-system-x86_64 -M ?' would give:
> >
> >    Supported machines are:
> >    pc         Standard PC with latest features
> >    pc-0-11    Standard PC compatible with qemu 0.11.x (default)
> >    pc-0-10    Standard PC compatible with qemu 0.10.x
> >    isapc      ISA-only PC
> >
> > and libvirt, when creating a guest for the first time would parse this
> > output for the default value and store it in the XML config:
> >
> >    <os>
> >      <type arch='x86_64' machine='pc-0-11'>hvm</type>
> >    </os>
> >
> >    
> 
> Looks good.  But why 0-11 and not 0.11?

I really wanted to use an underscore, but I thought a hyphen was a good
compromise.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  8:57                                                                                                   ` Mark McLoughlin
@ 2009-07-09  9:04                                                                                                     ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-07-09  9:04 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Gerd Hoffmann, qemu-devel

On 07/09/2009 11:57 AM, Mark McLoughlin wrote:
>> Looks good.  But why 0-11 and not 0.11?
>>      
>
> I really wanted to use an underscore, but I thought a hyphen was a good
> compromise.
>
>    

A period is really a very short underscore.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
  2009-07-09  8:50                                                                                                 ` Avi Kivity
@ 2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
  2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
                                                                                                                     ` (2 more replies)
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 0/4] Add pc-0.11 machine type and make pc an alias to it Mark McLoughlin
                                                                                                                   ` (4 subsequent siblings)
  6 siblings, 3 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-09  9:05 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Avi Kivity, qemu-devel

>> comments?
>
> I think this would be a very nice way of making what I did more generic,
> but with the same external interface.
>
> Time is running short for 0.11, though, so I'd suggest it could be an
> incremental cleanup on top of the patches I posted.

I'd prefer to not introduce those virtio-$type-pci-$version devices in 
the first place.  It isn't hard to write up something qdev based.  Only 
problem is it will depend on lots of not-yet merged qdev patches.

Speaking of patches:  What is up there?  aliguori-queue.git wasn't 
flushed for more than a week now.  There are lots of patches on the list 
unprocessed, ranging from simple one-liner build fixes to larger patch 
series.  There was rumor on a new -stable release Tue/Wed this week 
which didn't happen yet.  Looks like patch processing needs 
load-balancing to more people ...

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
@ 2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
  2009-07-09 13:31                                                                                                     ` Mark McLoughlin
  2009-07-09 13:35                                                                                                     ` Anthony Liguori
  2009-07-09 11:51                                                                                                   ` Avi Kivity
  2009-07-09 15:00                                                                                                   ` Anthony Liguori
  2 siblings, 2 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-09 10:01 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Avi Kivity, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 563 bytes --]

   Hi,

> I'd prefer to not introduce those virtio-$type-pci-$version devices in
> the first place. It isn't hard to write up something qdev based. Only
> problem is it will depend on lots of not-yet merged qdev patches.

To back that with some code, here is a quick patch.  Not split up yet. 
  Only virtio-blk is handled for demonstration purposes.

Will not apply cleanly as it depends on both posted and not-yet posted 
patches in my patch queue.  Once my qdev properties patch is in it 
should be easy to rebase to upstream/master though.

cheers,
   Gerd

[-- Attachment #2: compat.diff --]
[-- Type: text/plain, Size: 5312 bytes --]

diff --git a/hw/boards.h b/hw/boards.h
index f6733b7..5a07d07 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -17,6 +17,7 @@ typedef struct QEMUMachine {
     int use_scsi;
     int max_cpus;
     int is_default;
+    struct CompatProperty *compat_props;
     struct QEMUMachine *next;
 } QEMUMachine;
 
diff --git a/hw/pc.c b/hw/pc.c
index 38678da..cd64ccf 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1460,6 +1460,21 @@ static QEMUMachine pc_machine = {
     .is_default = 1,
 };
 
+static QEMUMachine pc_machine_v0_10 = {
+    .name = "pc-0.10",
+    .desc = "Standard PC, qemu 0.10",
+    .init = pc_init_pci,
+    .max_cpus = 255,
+    .compat_props = (CompatProperty[]) {
+        {
+            .driver   = "virtio-blk-pci",
+            .property = "class",
+            .value    = "0x0180", /* PCI_CLASS_STORAGE_OTHER */
+        },
+        { /* end of list */ }
+    },
+};
+
 static QEMUMachine isapc_machine = {
     .name = "isapc",
     .desc = "ISA-only PC",
@@ -1470,6 +1485,7 @@ static QEMUMachine isapc_machine = {
 static void pc_machine_init(void)
 {
     qemu_register_machine(&pc_machine);
+    qemu_register_machine(&pc_machine_v0_10);
     qemu_register_machine(&isapc_machine);
 }
 
diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index 720001b..bec756d 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -228,3 +228,22 @@ void qdev_prop_set_defaults(DeviceState *dev, Property *props)
     }
 }
 
+static CompatProperty *compat_props;
+
+void qdev_register_compat_props(CompatProperty *props)
+{
+    compat_props = props;
+}
+
+void qdev_prop_set_compat(DeviceState *dev)
+{
+    CompatProperty *prop;
+
+    if (!compat_props)
+        return;
+    for (prop = compat_props; prop->driver != NULL; prop++) {
+        if (strcmp(dev->info->name, prop->driver) != 0)
+            continue;
+        qdev_prop_parse(dev, prop->property, prop->value);
+    }
+}
diff --git a/hw/qdev.c b/hw/qdev.c
index 9f7ac0b..ebb6bcf 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -130,6 +130,7 @@ DeviceState *qdev_create(BusState *bus, const char *name)
     dev->parent_bus = bus;
     qdev_prop_set_defaults(dev, dev->info->props);
     qdev_prop_set_defaults(dev, dev->parent_bus->info->props);
+    qdev_prop_set_compat(dev);
     LIST_INSERT_HEAD(&bus->children, dev, sibling);
     return dev;
 }
diff --git a/hw/qdev.h b/hw/qdev.h
index be10f44..5b4c1b0 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -8,6 +8,8 @@ typedef struct Property Property;
 
 typedef struct PropertyInfo PropertyInfo;
 
+typedef struct CompatProperty CompatProperty;
+
 typedef struct DeviceInfo DeviceInfo;
 
 typedef struct BusState BusState;
@@ -63,6 +65,12 @@ struct PropertyInfo {
     int (*print)(DeviceState *dev, Property *prop, char *dest, size_t len);
 };
 
+struct CompatProperty {
+    const char *driver;
+    const char *property;
+    const char *value;
+};
+
 /*** Board API.  This should go away once we have a machine config file.  ***/
 
 DeviceState *qdev_create(BusState *bus, const char *name);
@@ -152,4 +160,7 @@ int qdev_prop_set_uint32(DeviceState *dev, const char *name, uint32_t value);
 int qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value);
 void qdev_prop_set_defaults(DeviceState *dev, Property *props);
 
+void qdev_register_compat_props(CompatProperty *props);
+void qdev_prop_set_compat(DeviceState *dev);
+
 #endif
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 96f3764..996dadb 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -87,12 +87,7 @@ typedef struct {
     VirtIODevice *vdev;
     uint32_t addr;
 
-    uint16_t vendor;
-    uint16_t device;
-    uint16_t subvendor;
-    uint16_t class_code;
-    uint8_t pif;
-
+    uint32_t class_code;
     qname hostlink;
 } VirtIOPCIProxy;
 
@@ -421,12 +416,15 @@ static void virtio_blk_init_pci(PCIDevice *pci_dev)
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
 
+    if (proxy->class_code != PCI_CLASS_STORAGE_SCSI &&
+        proxy->class_code != PCI_CLASS_STORAGE_OTHER)
+        proxy->class_code = PCI_CLASS_STORAGE_SCSI;
+
     vdev = virtio_blk_init(&pci_dev->qdev, proxy->hostlink);
     virtio_init_pci(proxy, vdev,
                     PCI_VENDOR_ID_REDHAT_QUMRANET,
                     PCI_DEVICE_ID_VIRTIO_BLOCK,
-                    PCI_CLASS_STORAGE_OTHER,
-                    0x00);
+                    proxy->class_code, 0x00);
 }
 
 static void virtio_console_init_pci(PCIDevice *pci_dev)
@@ -478,6 +476,10 @@ static PCIDeviceInfo virtio_info[] = {
                 .name   = "drive",
                 .info   = &qdev_prop_name,
                 .offset = offsetof(VirtIOPCIProxy, hostlink),
+            },{
+                .name   = "class",
+                .info   = &qdev_prop_hex32,
+                .offset = offsetof(VirtIOPCIProxy, class_code),
             },
             {/* end of list */}
         },
diff --git a/vl.c b/vl.c
index 0acc1de..2c5608f 100644
--- a/vl.c
+++ b/vl.c
@@ -6030,6 +6030,8 @@ int main(int argc, char **argv, char **envp)
 
     module_call_init(MODULE_INIT_DEVICE);
 
+    if (machine->compat_props)
+        qdev_register_compat_props(machine->compat_props);
     machine->init(ram_size, boot_devices,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
  2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
@ 2009-07-09 11:51                                                                                                   ` Avi Kivity
  2009-07-09 13:29                                                                                                     ` Anthony Liguori
  2009-07-09 15:00                                                                                                   ` Anthony Liguori
  2 siblings, 1 reply; 457+ messages in thread
From: Avi Kivity @ 2009-07-09 11:51 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, Gerd Hoffmann, qemu-devel

On 07/09/2009 12:05 PM, Gerd Hoffmann wrote:
>
> Speaking of patches:  What is up there?  aliguori-queue.git wasn't 
> flushed for more than a week now.  There are lots of patches on the 
> list unprocessed, ranging from simple one-liner build fixes to larger 
> patch series.  There was rumor on a new -stable release Tue/Wed this 
> week which didn't happen yet.  Looks like patch processing needs 
> load-balancing to more people ...
>

I just found out about this queue.  Can you make it a branch in qemu.git 
instead of some random repository?  You could name it 'next' or 
'next/aliguori' or something.  That means a single git fetch brings both 
commits and queued patches.

I have a similar 'next' branch in qemu-kvm.git.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 11:51                                                                                                   ` Avi Kivity
@ 2009-07-09 13:29                                                                                                     ` Anthony Liguori
  2009-07-09 13:59                                                                                                       ` Avi Kivity
  0 siblings, 1 reply; 457+ messages in thread
From: Anthony Liguori @ 2009-07-09 13:29 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Mark McLoughlin, Gerd Hoffmann, qemu-devel

Avi Kivity wrote:
> On 07/09/2009 12:05 PM, Gerd Hoffmann wrote:
>>
>> Speaking of patches:  What is up there?  aliguori-queue.git wasn't 
>> flushed for more than a week now.  There are lots of patches on the 
>> list unprocessed, ranging from simple one-liner build fixes to larger 
>> patch series.  There was rumor on a new -stable release Tue/Wed this 
>> week which didn't happen yet.  Looks like patch processing needs 
>> load-balancing to more people ...
>>
>
> I just found out about this queue.  Can you make it a branch in 
> qemu.git instead of some random repository?  You could name it 'next' 
> or 'next/aliguori' or something.  That means a single git fetch brings 
> both commits and queued patches.
>
> I have a similar 'next' branch in qemu-kvm.git.

Do you frequently rebase next?  I was afraid that people would not be 
able to cope with a branch that rebased all of the time.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
@ 2009-07-09 13:31                                                                                                     ` Mark McLoughlin
  2009-07-09 13:47                                                                                                       ` Gerd Hoffmann
  2009-07-09 13:35                                                                                                     ` Anthony Liguori
  1 sibling, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-09 13:31 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Avi Kivity, qemu-devel

On Thu, 2009-07-09 at 12:01 +0200, Gerd Hoffmann wrote:
> I'd prefer to not introduce those virtio-$type-pci-$version devices in
> the first place.

Ah, I see why ... with your -device stuff, these new devices types would
become part of the public interface.

If they were to be public, we'd probably want them to become
virtio-$type-pci-$feature instead. As pointed out before, that becomes
cumbersome.

> diff --git a/hw/pc.c b/hw/pc.c
> index 38678da..cd64ccf 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -1460,6 +1460,21 @@ static QEMUMachine pc_machine = {
>      .is_default = 1,
>  };
>  
> +static QEMUMachine pc_machine_v0_10 = {
> +    .name = "pc-0.10",
> +    .desc = "Standard PC, qemu 0.10",
> +    .init = pc_init_pci,
> +    .max_cpus = 255,
> +    .compat_props = (CompatProperty[]) {
> +        {
> +            .driver   = "virtio-blk-pci",
> +            .property = "class",
> +            .value    = "0x0180", /* PCI_CLASS_STORAGE_OTHER */
> +        },
> +        { /* end of list */ }
> +    },
> +};

I did actually experiment with something similar to this, but backed off
because I didn't like putting such details (i.e. the class value) in
hw/pc.c rather than keeping them self contained in the driver code.
Especially since it'll have to be replicated for every machine which can
use the device.

Hmm, if this was a machine config file, would we do something like
"include compat-props-0.10.conf"? Maybe something similar is in order
here?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
  2009-07-09 13:31                                                                                                     ` Mark McLoughlin
@ 2009-07-09 13:35                                                                                                     ` Anthony Liguori
  2009-07-09 13:55                                                                                                       ` Gerd Hoffmann
  2009-07-09 16:09                                                                                                       ` Paul Brook
  1 sibling, 2 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-07-09 13:35 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

Gerd Hoffmann wrote:
>   Hi,
>
>> I'd prefer to not introduce those virtio-$type-pci-$version devices in
>> the first place. It isn't hard to write up something qdev based. Only
>> problem is it will depend on lots of not-yet merged qdev patches.
>
> To back that with some code here is a quick patch.  Not splitted up 
> yet.  Only virtio-blk is handled for demonstration purposes.
>
> Will not apply cleanly as it depends on both posted and not-yet posted 
> patches in my patch queue.  Once my qdev properties patch is in it 
> should be easy to rebase to upstream/master though.

It came out looking pretty good.  The literal syntax is new to me:

+    .compat_props = (CompatProperty[]) {
+        {


I assume this is a GCC-ism?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 13:31                                                                                                     ` Mark McLoughlin
@ 2009-07-09 13:47                                                                                                       ` Gerd Hoffmann
  0 siblings, 0 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-09 13:47 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Avi Kivity, qemu-devel

On 07/09/09 15:31, Mark McLoughlin wrote:
>> +static QEMUMachine pc_machine_v0_10 = {
>> +    .name = "pc-0.10",
>> +    .desc = "Standard PC, qemu 0.10",
>> +    .init = pc_init_pci,
>> +    .max_cpus = 255,
>> +    .compat_props = (CompatProperty[]) {
>> +        {
>> +            .driver   = "virtio-blk-pci",
>> +            .property = "class",
>> +            .value    = "0x0180", /* PCI_CLASS_STORAGE_OTHER */
>> +        },
>> +        { /* end of list */ }
>> +    },
>> +};
>
> I did actually experiment with something similar to this, but backed off
> because I didn't like putting such details (i.e. the class value) in
> hw/pc.c rather than keeping them self contained in the driver code.
> Especially since it'll have to be replicated for every machine which can
> use the device.

We don't need to have the list inline.  We can have

CompatProperty compat_qemu_0_10[] {
     [ properties here ]
};

somewhere and then just reference it like this:

static QEMUMachine pc_machine_v0_10 = {
     .name = "pc-0.10",
     [ ... ]
     .compat_props = &compat_qemu_0_10,
};

from multiple machine types.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 13:35                                                                                                     ` Anthony Liguori
@ 2009-07-09 13:55                                                                                                       ` Gerd Hoffmann
  2009-07-09 16:09                                                                                                       ` Paul Brook
  1 sibling, 0 replies; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-09 13:55 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

On 07/09/09 15:35, Anthony Liguori wrote:
> It came out looking pretty good. The literal syntax is new to me:
>
> + .compat_props = (CompatProperty[]) {
> + {
>
> I assume this is a GCC-ism?

Not sure.  Could also be C99.  It is new to me too.

One of Paul's patches brought it in (check hw/syborg_timer.c, pretty 
close to the bottom).  I found it very useful for building up static 
data structures and picked it up.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 13:29                                                                                                     ` Anthony Liguori
@ 2009-07-09 13:59                                                                                                       ` Avi Kivity
  0 siblings, 0 replies; 457+ messages in thread
From: Avi Kivity @ 2009-07-09 13:59 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, Gerd Hoffmann, qemu-devel

On 07/09/2009 04:29 PM, Anthony Liguori wrote:
>> I just found out about this queue.  Can you make it a branch in 
>> qemu.git instead of some random repository?  You could name it 'next' 
>> or 'next/aliguori' or something.  That means a single git fetch 
>> brings both commits and queued patches.
>>
>> I have a similar 'next' branch in qemu-kvm.git.
>
>
> Do you frequently rebase next? 

'next' is a throwaway branch.  It's subject to change without notice.

> I was afraid that people would not be able to cope with a branch that 
> rebased all of the time.

It's a way to see what you're thinking; no one should base a long 
running branch on it.  I think it's fairly safe to do, people who are 
likely to create long running branches will be fairly knowledgeable about 
the processes we use.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
  2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
  2009-07-09 11:51                                                                                                   ` Avi Kivity
@ 2009-07-09 15:00                                                                                                   ` Anthony Liguori
  2 siblings, 0 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-07-09 15:00 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Mark McLoughlin, Avi Kivity, qemu-devel

Gerd Hoffmann wrote:
>>> comments?
>>
>> I think this would be a very nice way of making what I did more generic,
>> but with the same external interface.
>>
>> Time is running short for 0.11, though, so I'd suggest it could be an
>> incremental cleanup on top of the patches I posted.
>
> I'd prefer to not introduce those virtio-$type-pci-$version devices in 
> the first place.  It isn't hard to write up something qdev based.  
> Only problem is it will depend on lots of not-yet merged qdev patches.
>
> Speaking of patches:  What is up there?  aliguori-queue.git wasn't 
> flushed for more than a week now.

Holiday + sudden non-QEMU work items.  I'm trying to block off some time 
this afternoon but this has been a tough week for me.

>   There are lots of patches on the list unprocessed, ranging from 
> simple one-liner build fixes to larger patch series.  There was rumor 
> on a new -stable release Tue/Wed this week which didn't happen yet.  
> Looks like patch processing needs load-balancing to more people ...

It's not so easy to just distribute commit access to loads of people.  
However, we still have a pretty low patch acceptance rate so peer review 
+ testing would certainly help reduce the overall patch traffic.

So I'd encourage folks to take some time and review/test patches on the 
list.  For those that already do, it's greatly appreciated.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x
  2009-07-09 13:35                                                                                                     ` Anthony Liguori
  2009-07-09 13:55                                                                                                       ` Gerd Hoffmann
@ 2009-07-09 16:09                                                                                                       ` Paul Brook
  1 sibling, 0 replies; 457+ messages in thread
From: Paul Brook @ 2009-07-09 16:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin, Gerd Hoffmann, Avi Kivity

> It came out looking pretty good.  The literal syntax is new to me:
>
> +    .compat_props = (CompatProperty[]) {
> +        {
>
> I assume this is a GCC-ism?

Compound literals ("(Foo[]){...}") are a standard C99 feature, as are 
designated initializers (".foo = ...")

Paul

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER
  2009-07-07 11:09                                                                       ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Mark McLoughlin
  2009-07-07 11:10                                                                         ` [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x Mark McLoughlin
@ 2009-07-15 11:27                                                                         ` Amit Shah
  1 sibling, 0 replies; 457+ messages in thread
From: Amit Shah @ 2009-07-15 11:27 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Adam Jackson, qemu-devel

On (Tue) Jul 07 2009 [12:09:58], Mark McLoughlin wrote:
> diff --git a/hw/pci_ids.h b/hw/pci_ids.h
> index 3afe674..2fe60ee 100644
> --- a/hw/pci_ids.h
> +++ b/hw/pci_ids.h
> @@ -35,6 +35,9 @@
>  #define PCI_CLASS_BRIDGE_PCI             0x0604
>  #define PCI_CLASS_BRIDGE_OTHER           0x0680
>  
> +#define PCI_CLASS_SERIAL_OTHER           0x0780
> +
> +#define PCI_CLASS_PROCESSOR_CO           0x0b40
>  #define PCI_CLASS_COMMUNICATION_OTHER    0x0780
>  
>  #define PCI_CLASS_PROCESSOR_CO           0x0b40

Those defines already exist; they're also seen in the hunk:
PCI_CLASS_COMMUNICATION_OTHER and the other with the same name.

		Amit

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 0/4] Add pc-0.11 machine type and make pc an alias to it
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
  2009-07-09  8:50                                                                                                 ` Avi Kivity
  2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
@ 2009-07-21 14:21                                                                                                 ` Mark McLoughlin
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type Mark McLoughlin
                                                                                                                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-21 14:21 UTC (permalink / raw)
  To: qemu-devel


Hi,
        Here's a series of small patches to finish off the machine type
compat work for 0.11.

        The idea is that the likes of libvirt can canonicalize the 'pc'
machine type to the current latest version of the pc machine type and
retain that in the VM configuration. That allows users to easily
continue running their VMs with the same machine type with which they
were originally created.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
                                                                                                                   ` (2 preceding siblings ...)
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 0/4] Add pc-0.11 machine type and make pc an alias to it Mark McLoughlin
@ 2009-07-21 14:21                                                                                                 ` Mark McLoughlin
  2009-07-21 14:49                                                                                                   ` Mark McLoughlin
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 2/4] Remove the virtio-{blk, console}-pci-0-10 device types Mark McLoughlin
                                                                                                                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-21 14:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin

We have the pc-0.10 machine type now which does exactly the same
thing.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/pc.c |   55 +++++--------------------------------------------------
 1 files changed, 5 insertions(+), 50 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index a50f23d..0ebc329 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1100,11 +1100,6 @@ static CPUState *pc_new_cpu(const char *cpu_model)
     return env;
 }
 
-enum {
-    COMPAT_DEFAULT = 0,
-    COMPAT_0_10, /* compatible with qemu 0.10.x */
-};
-
 /* PC hardware initialisation */
 static void pc_init1(ram_addr_t ram_size,
                      const char *boot_device,
@@ -1112,8 +1107,7 @@ static void pc_init1(ram_addr_t ram_size,
                      const char *kernel_cmdline,
                      const char *initrd_filename,
                      const char *cpu_model,
-                     int pci_enabled,
-                     int compat_level)
+                     int pci_enabled)
 {
     char *filename;
     int ret, linux_boot, i;
@@ -1131,7 +1125,6 @@ static void pc_init1(ram_addr_t ram_size,
     BlockDriverState *fd[MAX_FD];
     int using_vga = cirrus_vga_enabled || std_vga_enabled || vmsvga_enabled;
     void *fw_cfg;
-    const char *virtio_blk_name, *virtio_console_name;
 
     if (ram_size >= 0xe0000000 ) {
         above_4g_mem_size = ram_size - 0xe0000000;
@@ -1422,26 +1415,13 @@ static void pc_init1(ram_addr_t ram_size,
         }
     }
 
-    switch (compat_level) {
-    case COMPAT_DEFAULT:
-    default:
-        virtio_blk_name = "virtio-blk-pci";
-        virtio_console_name = "virtio-console-pci";
-        break;
-
-    case COMPAT_0_10:
-        virtio_blk_name = "virtio-blk-pci-0-10";
-        virtio_console_name = "virtio-console-pci-0-10";
-        break;
-    }
-
     /* Add virtio block devices */
     if (pci_enabled) {
         int index;
         int unit_id = 0;
 
         while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
-            pci_dev = pci_create(virtio_blk_name,
+            pci_dev = pci_create("virtio-blk-pci",
                                  drives_table[index].devaddr);
             qdev_init(&pci_dev->qdev);
             unit_id++;
@@ -1458,7 +1438,7 @@ static void pc_init1(ram_addr_t ram_size,
     if (pci_enabled) {
         for(i = 0; i < MAX_VIRTIO_CONSOLES; i++) {
             if (virtcon_hds[i]) {
-                pci_create_simple(pci_bus, -1, virtio_console_name);
+                pci_create_simple(pci_bus, -1, "virtio-console-pci");
             }
         }
     }
@@ -1473,8 +1453,7 @@ static void pc_init_pci(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model,
-             1, COMPAT_DEFAULT);
+             initrd_filename, cpu_model, 1);
 }
 
 static void pc_init_isa(ram_addr_t ram_size,
@@ -1486,21 +1465,7 @@ static void pc_init_isa(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model,
-             0, COMPAT_DEFAULT);
-}
-
-static void pc_init_pci_0_10(ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
-{
-    pc_init1(ram_size, boot_device,
-             kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model,
-             1, COMPAT_0_10);
+             initrd_filename, cpu_model, 0);
 }
 
 /* set CMOS shutdown status register (index 0xF) as S3_resume(0xFE)
@@ -1549,21 +1514,11 @@ static QEMUMachine isapc_machine = {
     .max_cpus = 1,
 };
 
-static QEMUMachine pc_0_10_machine = {
-    .name = "pc-0-10",
-    .desc = "Standard PC compatible with qemu 0.10.x",
-    .init = pc_init_pci_0_10,
-    .max_cpus = 255,
-};
-
 static void pc_machine_init(void)
 {
     qemu_register_machine(&pc_machine);
     qemu_register_machine(&pc_machine_v0_10);
     qemu_register_machine(&isapc_machine);
-
-    /* For compatibility with 0.10.x */
-    qemu_register_machine(&pc_0_10_machine);
 }
 
 machine_init(pc_machine_init);
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 2/4] Remove the virtio-{blk, console}-pci-0-10 device types
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
                                                                                                                   ` (3 preceding siblings ...)
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type Mark McLoughlin
@ 2009-07-21 14:21                                                                                                 ` Mark McLoughlin
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 3/4] Add machine type aliases Mark McLoughlin
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 4/4] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
  6 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-21 14:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin

These are now unused.

However, perhaps the idea is that when we add -device, they will be
useful? In that case, we should add virtio-net-pci-0-10 too.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/virtio-pci.c |   57 ++++++++++--------------------------------------------
 1 files changed, 11 insertions(+), 46 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 1f922c2..703f4fe 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -423,63 +423,37 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
-static void virtio_blk_init_pci_with_class(PCIDevice *pci_dev,
-                                           uint16_t class_code)
+static void virtio_blk_init_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
 
-    vdev = virtio_blk_init(&pci_dev->qdev);
-    virtio_init_pci(proxy, vdev,
-                    PCI_VENDOR_ID_REDHAT_QUMRANET,
-                    PCI_DEVICE_ID_VIRTIO_BLOCK,
-                    class_code, 0x00);
-}
-
-static void virtio_blk_init_pci(PCIDevice *pci_dev)
-{
-    VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
-  
     if (proxy->class_code != PCI_CLASS_STORAGE_SCSI &&
         proxy->class_code != PCI_CLASS_STORAGE_OTHER)
         proxy->class_code = PCI_CLASS_STORAGE_SCSI;
- 
-    virtio_blk_init_pci_with_class(pci_dev, proxy->class_code);
-}
-
-static void virtio_blk_init_pci_0_10(PCIDevice *pci_dev)
-{
-    virtio_blk_init_pci_with_class(pci_dev, PCI_CLASS_STORAGE_OTHER);
-}
-
-static void virtio_console_init_pci_with_class(PCIDevice *pci_dev,
-                                               uint16_t class_code)
-{
-    VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
-    VirtIODevice *vdev;
 
-    vdev = virtio_console_init(&pci_dev->qdev);
+    vdev = virtio_blk_init(&pci_dev->qdev);
     virtio_init_pci(proxy, vdev,
                     PCI_VENDOR_ID_REDHAT_QUMRANET,
-                    PCI_DEVICE_ID_VIRTIO_CONSOLE,
-                    class_code, 0x00);
+                    PCI_DEVICE_ID_VIRTIO_BLOCK,
+                    proxy->class_code, 0x00);
 }
 
 static void virtio_console_init_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
-  
+    VirtIODevice *vdev;
+
     if (proxy->class_code != PCI_CLASS_COMMUNICATION_OTHER &&
         proxy->class_code != PCI_CLASS_DISPLAY_OTHER && /* qemu 0.10 */
         proxy->class_code != PCI_CLASS_OTHERS)          /* qemu-kvm  */
         proxy->class_code = PCI_CLASS_COMMUNICATION_OTHER;
 
-    virtio_console_init_pci_with_class(pci_dev, proxy->class_code);
-}
-
-static void virtio_console_init_pci_0_10(PCIDevice *pci_dev)
-{
-    virtio_console_init_pci_with_class(pci_dev, PCI_CLASS_DISPLAY_OTHER);
+    vdev = virtio_console_init(&pci_dev->qdev);
+    virtio_init_pci(proxy, vdev,
+                    PCI_VENDOR_ID_REDHAT_QUMRANET,
+                    PCI_DEVICE_ID_VIRTIO_CONSOLE,
+                    proxy->class_code, 0x00);
 }
 
 static void virtio_net_init_pci(PCIDevice *pci_dev)
@@ -561,15 +535,6 @@ static PCIDeviceInfo virtio_info[] = {
         .qdev.size = sizeof(VirtIOPCIProxy),
         .init      = virtio_balloon_init_pci,
     },{
-        /* For compatibility with 0.10 */
-        .qdev.name = "virtio-blk-pci-0-10",
-        .qdev.size = sizeof(VirtIOPCIProxy),
-        .init      = virtio_blk_init_pci_0_10,
-    },{
-        .qdev.name = "virtio-console-pci-0-10",
-        .qdev.size = sizeof(VirtIOPCIProxy),
-        .init      = virtio_console_init_pci_0_10,
-    },{
         /* end of list */
     }
 };
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 3/4] Add machine type aliases
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
                                                                                                                   ` (4 preceding siblings ...)
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 2/4] Remove the virtio-{blk, console}-pci-0-10 device types Mark McLoughlin
@ 2009-07-21 14:21                                                                                                 ` Mark McLoughlin
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 4/4] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
  6 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-21 14:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin

Add an 'alias' field to QEMUMachine and display it in the output of
'qemu -M ?' as a separate entry with an '(alias of foo)' suffix.

Aliases can change targets in newer versions of qemu, so management tools
may choose to canonicalize machine types to ensure that, if a user chooses
an alias, the actual machine type used will remain compatible in
future.

This is intended to mimic a symlink to a machine description file.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/boards.h |    1 +
 vl.c        |    5 +++++
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/boards.h b/hw/boards.h
index 11acb89..d889341 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -14,6 +14,7 @@ typedef void QEMUMachineInitFunc(ram_addr_t ram_size,
 
 typedef struct QEMUMachine {
     const char *name;
+    const char *alias;
     const char *desc;
     QEMUMachineInitFunc *init;
     int use_scsi;
diff --git a/vl.c b/vl.c
index f475693..02d5da5 100644
--- a/vl.c
+++ b/vl.c
@@ -3357,6 +3357,8 @@ static QEMUMachine *find_machine(const char *name)
     for(m = first_machine; m != NULL; m = m->next) {
         if (!strcmp(m->name, name))
             return m;
+        if (m->alias && !strcmp(m->alias, name))
+            return m;
     }
     return NULL;
 }
@@ -4993,6 +4995,9 @@ int main(int argc, char **argv, char **envp)
                     QEMUMachine *m;
                     printf("Supported machines are:\n");
                     for(m = first_machine; m != NULL; m = m->next) {
+                        if (m->alias)
+                            printf("%-10s %s (alias of %s)\n",
+                                   m->alias, m->desc, m->name);
                         printf("%-10s %s%s\n",
                                m->name, m->desc,
                                m->is_default ? " (default)" : "");
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 4/4] Add a pc-0.11 machine type and make the pc type an alias
  2009-07-09  8:39                                                                                               ` Mark McLoughlin
                                                                                                                   ` (5 preceding siblings ...)
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 3/4] Add machine type aliases Mark McLoughlin
@ 2009-07-21 14:21                                                                                                 ` Mark McLoughlin
  6 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-21 14:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin

The pc-0.11 type allows users of qemu-0.11 to use a machine type which
they know will remain compatible when they upgrade to qemu-0.12.

Management tools may choose to canonicalize the 'pc' machine type to
'pc-0.11' so that if the 'pc' alias changes target in future versions
of qemu, the machine type used will remain compatible.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/pc.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 0ebc329..d262b76 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1477,7 +1477,8 @@ void cmos_set_s3_resume(void)
 }
 
 static QEMUMachine pc_machine = {
-    .name = "pc",
+    .name = "pc-0.11",
+    .alias = "pc",
     .desc = "Standard PC",
     .init = pc_init_pci,
     .max_cpus = 255,
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type
  2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type Mark McLoughlin
@ 2009-07-21 14:49                                                                                                   ` Mark McLoughlin
  2009-07-22  2:14                                                                                                     ` Anthony Liguori
  0 siblings, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-21 14:49 UTC (permalink / raw)
  To: qemu-devel

On Tue, 2009-07-21 at 15:21 +0100, Mark McLoughlin wrote:
> We have the pc-0.10 machine type now which does exactly the same
> thing.

Ah, I see Gerd fixed this already.

My other three patches should apply on top of his.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type
  2009-07-21 14:49                                                                                                   ` Mark McLoughlin
@ 2009-07-22  2:14                                                                                                     ` Anthony Liguori
  2009-07-22  8:56                                                                                                       ` Gerd Hoffmann
  2009-07-22  9:02                                                                                                       ` Mark McLoughlin
  0 siblings, 2 replies; 457+ messages in thread
From: Anthony Liguori @ 2009-07-22  2:14 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: qemu-devel

Mark McLoughlin wrote:
> On Tue, 2009-07-21 at 15:21 +0100, Mark McLoughlin wrote:
>   
>> We have the pc-0.10 machine type now which does exactly the same
>> thing.
>>     
>
> Ah, I see Gerd fixed this already.
>
> My other three patches should apply on top of his.
>   

Three or two?  Can you take a closer look?

I tried earlier to drop the obvious ones but it didn't apply easily

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type
  2009-07-22  2:14                                                                                                     ` Anthony Liguori
@ 2009-07-22  8:56                                                                                                       ` Gerd Hoffmann
  2009-07-22  9:05                                                                                                         ` Mark McLoughlin
  2009-07-22  9:02                                                                                                       ` Mark McLoughlin
  1 sibling, 1 reply; 457+ messages in thread
From: Gerd Hoffmann @ 2009-07-22  8:56 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Mark McLoughlin, qemu-devel

On 07/22/09 04:14, Anthony Liguori wrote:
> Mark McLoughlin wrote:
>> On Tue, 2009-07-21 at 15:21 +0100, Mark McLoughlin wrote:
>>> We have the pc-0.10 machine type now which does exactly the same
>>> thing.
>>
>> Ah, I see Gerd fixed this already.
>>
>> My other three patches should apply on top of his.
>
> Three or two? Can you take a closer look?

Isn't going to work.  I'd suggest to kick out my patch and take Mark's 
complete series instead.  It does all my patch did, cleans up a bit more 
in virtio-pci.c (leading to conflicts) and adds the aliasing.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type
  2009-07-22  2:14                                                                                                     ` Anthony Liguori
  2009-07-22  8:56                                                                                                       ` Gerd Hoffmann
@ 2009-07-22  9:02                                                                                                       ` Mark McLoughlin
  2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 1/2] Add machine type aliases Mark McLoughlin
  2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 2/2] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
  1 sibling, 2 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-22  9:02 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel

On Tue, 2009-07-21 at 21:14 -0500, Anthony Liguori wrote:
> Mark McLoughlin wrote:
> > On Tue, 2009-07-21 at 15:21 +0100, Mark McLoughlin wrote:
> >   
> >> We have the pc-0.10 machine type now which does exactly the same
> >> thing.
> >>     
> >
> > Ah, I see Gerd fixed this already.
> >
> > My other three patches should apply on top of his.
> >   
> 
> Three or two?  Can you take a closer look?
> 
> I tried earlier to drop the obvious ones but it didn't apply easily

You're right, Gerd's patch also removes the -0-10 devices, which is what
my 2/4 patch did.

I'll re-send the two just to be clear.

Again, I think these two make sense to apply to both stable-0.11 and
master.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 1/2] Add machine type aliases
  2009-07-22  9:02                                                                                                       ` Mark McLoughlin
@ 2009-07-22  9:02                                                                                                         ` Mark McLoughlin
  2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 2/2] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
  1 sibling, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-22  9:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin

Add an 'alias' field to QEMUMachine and display it in the output of
'qemu -M ?' as a separate entry with an '(alias of foo)' suffix.

Aliases can change targets in newer versions of qemu, so management tools
may choose to canonicalize machine types to ensure that, if a user chooses
an alias, the actual machine type used will remain compatible in
future.

This is intended to mimic a symlink to a machine description file.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/boards.h |    1 +
 vl.c        |    5 +++++
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/boards.h b/hw/boards.h
index 11acb89..d889341 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -14,6 +14,7 @@ typedef void QEMUMachineInitFunc(ram_addr_t ram_size,
 
 typedef struct QEMUMachine {
     const char *name;
+    const char *alias;
     const char *desc;
     QEMUMachineInitFunc *init;
     int use_scsi;
diff --git a/vl.c b/vl.c
index f475693..02d5da5 100644
--- a/vl.c
+++ b/vl.c
@@ -3357,6 +3357,8 @@ static QEMUMachine *find_machine(const char *name)
     for(m = first_machine; m != NULL; m = m->next) {
         if (!strcmp(m->name, name))
             return m;
+        if (m->alias && !strcmp(m->alias, name))
+            return m;
     }
     return NULL;
 }
@@ -4993,6 +4995,9 @@ int main(int argc, char **argv, char **envp)
                     QEMUMachine *m;
                     printf("Supported machines are:\n");
                     for(m = first_machine; m != NULL; m = m->next) {
+                        if (m->alias)
+                            printf("%-10s %s (alias of %s)\n",
+                                   m->alias, m->desc, m->name);
                         printf("%-10s %s%s\n",
                                m->name, m->desc,
                                m->is_default ? " (default)" : "");
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* [Qemu-devel] [PATCH 2/2] Add a pc-0.11 machine type and make the pc type an alias
  2009-07-22  9:02                                                                                                       ` Mark McLoughlin
  2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 1/2] Add machine type aliases Mark McLoughlin
@ 2009-07-22  9:02                                                                                                         ` Mark McLoughlin
  2009-07-23 13:34                                                                                                           ` Mark McLoughlin
  1 sibling, 1 reply; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-22  9:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mark McLoughlin

The pc-0.11 type allows users of qemu-0.11 to use a machine type which
they know will remain compatible when they upgrade to qemu-0.12.

Management tools may choose to canonicalize the 'pc' machine type to
'pc-0.11' so that if the 'pc' alias changes target in future versions
of qemu, the machine type used will remain compatible.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 hw/pc.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index a50f23d..c55c303 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1512,7 +1512,8 @@ void cmos_set_s3_resume(void)
 }
 
 static QEMUMachine pc_machine = {
-    .name = "pc",
+    .name = "pc-0.11",
+    .alias = "pc",
     .desc = "Standard PC",
     .init = pc_init_pci,
     .max_cpus = 255,
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type
  2009-07-22  8:56                                                                                                       ` Gerd Hoffmann
@ 2009-07-22  9:05                                                                                                         ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-22  9:05 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: qemu-devel

On Wed, 2009-07-22 at 10:56 +0200, Gerd Hoffmann wrote:
> On 07/22/09 04:14, Anthony Liguori wrote:
> > Mark McLoughlin wrote:
> >> On Tue, 2009-07-21 at 15:21 +0100, Mark McLoughlin wrote:
> >>> We have the pc-0.10 machine type now which does exactly the same
> >>> thing.
> >>
> >> Ah, I see Gerd fixed this already.
> >>
> >> My other three patches should apply on top of his.
> >
> > Three or two? Can you take a closer look?
> 
> Isn't going to work.  I'd suggest to kick out my patch and take Mark's 
> complete series instead.  It does all my patch did, cleans up a bit more 
> in virtio-pci.c (leading to conflicts) and adds the aliasing.

Heh :-)

Either way is fine - the only difference between Gerd's patch plus
the two I just posted and the original four is a small cleanup in
virtio-pci.c

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] Add a pc-0.11 machine type and make the pc type an alias
  2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 2/2] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
@ 2009-07-23 13:34                                                                                                           ` Mark McLoughlin
  0 siblings, 0 replies; 457+ messages in thread
From: Mark McLoughlin @ 2009-07-23 13:34 UTC (permalink / raw)
  To: qemu-devel

Hey,

On Wed, 2009-07-22 at 10:02 +0100, Mark McLoughlin wrote:
> The pc-0.11 type allows users of qemu-0.11 to use a machine type which
> they know will remain compatible when they upgrade to qemu-0.12.
> 
> Management tools may choose to canonicalize the 'pc' machine type to
> 'pc-0.11' so that if the 'pc' alias changes target in future versions
> of qemu, the machine type used will remain compatible.

By the way, I think it would make sense to pull this into stable-0.11
before the 0.11 release.

It's a fairly minor code change which means users can start using the
pc-0.11 machine type now so that, without doing anything in future, they
can avoid compat issues when they update to qemu-0.12.

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 457+ messages in thread

end of thread, other threads:[~2009-07-23 13:35 UTC | newest]

Thread overview: 457+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1244192535.git.mst@redhat.com>
2009-06-05 10:22 ` [PATCHv3 01/13] qemu: make default_write_config use mask table Michael S. Tsirkin
2009-06-05 10:22 ` Michael S. Tsirkin
2009-06-05 10:22   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 02/13] qemu: capability bits in pci save/restore Michael S. Tsirkin
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 03/13] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-09 17:11   ` Glauber Costa
2009-06-09 17:11     ` Glauber Costa
2009-06-10  9:54     ` Michael S. Tsirkin
2009-06-10  9:54     ` Michael S. Tsirkin
2009-06-10  9:54       ` Michael S. Tsirkin
2009-06-10 14:55       ` Glauber Costa
2009-06-10 14:55         ` Glauber Costa
2009-06-10 15:01         ` Michael S. Tsirkin
2009-06-10 15:01         ` Michael S. Tsirkin
2009-06-10 15:01           ` Michael S. Tsirkin
2009-06-10 15:24           ` Paul Brook
2009-06-10 15:24             ` Paul Brook
2009-06-10 15:50             ` Michael S. Tsirkin
2009-06-10 15:50             ` Michael S. Tsirkin
2009-06-10 15:50               ` Michael S. Tsirkin
2009-06-10 17:43             ` Jamie Lokier
2009-06-10 17:43               ` Jamie Lokier
2009-06-10 18:22               ` Michael S. Tsirkin
2009-06-10 18:22                 ` Michael S. Tsirkin
2009-06-10 19:27                 ` Jamie Lokier
2009-06-10 19:27                   ` Jamie Lokier
2009-06-12  8:43                   ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
2009-06-12  8:43                   ` Mark McLoughlin
2009-06-12  8:43                     ` Mark McLoughlin
2009-06-12 13:59                     ` Michael S. Tsirkin
2009-06-12 13:59                       ` Michael S. Tsirkin
2009-06-12 14:48                       ` Mark McLoughlin
2009-06-12 14:48                       ` Mark McLoughlin
2009-06-12 14:48                         ` Mark McLoughlin
2009-06-12 13:59                     ` Michael S. Tsirkin
2009-06-12 14:51                     ` Anthony Liguori
2009-06-12 14:51                     ` Anthony Liguori
2009-06-12 14:51                       ` Anthony Liguori
2009-06-12 15:41                       ` Mark McLoughlin
2009-06-12 15:41                         ` Mark McLoughlin
2009-06-12 16:11                         ` Anthony Liguori
2009-06-12 16:11                         ` Anthony Liguori
2009-06-12 16:11                           ` Anthony Liguori
2009-06-12 16:48                           ` Mark McLoughlin
2009-06-12 16:48                           ` Mark McLoughlin
2009-06-12 16:48                             ` Mark McLoughlin
2009-06-12 17:00                             ` Anthony Liguori
2009-06-12 17:00                             ` Anthony Liguori
2009-06-12 17:00                               ` Anthony Liguori
2009-06-12 17:31                               ` Mark McLoughlin
2009-06-12 17:31                                 ` Mark McLoughlin
2009-06-12 17:44                                 ` Blue Swirl
2009-06-12 17:44                                 ` Blue Swirl
2009-06-12 17:44                                   ` Blue Swirl
2009-06-12 17:55                                   ` Mark McLoughlin
2009-06-12 17:55                                   ` Mark McLoughlin
2009-06-12 17:55                                     ` Mark McLoughlin
2009-06-16 18:38                                 ` Jamie Lokier
2009-06-16 18:38                                   ` Jamie Lokier
2009-06-16 18:38                                 ` Jamie Lokier
2009-06-12 17:31                               ` Mark McLoughlin
2009-06-14  9:50                             ` Michael S. Tsirkin
2009-06-14  9:50                               ` Michael S. Tsirkin
2009-06-15  9:08                               ` Mark McLoughlin
2009-06-15  9:08                               ` Mark McLoughlin
2009-06-15  9:08                                 ` Mark McLoughlin
2009-06-15  9:27                                 ` Avi Kivity
2009-06-15  9:27                                   ` Avi Kivity
2009-06-15 10:32                                   ` Michael S. Tsirkin
2009-06-15 10:32                                     ` Michael S. Tsirkin
2009-06-15 10:44                                     ` Gleb Natapov
2009-06-15 10:44                                     ` Gleb Natapov
2009-06-15 10:44                                       ` Gleb Natapov
2009-06-15 10:46                                       ` Michael S. Tsirkin
2009-06-15 10:46                                         ` Michael S. Tsirkin
2009-06-15 10:52                                         ` Gleb Natapov
2009-06-15 10:52                                           ` Gleb Natapov
2009-06-15 11:07                                           ` Michael S. Tsirkin
2009-06-15 11:07                                           ` Michael S. Tsirkin
2009-06-15 11:07                                             ` Michael S. Tsirkin
2009-06-15 11:14                                             ` Gleb Natapov
2009-06-15 11:14                                             ` Gleb Natapov
2009-06-15 11:14                                               ` Gleb Natapov
2009-06-15 11:34                                               ` Michael S. Tsirkin
2009-06-15 11:34                                               ` Michael S. Tsirkin
2009-06-15 11:34                                                 ` Michael S. Tsirkin
2009-06-15 10:52                                         ` Gleb Natapov
2009-06-15 10:46                                       ` Michael S. Tsirkin
2009-06-15 11:27                                     ` Avi Kivity
2009-06-15 11:27                                     ` Avi Kivity
2009-06-15 11:27                                       ` Avi Kivity
2009-06-15 11:48                                       ` Michael S. Tsirkin
2009-06-15 11:48                                       ` Michael S. Tsirkin
2009-06-15 11:48                                         ` Michael S. Tsirkin
2009-06-15 11:56                                         ` Avi Kivity
2009-06-15 11:56                                         ` Avi Kivity
2009-06-15 11:56                                           ` Avi Kivity
2009-06-15 12:41                                           ` Michael S. Tsirkin
2009-06-15 12:41                                             ` Michael S. Tsirkin
2009-06-15 12:50                                             ` Avi Kivity
2009-06-15 12:50                                             ` Avi Kivity
2009-06-15 12:50                                               ` Avi Kivity
2009-06-15 12:52                                               ` Anthony Liguori
2009-06-15 12:52                                               ` Anthony Liguori
2009-06-15 12:52                                                 ` Anthony Liguori
2009-06-15 13:09                                                 ` Avi Kivity
2009-06-15 13:09                                                 ` Avi Kivity
2009-06-15 13:09                                                   ` Avi Kivity
2009-06-15 13:23                                                   ` Anthony Liguori
2009-06-15 13:23                                                     ` Anthony Liguori
2009-06-15 13:42                                                     ` Avi Kivity
2009-06-15 13:42                                                     ` Avi Kivity
2009-06-15 13:42                                                       ` Avi Kivity
2009-06-15 13:51                                                       ` Anthony Liguori
2009-06-15 13:51                                                         ` Anthony Liguori
2009-06-15 14:06                                                         ` Dor Laor
2009-06-15 14:06                                                         ` Dor Laor
2009-06-15 14:06                                                           ` Dor Laor
2009-06-15 14:24                                                           ` Anthony Liguori
2009-06-15 14:24                                                             ` Anthony Liguori
2009-06-15 14:37                                                             ` Michael S. Tsirkin
2009-06-15 14:37                                                               ` Michael S. Tsirkin
2009-06-15 15:03                                                               ` Anthony Liguori
2009-06-15 15:03                                                               ` Anthony Liguori
2009-06-15 15:03                                                                 ` Anthony Liguori
2009-06-15 15:08                                                                 ` Daniel P. Berrange
2009-06-15 15:08                                                                 ` Daniel P. Berrange
2009-06-15 15:08                                                                   ` Daniel P. Berrange
2009-06-15 15:12                                                                   ` Dor Laor
2009-06-15 15:12                                                                   ` Dor Laor
2009-06-15 15:12                                                                     ` Dor Laor
2009-06-15 15:15                                                                     ` Avi Kivity
2009-06-15 15:15                                                                     ` Avi Kivity
2009-06-15 15:15                                                                       ` Avi Kivity
2009-06-16 18:32                                                                       ` Jamie Lokier
2009-06-16 18:32                                                                         ` Jamie Lokier
2009-06-17  6:38                                                                         ` Avi Kivity
2009-06-17  6:38                                                                           ` Avi Kivity
2009-06-17 11:51                                                                           ` Jamie Lokier
2009-06-17 11:51                                                                             ` Jamie Lokier
2009-06-17 11:51                                                                           ` Jamie Lokier
2009-06-17  6:38                                                                         ` Avi Kivity
2009-06-16 18:32                                                                       ` Jamie Lokier
2009-06-15 16:27                                                                     ` Mark McLoughlin
2009-06-15 16:27                                                                     ` Mark McLoughlin
2009-06-15 16:27                                                                       ` Mark McLoughlin
2009-06-15 17:13                                                                       ` Avi Kivity
2009-06-15 17:13                                                                         ` Avi Kivity
2009-06-15 14:37                                                             ` Michael S. Tsirkin
2009-06-15 15:05                                                             ` Avi Kivity
2009-06-15 15:05                                                             ` Avi Kivity
2009-06-15 15:05                                                               ` Avi Kivity
2009-06-15 15:11                                                               ` Anthony Liguori
2009-06-15 15:11                                                               ` Anthony Liguori
2009-06-15 15:11                                                                 ` Anthony Liguori
2009-06-15 16:27                                                               ` Mark McLoughlin
2009-06-15 16:27                                                                 ` Mark McLoughlin
2009-06-15 17:09                                                                 ` Avi Kivity
2009-06-15 17:09                                                                   ` Avi Kivity
2009-06-15 18:12                                                                 ` Anthony Liguori
2009-06-15 18:12                                                                   ` Anthony Liguori
2009-06-15 18:21                                                                   ` Avi Kivity
2009-06-15 18:21                                                                     ` Avi Kivity
2009-06-15 18:24                                                                     ` Anthony Liguori
2009-06-15 18:24                                                                     ` Anthony Liguori
2009-06-15 18:24                                                                       ` Anthony Liguori
2009-06-15 18:44                                                                     ` Blue Swirl
2009-06-15 18:44                                                                       ` Blue Swirl
2009-06-16  8:56                                                                       ` Avi Kivity
2009-06-16  8:56                                                                         ` Avi Kivity
2009-06-16  8:56                                                                       ` Avi Kivity
2009-06-15 18:21                                                                   ` Avi Kivity
2009-06-16 12:14                                                                   ` Mark McLoughlin
2009-06-16 12:14                                                                   ` Mark McLoughlin
2009-06-16 12:14                                                                     ` Mark McLoughlin
2009-06-16 12:28                                                                     ` Avi Kivity
2009-06-16 12:28                                                                       ` Avi Kivity
2009-06-16 12:39                                                                       ` Mark McLoughlin
2009-06-16 12:39                                                                       ` Mark McLoughlin
2009-06-16 12:39                                                                         ` Mark McLoughlin
2009-06-16 12:51                                                                         ` Avi Kivity
2009-06-16 12:51                                                                         ` Avi Kivity
2009-06-16 12:51                                                                           ` Avi Kivity
2009-06-16 18:44                                                                         ` Jamie Lokier
2009-06-16 18:44                                                                           ` Jamie Lokier
2009-06-17  8:33                                                                           ` Mark McLoughlin
2009-06-17  8:33                                                                             ` Mark McLoughlin
2009-06-17  9:03                                                                             ` Avi Kivity
2009-06-17  9:03                                                                             ` Avi Kivity
2009-06-17  9:03                                                                               ` Avi Kivity
2009-06-17  9:18                                                                               ` Mark McLoughlin
2009-06-17  9:18                                                                                 ` Mark McLoughlin
2009-06-17  9:26                                                                                 ` Avi Kivity
2009-06-17  9:26                                                                                   ` Avi Kivity
2009-06-17 11:58                                                                                   ` Jamie Lokier
2009-06-17 11:58                                                                                   ` Jamie Lokier
2009-06-17 11:58                                                                                     ` Jamie Lokier
2009-06-17  9:26                                                                                 ` Avi Kivity
2009-06-17  9:18                                                                               ` Mark McLoughlin
2009-06-17  8:33                                                                           ` Mark McLoughlin
2009-06-16 18:44                                                                         ` Jamie Lokier
2009-06-24  8:04                                                                       ` Dietmar Maurer
2009-06-16 12:28                                                                     ` Avi Kivity
2009-07-07 11:08                                                                   ` [Qemu-devel] [PATCH 0/3] Change virtio blk/console PCI classes and introduce compat machine type [was Re: Configuration vs. compat hints] Mark McLoughlin
2009-07-07 11:09                                                                     ` [Qemu-devel] [PATCH 1/3] Change default PCI class of virtio-blk to PCI_CLASS_STORAGE_SCSI Mark McLoughlin
2009-07-07 11:09                                                                       ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Mark McLoughlin
2009-07-07 11:10                                                                         ` [Qemu-devel] [PATCH 3/3] Add a pc-0-10 machine type for compatibility with 0.10.x Mark McLoughlin
2009-07-07 12:01                                                                           ` Avi Kivity
2009-07-08 10:46                                                                             ` Mark McLoughlin
2009-07-08 10:48                                                                               ` [Qemu-devel] [PATCH 3/3 v2] " Mark McLoughlin
2009-07-08 13:00                                                                                 ` Gerd Hoffmann
2009-07-08 13:44                                                                                   ` Anthony Liguori
2009-07-08 14:09                                                                                     ` Gerd Hoffmann
2009-07-08 15:08                                                                                       ` Mark McLoughlin
2009-07-08 19:07                                                                                         ` Gerd Hoffmann
2009-07-08 21:45                                                                                           ` Anthony Liguori
2009-07-09  7:56                                                                                             ` Gerd Hoffmann
2009-07-09  8:39                                                                                               ` Mark McLoughlin
2009-07-09  8:50                                                                                                 ` Avi Kivity
2009-07-09  8:57                                                                                                   ` Mark McLoughlin
2009-07-09  9:04                                                                                                     ` Avi Kivity
2009-07-09  9:05                                                                                                 ` Gerd Hoffmann
2009-07-09 10:01                                                                                                   ` Gerd Hoffmann
2009-07-09 13:31                                                                                                     ` Mark McLoughlin
2009-07-09 13:47                                                                                                       ` Gerd Hoffmann
2009-07-09 13:35                                                                                                     ` Anthony Liguori
2009-07-09 13:55                                                                                                       ` Gerd Hoffmann
2009-07-09 16:09                                                                                                       ` Paul Brook
2009-07-09 11:51                                                                                                   ` Avi Kivity
2009-07-09 13:29                                                                                                     ` Anthony Liguori
2009-07-09 13:59                                                                                                       ` Avi Kivity
2009-07-09 15:00                                                                                                   ` Anthony Liguori
2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 0/4] Add pc-0.11 machine type and make pc an alias to it Mark McLoughlin
2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 1/4] Remove the pc-0-10 machine type Mark McLoughlin
2009-07-21 14:49                                                                                                   ` Mark McLoughlin
2009-07-22  2:14                                                                                                     ` Anthony Liguori
2009-07-22  8:56                                                                                                       ` Gerd Hoffmann
2009-07-22  9:05                                                                                                         ` Mark McLoughlin
2009-07-22  9:02                                                                                                       ` Mark McLoughlin
2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 1/2] Add machine type aliases Mark McLoughlin
2009-07-22  9:02                                                                                                         ` [Qemu-devel] [PATCH 2/2] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
2009-07-23 13:34                                                                                                           ` Mark McLoughlin
2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 2/4] Remove the virtio-{blk, console}-pci-0-10 device types Mark McLoughlin
2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 3/4] Add machine type aliases Mark McLoughlin
2009-07-21 14:21                                                                                                 ` [Qemu-devel] [PATCH 4/4] Add a pc-0.11 machine type and make the pc type an alias Mark McLoughlin
2009-07-09  8:00                                                                                             ` [Qemu-devel] [PATCH 3/3 v2] Add a pc-0-10 machine type for compatibility with 0.10.x Avi Kivity
2009-07-15 11:27                                                                         ` [Qemu-devel] [PATCH 2/3] Change default PCI class of virtio-console to PCI_CLASS_SERIAL_OTHER Amit Shah
2009-06-15 18:12                                                                 ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2009-06-15 16:27                                                               ` Mark McLoughlin
2009-06-15 13:51                                                       ` Anthony Liguori
2009-06-15 13:23                                                   ` Anthony Liguori
2009-06-15 12:41                                           ` Michael S. Tsirkin
2009-06-15 10:32                                   ` Michael S. Tsirkin
2009-06-15 11:35                                   ` Configuration vs. compat hints Markus Armbruster
2009-06-15 11:35                                     ` [Qemu-devel] " Markus Armbruster
2009-06-15 11:43                                     ` Avi Kivity
2009-06-15 11:43                                     ` Avi Kivity
2009-06-15 11:43                                       ` [Qemu-devel] " Avi Kivity
2009-06-15 11:59                                       ` Stefano Stabellini
2009-06-15 11:59                                       ` Stefano Stabellini
2009-06-15 11:59                                         ` [Qemu-devel] " Stefano Stabellini
2009-06-15 12:41                                       ` Markus Armbruster
2009-06-15 12:41                                       ` Markus Armbruster
2009-06-15 12:50                                         ` Anthony Liguori
2009-06-15 12:50                                           ` Anthony Liguori
2009-06-15 12:50                                         ` Anthony Liguori
2009-06-15 14:23                                       ` Javier Guerra
2009-06-15 14:23                                         ` [Qemu-devel] " Javier Guerra
2009-06-15 14:23                                       ` Javier Guerra
2009-06-15 11:35                                   ` Markus Armbruster
2009-06-15 12:41                                   ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2009-06-15 12:41                                     ` Anthony Liguori
2009-06-15 12:55                                     ` Avi Kivity
2009-06-15 12:55                                     ` Avi Kivity
2009-06-15 12:55                                       ` Avi Kivity
2009-06-15 13:04                                     ` Configuration vs. compat hints Markus Armbruster
2009-06-15 13:04                                     ` Markus Armbruster
2009-06-15 13:04                                       ` [Qemu-devel] " Markus Armbruster
2009-06-15 12:41                                   ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Anthony Liguori
2009-06-15  9:27                                 ` Avi Kivity
2009-06-15  9:43                               ` Avi Kivity
2009-06-15  9:43                               ` Avi Kivity
2009-06-15  9:43                                 ` Avi Kivity
2009-06-15 10:29                                 ` Michael S. Tsirkin
2009-06-15 10:29                                 ` Michael S. Tsirkin
2009-06-15 10:29                                   ` Michael S. Tsirkin
2009-06-15 12:45                                 ` Anthony Liguori
2009-06-15 12:45                                   ` Anthony Liguori
2009-06-15 13:03                                   ` Avi Kivity
2009-06-15 13:03                                     ` Avi Kivity
2009-06-15 13:20                                     ` Anthony Liguori
2009-06-15 13:20                                       ` Anthony Liguori
2009-06-15 13:35                                       ` Avi Kivity
2009-06-15 13:35                                       ` Avi Kivity
2009-06-15 13:35                                         ` Avi Kivity
2009-06-15 13:45                                         ` Anthony Liguori
2009-06-15 13:45                                           ` Anthony Liguori
2009-06-15 13:54                                           ` Avi Kivity
2009-06-15 13:54                                             ` Avi Kivity
2009-06-15 15:07                                             ` Anthony Liguori
2009-06-15 15:07                                             ` Anthony Liguori
2009-06-15 15:07                                               ` Anthony Liguori
2009-06-15 15:11                                               ` Avi Kivity
2009-06-15 15:11                                                 ` Avi Kivity
2009-06-15 15:20                                                 ` Anthony Liguori
2009-06-15 15:20                                                 ` Anthony Liguori
2009-06-15 15:20                                                   ` Anthony Liguori
2009-06-15 15:26                                                   ` Avi Kivity
2009-06-15 15:26                                                   ` Avi Kivity
2009-06-15 15:26                                                     ` Avi Kivity
2009-06-15 15:11                                               ` Avi Kivity
2009-06-15 13:54                                           ` Avi Kivity
2009-06-15 13:45                                         ` Anthony Liguori
2009-06-15 13:20                                     ` Anthony Liguori
2009-06-15 13:03                                   ` Avi Kivity
2009-06-15 13:17                                   ` Gerd Hoffmann
2009-06-15 13:17                                   ` Gerd Hoffmann
2009-06-15 13:17                                     ` Gerd Hoffmann
2009-06-15 12:45                                 ` Anthony Liguori
2009-06-14  9:50                             ` Michael S. Tsirkin
2009-06-12 15:41                       ` Mark McLoughlin
2009-06-14  7:55                       ` Avi Kivity
2009-06-14  7:55                       ` Avi Kivity
2009-06-14  7:55                         ` Avi Kivity
2009-06-12 14:55                     ` Anthony Liguori
2009-06-12 14:55                       ` Anthony Liguori
2009-06-12 15:53                       ` Mark McLoughlin
2009-06-12 15:53                         ` Mark McLoughlin
2009-06-12 16:12                         ` Anthony Liguori
2009-06-12 16:12                         ` Anthony Liguori
2009-06-12 16:12                           ` Anthony Liguori
2009-06-12 16:48                           ` Mark McLoughlin
2009-06-12 16:48                           ` Mark McLoughlin
2009-06-12 16:48                             ` Mark McLoughlin
2009-06-14  7:58                             ` Avi Kivity
2009-06-14  7:58                             ` Avi Kivity
2009-06-14  7:58                               ` Avi Kivity
2009-06-15  5:32                               ` Configuration vs. compat hints Markus Armbruster
2009-06-15  5:32                               ` Markus Armbruster
2009-06-15  5:32                                 ` [Qemu-devel] " Markus Armbruster
2009-06-15  9:09                               ` Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities] Mark McLoughlin
2009-06-15  9:09                                 ` Mark McLoughlin
2009-06-15 11:32                                 ` Avi Kivity
2009-06-15 11:32                                 ` Avi Kivity
2009-06-15 11:32                                   ` Avi Kivity
2009-06-15 12:48                                   ` Anthony Liguori
2009-06-15 12:48                                   ` Anthony Liguori
2009-06-15 12:48                                     ` Anthony Liguori
2009-06-15 13:12                                     ` Avi Kivity
2009-06-15 13:12                                       ` Avi Kivity
2009-06-15 13:24                                       ` Anthony Liguori
2009-06-15 13:24                                         ` Anthony Liguori
2009-06-15 13:43                                         ` Avi Kivity
2009-06-15 13:43                                         ` Avi Kivity
2009-06-15 13:43                                           ` Avi Kivity
2009-06-15 13:24                                       ` Anthony Liguori
2009-06-15 13:12                                     ` Avi Kivity
2009-06-15 14:00                                     ` Mark McLoughlin
2009-06-15 14:00                                       ` Mark McLoughlin
2009-06-15 14:20                                       ` Anthony Liguori
2009-06-15 14:20                                         ` Anthony Liguori
2009-06-15 14:34                                         ` Michael S. Tsirkin
2009-06-15 14:34                                           ` Michael S. Tsirkin
2009-06-15 15:11                                           ` Anthony Liguori
2009-06-15 15:11                                             ` Anthony Liguori
2009-06-15 15:11                                           ` Anthony Liguori
2009-06-15 14:34                                         ` Michael S. Tsirkin
2009-06-15 14:20                                       ` Anthony Liguori
2009-06-15 14:00                                     ` Mark McLoughlin
2009-06-15  9:09                               ` Mark McLoughlin
2009-06-14  9:34                         ` Michael S. Tsirkin
2009-06-14  9:34                         ` Michael S. Tsirkin
2009-06-14  9:34                           ` Michael S. Tsirkin
2009-06-14  9:37                           ` Avi Kivity
2009-06-14  9:37                           ` Avi Kivity
2009-06-14  9:37                             ` Avi Kivity
2009-06-14  9:47                             ` Michael S. Tsirkin
2009-06-14  9:47                               ` Michael S. Tsirkin
2009-06-15  9:38                               ` Avi Kivity
2009-06-15  9:38                                 ` Avi Kivity
2009-06-15  9:38                               ` Avi Kivity
2009-06-14  9:47                             ` Michael S. Tsirkin
2009-06-15  9:02                           ` Mark McLoughlin
2009-06-15  9:02                           ` Mark McLoughlin
2009-06-15  9:02                             ` Mark McLoughlin
2009-06-12 15:53                       ` Mark McLoughlin
2009-06-12 14:55                     ` Anthony Liguori
2009-06-10 19:27                 ` [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities Jamie Lokier
2009-06-10 18:22               ` Michael S. Tsirkin
2009-06-10 17:43             ` Jamie Lokier
2009-06-10 15:24           ` Paul Brook
2009-06-10 14:55       ` Glauber Costa
2009-06-09 17:11   ` Glauber Costa
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 04/13] qemu: helper routines for pci access Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 05/13] qemu: MSI-X support functions Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-09 17:26   ` Glauber Costa
2009-06-09 17:26   ` Glauber Costa
2009-06-09 17:26     ` Glauber Costa
2009-06-10  9:58     ` Michael S. Tsirkin
2009-06-10  9:58     ` Michael S. Tsirkin
2009-06-10  9:58       ` Michael S. Tsirkin
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 06/13] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 07/13] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-09 17:33   ` Glauber Costa
2009-06-09 17:33   ` Glauber Costa
2009-06-09 17:33     ` Glauber Costa
2009-06-10  9:59     ` Michael S. Tsirkin
2009-06-10  9:59     ` Michael S. Tsirkin
2009-06-10  9:59       ` Michael S. Tsirkin
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23 ` [PATCHv3 08/13] qemu: add support for resizing regions Michael S. Tsirkin
2009-06-05 10:23 ` Michael S. Tsirkin
2009-06-05 10:23   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-09 17:36   ` Glauber Costa
2009-06-09 17:36     ` Glauber Costa
2009-06-10 10:05     ` Michael S. Tsirkin
2009-06-10 10:05     ` Michael S. Tsirkin
2009-06-10 10:05       ` Michael S. Tsirkin
2009-06-10 10:46     ` Michael S. Tsirkin
2009-06-10 10:46       ` Michael S. Tsirkin
2009-06-10 10:46     ` Michael S. Tsirkin
2009-06-09 17:36   ` Glauber Costa
2009-06-05 10:24 ` [PATCHv3 09/13] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 10/13] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 11/13] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 12/13] qemu: virtio save/load bindings Michael S. Tsirkin
2009-06-05 10:24 ` Michael S. Tsirkin
2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-09 17:45   ` Glauber Costa
2009-06-09 17:45   ` Glauber Costa
2009-06-09 17:45     ` Glauber Costa
2009-06-10 10:11     ` Michael S. Tsirkin
2009-06-10 10:11     ` Michael S. Tsirkin
2009-06-10 10:11       ` Michael S. Tsirkin
2009-06-10 11:33     ` Michael S. Tsirkin
2009-06-10 11:33       ` Michael S. Tsirkin
2009-06-10 11:33     ` Michael S. Tsirkin
2009-06-05 10:24 ` [PATCHv3 13/13] qemu: add pci_get/set_byte Michael S. Tsirkin
2009-06-05 10:24   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-05 10:24 ` Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.