All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv6 01/12] qemu/pci: make default_write_config use mask table
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:45   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0a738db..359959e 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -238,6 +238,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_wmask(PCIDevice *dev)
+{
+    int i;
+    dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                              | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->wmask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -257,6 +268,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_wmask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -328,6 +340,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t wmask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -343,12 +356,17 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        wmask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -457,118 +475,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t wmask = d->wmask[addr];
+        d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -841,16 +762,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index fcca526..44aa61b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -98,16 +98,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -137,10 +145,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 01/12] qemu/pci: make default_write_config use mask table
@ 2009-06-21 16:45   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0a738db..359959e 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -238,6 +238,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_wmask(PCIDevice *dev)
+{
+    int i;
+    dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                              | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->wmask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -257,6 +268,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_wmask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -328,6 +340,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t wmask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -343,12 +356,17 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        wmask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -457,118 +475,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t wmask = d->wmask[addr];
+        d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -841,16 +762,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index fcca526..44aa61b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -98,16 +98,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -137,10 +145,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 01/12] qemu/pci: make default_write_config use mask table
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:45 ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0a738db..359959e 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -238,6 +238,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_wmask(PCIDevice *dev)
+{
+    int i;
+    dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                              | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->wmask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -257,6 +268,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_wmask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -328,6 +340,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t wmask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -343,12 +356,17 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        wmask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -457,118 +475,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t wmask = d->wmask[addr];
+        d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -841,16 +762,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index fcca526..44aa61b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -98,16 +98,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -137,10 +145,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 02/12] qemu/pci: helper routines for pci access
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:45   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 44aa61b..2ba582e 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -216,21 +216,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 02/12] qemu/pci: helper routines for pci access
@ 2009-06-21 16:45   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 44aa61b..2ba582e 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -216,21 +216,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 02/12] qemu/pci: helper routines for pci access
       [not found] <cover.1245594586.git.mst@redhat.com>
  2009-06-21 16:45 ` [PATCHv6 01/12] qemu/pci: make default_write_config use mask table Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:45 ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (20 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 44aa61b..2ba582e 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -216,21 +216,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 03/12] qemu/pci: add routines to manage PCI capabilities
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:45   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/pci.h |   18 ++++++++++++++-
 2 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 359959e..72ca27b 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -148,7 +148,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
-
     return 0;
 }
 
@@ -856,3 +855,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+            break;
+
+    if (prev_p)
+        *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->wmask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->wmask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    return pci_find_capability_list(pdev, cap_id, NULL);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 2ba582e..7172d81 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -121,6 +121,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -128,7 +132,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -158,6 +162,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -186,6 +193,15 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 03/12] qemu/pci: add routines to manage PCI capabilities
@ 2009-06-21 16:45   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/pci.h |   18 ++++++++++++++-
 2 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 359959e..72ca27b 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -148,7 +148,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
-
     return 0;
 }
 
@@ -856,3 +855,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+            break;
+
+    if (prev_p)
+        *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->wmask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->wmask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    return pci_find_capability_list(pdev, cap_id, NULL);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 2ba582e..7172d81 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -121,6 +121,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -128,7 +132,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -158,6 +162,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -186,6 +193,15 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 03/12] qemu/pci: add routines to manage PCI capabilities
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (3 preceding siblings ...)
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:45 ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (18 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:45 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/pci.h |   18 ++++++++++++++-
 2 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 359959e..72ca27b 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -148,7 +148,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
-
     return 0;
 }
 
@@ -856,3 +855,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
+            break;
+
+    if (prev_p)
+        *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->wmask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->wmask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    return pci_find_capability_list(pdev, cap_id, NULL);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 2ba582e..7172d81 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -121,6 +121,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -128,7 +132,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -158,6 +162,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -186,6 +193,15 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 04/12] qemu/pci: check constant registers on load
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:49   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:49 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add "cmask" table of constant register masks: if a bit is not writeable
and is set in cmask table, this bit is checked on load.  An attempt to
load an image that would change such a register causes load to fail.
Use this table to make sure that load does not modify registers that
guest can not change (directly or indirectly).

Note: we can't just assume that read-only registers never change,
because the guest could change a register indirectly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   26 +++++++++++++++++++++++++-
 hw/pci.h |    5 +++++
 2 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 72ca27b..d8fa439 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -136,13 +136,19 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
 {
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
     uint32_t version_id;
     int i;
 
     version_id = qemu_get_be32(f);
     if (version_id > 2)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
+    qemu_get_buffer(f, config, sizeof config);
+    for (i = 0; i < sizeof config; ++i)
+        if ((config[i] ^ s->config[i]) & s->cmask[i] & ~s->wmask[i])
+            return -EINVAL;
+    memcpy(s->config, config, sizeof config);
+
     pci_update_mappings(s);
 
     if (version_id >= 2)
@@ -237,6 +243,18 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_cmask(PCIDevice *dev)
+{
+    pci_set_word(dev->cmask + PCI_VENDOR_ID, 0xffff);
+    pci_set_word(dev->cmask + PCI_DEVICE_ID, 0xffff);
+    dev->cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST;
+    dev->cmask[PCI_REVISION_ID] = 0xff;
+    dev->cmask[PCI_CLASS_PROG] = 0xff;
+    pci_set_word(dev->cmask + PCI_CLASS_DEVICE, 0xffff);
+    dev->cmask[PCI_HEADER_TYPE] = 0xff;
+    dev->cmask[PCI_CAPABILITY_LIST] = 0xff;
+}
+
 static void pci_init_wmask(PCIDevice *dev)
 {
     int i;
@@ -267,6 +285,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_cmask(pci_dev);
     pci_init_wmask(pci_dev);
 
     if (!config_read)
@@ -366,6 +385,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
     *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
+    *(uint32_t *)(pci_dev->cmask + addr) = 0xffffffff;
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -900,6 +920,8 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     memset(pdev->used + offset, 0xFF, size);
     /* Make capability read-only by default */
     memset(pdev->wmask + offset, 0, size);
+    /* Check capability by default */
+    memset(pdev->cmask + offset, 0xFF, size);
     return offset;
 }
 
@@ -912,6 +934,8 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
     /* Make capability writeable again */
     memset(pdev->wmask + offset, 0xff, size);
+    /* Clear cmask as device-specific registers can't be checked */
+    memset(pdev->cmask + offset, 0, size);
     memset(pdev->used + offset, 0, size);
 
     if (!pdev->config[PCI_CAPABILITY_LIST])
diff --git a/hw/pci.h b/hw/pci.h
index 7172d81..558e18f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -101,6 +101,7 @@ typedef struct PCIIORegion {
 #define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
+#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
 #define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
 #define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
@@ -159,6 +160,10 @@ struct PCIDevice {
     /* PCI config space */
     uint8_t config[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to enable config checks on load. Note that writeable bits are
+     * never checked even if set in cmask. */
+    uint8_t cmask[PCI_CONFIG_SPACE_SIZE];
+
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 04/12] qemu/pci: check constant registers on load
@ 2009-06-21 16:49   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:49 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Add "cmask" table of constant register masks: if a bit is not writeable
and is set in cmask table, this bit is checked on load.  An attempt to
load an image that would change such a register causes load to fail.
Use this table to make sure that load does not modify registers that
guest can not change (directly or indirectly).

Note: we can't just assume that read-only registers never change,
because the guest could change a register indirectly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   26 +++++++++++++++++++++++++-
 hw/pci.h |    5 +++++
 2 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 72ca27b..d8fa439 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -136,13 +136,19 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
 {
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
     uint32_t version_id;
     int i;
 
     version_id = qemu_get_be32(f);
     if (version_id > 2)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
+    qemu_get_buffer(f, config, sizeof config);
+    for (i = 0; i < sizeof config; ++i)
+        if ((config[i] ^ s->config[i]) & s->cmask[i] & ~s->wmask[i])
+            return -EINVAL;
+    memcpy(s->config, config, sizeof config);
+
     pci_update_mappings(s);
 
     if (version_id >= 2)
@@ -237,6 +243,18 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_cmask(PCIDevice *dev)
+{
+    pci_set_word(dev->cmask + PCI_VENDOR_ID, 0xffff);
+    pci_set_word(dev->cmask + PCI_DEVICE_ID, 0xffff);
+    dev->cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST;
+    dev->cmask[PCI_REVISION_ID] = 0xff;
+    dev->cmask[PCI_CLASS_PROG] = 0xff;
+    pci_set_word(dev->cmask + PCI_CLASS_DEVICE, 0xffff);
+    dev->cmask[PCI_HEADER_TYPE] = 0xff;
+    dev->cmask[PCI_CAPABILITY_LIST] = 0xff;
+}
+
 static void pci_init_wmask(PCIDevice *dev)
 {
     int i;
@@ -267,6 +285,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_cmask(pci_dev);
     pci_init_wmask(pci_dev);
 
     if (!config_read)
@@ -366,6 +385,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
     *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
+    *(uint32_t *)(pci_dev->cmask + addr) = 0xffffffff;
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -900,6 +920,8 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     memset(pdev->used + offset, 0xFF, size);
     /* Make capability read-only by default */
     memset(pdev->wmask + offset, 0, size);
+    /* Check capability by default */
+    memset(pdev->cmask + offset, 0xFF, size);
     return offset;
 }
 
@@ -912,6 +934,8 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
     /* Make capability writeable again */
     memset(pdev->wmask + offset, 0xff, size);
+    /* Clear cmask as device-specific registers can't be checked */
+    memset(pdev->cmask + offset, 0, size);
     memset(pdev->used + offset, 0, size);
 
     if (!pdev->config[PCI_CAPABILITY_LIST])
diff --git a/hw/pci.h b/hw/pci.h
index 7172d81..558e18f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -101,6 +101,7 @@ typedef struct PCIIORegion {
 #define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
+#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
 #define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
 #define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
@@ -159,6 +160,10 @@ struct PCIDevice {
     /* PCI config space */
     uint8_t config[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to enable config checks on load. Note that writeable bits are
+     * never checked even if set in cmask. */
+    uint8_t cmask[PCI_CONFIG_SPACE_SIZE];
+
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 04/12] qemu/pci: check constant registers on load
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (5 preceding siblings ...)
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:49 ` Michael S. Tsirkin
  2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:49 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add "cmask" table of constant register masks: if a bit is not writeable
and is set in cmask table, this bit is checked on load.  An attempt to
load an image that would change such a register causes load to fail.
Use this table to make sure that load does not modify registers that
guest can not change (directly or indirectly).

Note: we can't just assume that read-only registers never change,
because the guest could change a register indirectly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   26 +++++++++++++++++++++++++-
 hw/pci.h |    5 +++++
 2 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 72ca27b..d8fa439 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -136,13 +136,19 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
 {
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
     uint32_t version_id;
     int i;
 
     version_id = qemu_get_be32(f);
     if (version_id > 2)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
+    qemu_get_buffer(f, config, sizeof config);
+    for (i = 0; i < sizeof config; ++i)
+        if ((config[i] ^ s->config[i]) & s->cmask[i] & ~s->wmask[i])
+            return -EINVAL;
+    memcpy(s->config, config, sizeof config);
+
     pci_update_mappings(s);
 
     if (version_id >= 2)
@@ -237,6 +243,18 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_cmask(PCIDevice *dev)
+{
+    pci_set_word(dev->cmask + PCI_VENDOR_ID, 0xffff);
+    pci_set_word(dev->cmask + PCI_DEVICE_ID, 0xffff);
+    dev->cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST;
+    dev->cmask[PCI_REVISION_ID] = 0xff;
+    dev->cmask[PCI_CLASS_PROG] = 0xff;
+    pci_set_word(dev->cmask + PCI_CLASS_DEVICE, 0xffff);
+    dev->cmask[PCI_HEADER_TYPE] = 0xff;
+    dev->cmask[PCI_CAPABILITY_LIST] = 0xff;
+}
+
 static void pci_init_wmask(PCIDevice *dev)
 {
     int i;
@@ -267,6 +285,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_cmask(pci_dev);
     pci_init_wmask(pci_dev);
 
     if (!config_read)
@@ -366,6 +385,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
     *(uint32_t *)(pci_dev->wmask + addr) = cpu_to_le32(wmask);
+    *(uint32_t *)(pci_dev->cmask + addr) = 0xffffffff;
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -900,6 +920,8 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     memset(pdev->used + offset, 0xFF, size);
     /* Make capability read-only by default */
     memset(pdev->wmask + offset, 0, size);
+    /* Check capability by default */
+    memset(pdev->cmask + offset, 0xFF, size);
     return offset;
 }
 
@@ -912,6 +934,8 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
     /* Make capability writeable again */
     memset(pdev->wmask + offset, 0xff, size);
+    /* Clear cmask as device-specific registers can't be checked */
+    memset(pdev->cmask + offset, 0, size);
     memset(pdev->used + offset, 0, size);
 
     if (!pdev->config[PCI_CAPABILITY_LIST])
diff --git a/hw/pci.h b/hw/pci.h
index 7172d81..558e18f 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -101,6 +101,7 @@ typedef struct PCIIORegion {
 #define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
+#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
 #define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
 #define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
@@ -159,6 +160,10 @@ struct PCIDevice {
     /* PCI config space */
     uint8_t config[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to enable config checks on load. Note that writeable bits are
+     * never checked even if set in cmask. */
+    uint8_t cmask[PCI_CONFIG_SPACE_SIZE];
+
     /* Used to implement R/W bytes */
     uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
 
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 05/12] qemu/pci: MSI-X support functions
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:49   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:49 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported: this
is a safety measure to avoid breaking platforms which should support
MSI-X but currently lack this in the interrupt controller emulation.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  378 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   34 +++++
 hw/pci.h        |   23 ++++
 4 files changed, 436 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 0159bf7..145b3a9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -494,7 +494,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..773581f
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,378 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * New bar size is returned. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits on top of that */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make flags bit writeable. */
+    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
+ * modified, it should be retrieved with msix_bar_size. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTSUP;
+
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned nentries = (pci_get_word(dev->config + PCI_MSIX_FLAGS) &
+                         PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                    (nentries + 7) / 8);
+}
+
+/* Should be called after restoring the config space. */
+void msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned n = dev->msix_entries_nr;
+
+    if (!dev->cap_present & QEMU_PCI_CAP_MSIX)
+        return;
+
+    qemu_get_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return;
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+    msix_free_irq_entries(dev);
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * less vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..3427778
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,34 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+void msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 558e18f..03b0a70 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -155,6 +155,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_present field. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -186,6 +191,24 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits */
+    uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 05/12] qemu/pci: MSI-X support functions
@ 2009-06-21 16:49   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:49 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported: this
is a safety measure to avoid breaking platforms which should support
MSI-X but currently lack this in the interrupt controller emulation.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  378 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   34 +++++
 hw/pci.h        |   23 ++++
 4 files changed, 436 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 0159bf7..145b3a9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -494,7 +494,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..773581f
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,378 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * New bar size is returned. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits on top of that */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make flags bit writeable. */
+    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
+ * modified, it should be retrieved with msix_bar_size. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTSUP;
+
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned nentries = (pci_get_word(dev->config + PCI_MSIX_FLAGS) &
+                         PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                    (nentries + 7) / 8);
+}
+
+/* Should be called after restoring the config space. */
+void msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned n = dev->msix_entries_nr;
+
+    if (!dev->cap_present & QEMU_PCI_CAP_MSIX)
+        return;
+
+    qemu_get_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return;
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+    msix_free_irq_entries(dev);
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * less vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..3427778
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,34 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+void msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 558e18f..03b0a70 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -155,6 +155,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_present field. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -186,6 +191,24 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits */
+    uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 05/12] qemu/pci: MSI-X support functions
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (7 preceding siblings ...)
  2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:49 ` Michael S. Tsirkin
  2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:49 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported: this
is a safety measure to avoid breaking platforms which should support
MSI-X but currently lack this in the interrupt controller emulation.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  378 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   34 +++++
 hw/pci.h        |   23 ++++
 4 files changed, 436 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 0159bf7..145b3a9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -494,7 +494,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..773581f
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,378 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * New bar size is returned. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits on top of that */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make flags bit writeable. */
+    pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
+ * modified, it should be retrieved with msix_bar_size. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTSUP;
+
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned nentries = (pci_get_word(dev->config + PCI_MSIX_FLAGS) &
+                         PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                    (nentries + 7) / 8);
+}
+
+/* Should be called after restoring the config space. */
+void msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned n = dev->msix_entries_nr;
+
+    if (!dev->cap_present & QEMU_PCI_CAP_MSIX)
+        return;
+
+    qemu_get_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return;
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+    msix_free_irq_entries(dev);
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * less vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..3427778
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,34 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+void msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 558e18f..03b0a70 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -155,6 +155,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_present field. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -186,6 +191,24 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits */
+    uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 06/12] qemu/apic: minimal MSI/MSI-X implementation for PC
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   43 +++++++++++++++++++++++++++++++++++++++----
 1 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 3e04132..3bcab46 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -731,11 +746,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32 data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -746,7 +781,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -931,6 +965,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -938,7 +973,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -949,4 +985,3 @@ int apic_init(CPUState *env)
     local_apics[s->idx] = s;
     return 0;
 }
-
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 06/12] qemu/apic: minimal MSI/MSI-X implementation for PC
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   43 +++++++++++++++++++++++++++++++++++++++----
 1 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 3e04132..3bcab46 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -731,11 +746,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32 data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -746,7 +781,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -931,6 +965,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -938,7 +973,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -949,4 +985,3 @@ int apic_init(CPUState *env)
     local_apics[s->idx] = s;
     return 0;
 }
-
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 06/12] qemu/apic: minimal MSI/MSI-X implementation for PC
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (9 preceding siblings ...)
  2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:50 ` Michael S. Tsirkin
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (12 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   43 +++++++++++++++++++++++++++++++++++++++----
 1 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 3e04132..3bcab46 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -731,11 +746,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32 data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -746,7 +781,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -931,6 +965,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -938,7 +973,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -949,4 +985,3 @@ int apic_init(CPUState *env)
     local_apics[s->idx] = s;
     return 0;
 }
-
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 07/12] qemu/virtio: virtio support for many interrupt vectors
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 ++++++++++++++++----
 hw/virtio.c        |   59 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 +++++++-
 4 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 8e665c6..108af06 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 24fe837..d4a134d 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_bar(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..fe9f793 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,6 +596,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
@@ -586,6 +607,7 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +616,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +630,13 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +657,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +667,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 07/12] qemu/virtio: virtio support for many interrupt vectors
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 ++++++++++++++++----
 hw/virtio.c        |   59 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 +++++++-
 4 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 8e665c6..108af06 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 24fe837..d4a134d 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_bar(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..fe9f793 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,6 +596,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
@@ -586,6 +607,7 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +616,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +630,13 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +657,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +667,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 07/12] qemu/virtio: virtio support for many interrupt vectors
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (11 preceding siblings ...)
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:50 ` Michael S. Tsirkin
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 ++++++++++++++++----
 hw/virtio.c        |   59 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 +++++++-
 4 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 8e665c6..108af06 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 24fe837..d4a134d 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_bar(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..fe9f793 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,6 +596,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
@@ -586,6 +607,7 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +616,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +630,13 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +657,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +667,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 08/12] qemu/virtio: MSI-X support in virtio PCI
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

This enables actual support for MSI-X in virtio PCI.
First user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 rules.mak       |    2 +-
 2 files changed, 113 insertions(+), 41 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d4a134d..d3f4884 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_bar(&proxy->pci_dev, 1,
+                         msix_bar_size(&proxy->pci_dev),
+                         PCI_ADDRESS_SPACE_MEM,
+                         msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
diff --git a/rules.mak b/rules.mak
index 8d6d96e..defee1d 100644
--- a/rules.mak
+++ b/rules.mak
@@ -1,6 +1,6 @@
 
 %.o: %.c
-	$(call quiet-command,$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
+	$(call quiet-command,$(CC) $(CPPFLAGS) $(CFLAGS) -Werror -Wno-error=uninitialized -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
 
 %.o: %.S
 	$(call quiet-command,$(CC) $(CPPFLAGS) -c -o $@ $<,"  AS    $(TARGET_DIR)$@")
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 08/12] qemu/virtio: MSI-X support in virtio PCI
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

This enables actual support for MSI-X in virtio PCI.
First user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 rules.mak       |    2 +-
 2 files changed, 113 insertions(+), 41 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d4a134d..d3f4884 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_bar(&proxy->pci_dev, 1,
+                         msix_bar_size(&proxy->pci_dev),
+                         PCI_ADDRESS_SPACE_MEM,
+                         msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
diff --git a/rules.mak b/rules.mak
index 8d6d96e..defee1d 100644
--- a/rules.mak
+++ b/rules.mak
@@ -1,6 +1,6 @@
 
 %.o: %.c
-	$(call quiet-command,$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
+	$(call quiet-command,$(CC) $(CPPFLAGS) $(CFLAGS) -Werror -Wno-error=uninitialized -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
 
 %.o: %.S
 	$(call quiet-command,$(CC) $(CPPFLAGS) -c -o $@ $<,"  AS    $(TARGET_DIR)$@")
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 08/12] qemu/virtio: MSI-X support in virtio PCI
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (13 preceding siblings ...)
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:50 ` Michael S. Tsirkin
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

This enables actual support for MSI-X in virtio PCI.
First user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 rules.mak       |    2 +-
 2 files changed, 113 insertions(+), 41 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d4a134d..d3f4884 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_bar(&proxy->pci_dev, 1,
+                         msix_bar_size(&proxy->pci_dev),
+                         PCI_ADDRESS_SPACE_MEM,
+                         msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
diff --git a/rules.mak b/rules.mak
index 8d6d96e..defee1d 100644
--- a/rules.mak
+++ b/rules.mak
@@ -1,6 +1,6 @@
 
 %.o: %.c
-	$(call quiet-command,$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
+	$(call quiet-command,$(CC) $(CPPFLAGS) $(CFLAGS) -Werror -Wno-error=uninitialized -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
 
 %.o: %.S
 	$(call quiet-command,$(CC) $(CPPFLAGS) -c -o $@ $<,"  AS    $(TARGET_DIR)$@")
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 09/12] qemu/virtio: virtio save/load bindings
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Implement bindings for virtio save/load. Use them in virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   46 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio.c     |   33 ++++++++++++++++-----------------
 hw/virtio.h     |    4 ++++
 3 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d3f4884..4db0b88 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,46 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    pci_device_save(&proxy->pci_dev, f);
+    msix_save(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    int ret;
+    ret = pci_device_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    msix_load(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_get_be16s(f, &proxy->vdev->config_vector);
+    return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint16_t vector;
+    if (!msix_present(&proxy->pci_dev))
+        return 0;
+    qemu_get_be16s(f, &vector);
+    virtio_queue_set_vector(proxy->vdev, n, vector);
+    return 0;
+}
+
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +357,11 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .notify = virtio_pci_notify
+    .notify = virtio_pci_notify,
+    .save_config = virtio_pci_save_config,
+    .load_config = virtio_pci_load_config,
+    .save_queue = virtio_pci_save_queue,
+    .load_queue = virtio_pci_load_queue,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index fe9f793..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 {
     int i;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_save(&vdev->pci_dev, f);
-    //msix_save(&vdev->pci_dev, f);
+    if (vdev->binding->save_config)
+        vdev->binding->save_config(vdev->binding_opaque, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -596,18 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
-        if (vdev->nvectors)
-            qemu_put_be16s(f, &vdev->vq[i].vector);
+        if (vdev->binding->save_queue)
+            vdev->binding->save_queue(vdev->binding_opaque, i, f);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
-    int num, i;
+    int num, i, ret;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_load(&vdev->pci_dev, f);
-    //r = msix_load(&vdev->pci_dev, f);
+    if (vdev->binding->load_config) {
+        ret = vdev->binding->load_config(vdev->binding_opaque, f);
+        if (ret)
+            return ret;
+    }
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -616,10 +617,6 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
-    if (vdev->nvectors) {
-        qemu_get_be16s(f, &vdev->config_vector);
-        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
-    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -630,13 +627,15 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
-        if (vdev->nvectors) {
-            qemu_get_be16s(f, &vdev->vq[i].vector);
-            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        if (vdev->binding->load_queue) {
+            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+            if (ret)
+                return ret;
         }
     }
 
     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
 
 typedef struct {
     void (*notify)(void * opaque, uint16_t vector);
+    void (*save_config)(void * opaque, QEMUFile *f);
+    void (*save_queue)(void * opaque, int n, QEMUFile *f);
+    int (*load_config)(void * opaque, QEMUFile *f);
+    int (*load_queue)(void * opaque, int n, QEMUFile *f);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 09/12] qemu/virtio: virtio save/load bindings
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Implement bindings for virtio save/load. Use them in virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   46 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio.c     |   33 ++++++++++++++++-----------------
 hw/virtio.h     |    4 ++++
 3 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d3f4884..4db0b88 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,46 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    pci_device_save(&proxy->pci_dev, f);
+    msix_save(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    int ret;
+    ret = pci_device_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    msix_load(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_get_be16s(f, &proxy->vdev->config_vector);
+    return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint16_t vector;
+    if (!msix_present(&proxy->pci_dev))
+        return 0;
+    qemu_get_be16s(f, &vector);
+    virtio_queue_set_vector(proxy->vdev, n, vector);
+    return 0;
+}
+
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +357,11 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .notify = virtio_pci_notify
+    .notify = virtio_pci_notify,
+    .save_config = virtio_pci_save_config,
+    .load_config = virtio_pci_load_config,
+    .save_queue = virtio_pci_save_queue,
+    .load_queue = virtio_pci_load_queue,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index fe9f793..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 {
     int i;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_save(&vdev->pci_dev, f);
-    //msix_save(&vdev->pci_dev, f);
+    if (vdev->binding->save_config)
+        vdev->binding->save_config(vdev->binding_opaque, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -596,18 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
-        if (vdev->nvectors)
-            qemu_put_be16s(f, &vdev->vq[i].vector);
+        if (vdev->binding->save_queue)
+            vdev->binding->save_queue(vdev->binding_opaque, i, f);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
-    int num, i;
+    int num, i, ret;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_load(&vdev->pci_dev, f);
-    //r = msix_load(&vdev->pci_dev, f);
+    if (vdev->binding->load_config) {
+        ret = vdev->binding->load_config(vdev->binding_opaque, f);
+        if (ret)
+            return ret;
+    }
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -616,10 +617,6 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
-    if (vdev->nvectors) {
-        qemu_get_be16s(f, &vdev->config_vector);
-        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
-    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -630,13 +627,15 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
-        if (vdev->nvectors) {
-            qemu_get_be16s(f, &vdev->vq[i].vector);
-            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        if (vdev->binding->load_queue) {
+            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+            if (ret)
+                return ret;
         }
     }
 
     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
 
 typedef struct {
     void (*notify)(void * opaque, uint16_t vector);
+    void (*save_config)(void * opaque, QEMUFile *f);
+    void (*save_queue)(void * opaque, int n, QEMUFile *f);
+    int (*load_config)(void * opaque, QEMUFile *f);
+    int (*load_queue)(void * opaque, int n, QEMUFile *f);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 09/12] qemu/virtio: virtio save/load bindings
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (15 preceding siblings ...)
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:50 ` Michael S. Tsirkin
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Implement bindings for virtio save/load. Use them in virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   46 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio.c     |   33 ++++++++++++++++-----------------
 hw/virtio.h     |    4 ++++
 3 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d3f4884..4db0b88 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -105,6 +105,46 @@ static void virtio_pci_notify(void *opaque, uint16_t vector)
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    pci_device_save(&proxy->pci_dev, f);
+    msix_save(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, proxy->vdev->config_vector);
+}
+
+static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    if (msix_present(&proxy->pci_dev))
+        qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
+}
+
+static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    int ret;
+    ret = pci_device_load(&proxy->pci_dev, f);
+    if (ret)
+        return ret;
+    msix_load(&proxy->pci_dev, f);
+    if (msix_present(&proxy->pci_dev))
+        qemu_get_be16s(f, &proxy->vdev->config_vector);
+    return 0;
+}
+
+static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint16_t vector;
+    if (!msix_present(&proxy->pci_dev))
+        return 0;
+    qemu_get_be16s(f, &vector);
+    virtio_queue_set_vector(proxy->vdev, n, vector);
+    return 0;
+}
+
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -317,7 +357,11 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .notify = virtio_pci_notify
+    .notify = virtio_pci_notify,
+    .save_config = virtio_pci_save_config,
+    .load_config = virtio_pci_load_config,
+    .save_queue = virtio_pci_save_queue,
+    .load_queue = virtio_pci_load_queue,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index fe9f793..b773dff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -568,9 +568,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 {
     int i;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_save(&vdev->pci_dev, f);
-    //msix_save(&vdev->pci_dev, f);
+    if (vdev->binding->save_config)
+        vdev->binding->save_config(vdev->binding_opaque, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -596,18 +595,20 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
-        if (vdev->nvectors)
-            qemu_put_be16s(f, &vdev->vq[i].vector);
+        if (vdev->binding->save_queue)
+            vdev->binding->save_queue(vdev->binding_opaque, i, f);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
-    int num, i;
+    int num, i, ret;
 
-    /* FIXME: load/save binding.  */
-    //pci_device_load(&vdev->pci_dev, f);
-    //r = msix_load(&vdev->pci_dev, f);
+    if (vdev->binding->load_config) {
+        ret = vdev->binding->load_config(vdev->binding_opaque, f);
+        if (ret)
+            return ret;
+    }
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -616,10 +617,6 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
-    if (vdev->nvectors) {
-        qemu_get_be16s(f, &vdev->config_vector);
-        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
-    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -630,13 +627,15 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
-        if (vdev->nvectors) {
-            qemu_get_be16s(f, &vdev->vq[i].vector);
-            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        if (vdev->binding->load_queue) {
+            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+            if (ret)
+                return ret;
         }
     }
 
     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
diff --git a/hw/virtio.h b/hw/virtio.h
index 04a3c3d..ce05517 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -72,6 +72,10 @@ typedef struct VirtQueueElement
 
 typedef struct {
     void (*notify)(void * opaque, uint16_t vector);
+    void (*save_config)(void * opaque, QEMUFile *f);
+    void (*save_queue)(void * opaque, int n, QEMUFile *f);
+    int (*load_config)(void * opaque, QEMUFile *f);
+    int (*load_queue)(void * opaque, int n, QEMUFile *f);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 10/12] qemu/pci: add pci_get/set_byte
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 03b0a70..52b66b3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -260,6 +260,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 10/12] qemu/pci: add pci_get/set_byte
@ 2009-06-21 16:50   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 03b0a70..52b66b3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -260,6 +260,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 10/12] qemu/pci: add pci_get/set_byte
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (18 preceding siblings ...)
  2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:50 ` Michael S. Tsirkin
  2009-06-21 16:51   ` [Qemu-devel] " Michael S. Tsirkin
                   ` (3 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:50 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 03b0a70..52b66b3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -260,6 +260,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 11/12] qemu/net: request 3 vectors in virtio-net
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:51   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:51 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Request up to 3 vectors in virtio-net. Actual bindings might supply
less.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index d584287..c8dabbe 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -709,6 +709,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 11/12] qemu/net: request 3 vectors in virtio-net
@ 2009-06-21 16:51   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:51 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Request up to 3 vectors in virtio-net. Actual bindings might supply
less.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index d584287..c8dabbe 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -709,6 +709,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 11/12] qemu/net: request 3 vectors in virtio-net
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (20 preceding siblings ...)
  2009-06-21 16:51   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-21 16:51 ` Michael S. Tsirkin
  2009-06-21 16:51 ` [PATCHv6 12/12] qemu/net: flag to control the number of vectors a nic has Michael S. Tsirkin
  2009-06-21 16:51   ` [Qemu-devel] " Michael S. Tsirkin
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:51 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Request up to 3 vectors in virtio-net. Actual bindings might supply
less.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index d584287..c8dabbe 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -709,6 +709,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 12/12] qemu/net: flag to control the number of vectors a nic has
       [not found] <cover.1245594586.git.mst@redhat.com>
@ 2009-06-21 16:51   ` Michael S. Tsirkin
  2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
                     ` (22 subsequent siblings)
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:51 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

Add an option to specify the number of MSI-X vectors for PCI NIC cards. This
can also be used to disable MSI-X, for compatibility with old qemu. This
option currently only affects virtio cards.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    5 ++++-
 net.c           |   18 +++++++++++++++++-
 net.h           |    4 ++++
 qemu-options.hx |   14 ++++++++------
 4 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index c8dabbe..4a82348 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -709,7 +709,10 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
-    n->vdev.nvectors = 3;
+    if (dev->nd->nvectors == NIC_NVECTORS_UNSPECIFIED)
+        n->vdev.nvectors = 3;
+    else
+        n->vdev.nvectors = dev->nd->nvectors;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
diff --git a/net.c b/net.c
index af9de73..1a71b75 100644
--- a/net.c
+++ b/net.c
@@ -2103,7 +2103,7 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
     }
     if (!strcmp(device, "nic")) {
         static const char * const nic_params[] = {
-            "vlan", "name", "macaddr", "model", NULL
+            "vlan", "name", "macaddr", "model", "vectors", NULL
         };
         NICInfo *nd;
         uint8_t *macaddr;
@@ -2138,6 +2138,22 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
         if (get_param_value(buf, sizeof(buf), "model", p)) {
             nd->model = strdup(buf);
         }
+        nd->nvectors = NIC_NVECTORS_UNSPECIFIED;
+        if (get_param_value(buf, sizeof(buf), "vectors", p)) {
+            char *endptr;
+            long vectors = strtol(buf, &endptr, 0);
+            if (*endptr) {
+                config_error(mon, "invalid syntax for # of vectors\n");
+                ret = -1;
+                goto out;
+            }
+            if (vectors < 0 || vectors > 0x7ffffff) {
+                config_error(mon, "invalid # of vectors\n");
+                ret = -1;
+                goto out;
+            }
+            nd->nvectors = vectors;
+        }
         nd->vlan = vlan;
         nd->name = name;
         nd->used = 1;
diff --git a/net.h b/net.h
index 89e7706..f8b9eac 100644
--- a/net.h
+++ b/net.h
@@ -83,6 +83,9 @@ int do_set_link(Monitor *mon, const char *name, const char *up_or_down);
 /* NIC info */
 
 #define MAX_NICS 8
+enum {
+	NIC_NVECTORS_UNSPECIFIED = -1
+};
 
 struct NICInfo {
     uint8_t macaddr[6];
@@ -91,6 +94,7 @@ struct NICInfo {
     VLANState *vlan;
     void *private;
     int used;
+    int nvectors;
 };
 
 extern int nb_nics;
diff --git a/qemu-options.hx b/qemu-options.hx
index 9d5e05a..a8a1db4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -733,7 +733,7 @@ STEXI
 ETEXI
 
 DEF("net", HAS_ARG, QEMU_OPTION_net,
-    "-net nic[,vlan=n][,macaddr=addr][,model=type][,name=str]\n"
+    "-net nic[,vlan=n][,macaddr=addr][,model=type][,name=str][,vectors=v]\n"
     "                create a new Network Interface Card and connect it to VLAN 'n'\n"
 #ifdef CONFIG_SLIRP
     "-net user[,vlan=n][,name=str][,hostname=host]\n"
@@ -767,15 +767,17 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "-net none       use it alone to have zero network devices; if no -net option\n"
     "                is provided, the default is '-net nic -net user'\n")
 STEXI
-@item -net nic[,vlan=@var{n}][,macaddr=@var{addr}][,model=@var{type}][,name=@var{name}]
+@item -net nic[,vlan=@var{n}][,macaddr=@var{addr}][,model=@var{type}][,name=@var{name}][,vectors=@var{v}]
 Create a new Network Interface Card and connect it to VLAN @var{n} (@var{n}
 = 0 is the default). The NIC is an ne2k_pci by default on the PC
 target. Optionally, the MAC address can be changed to @var{addr}
-and a @var{name} can be assigned for use in monitor commands. If no
-@option{-net} option is specified, a single NIC is created.
-Qemu can emulate several different models of network card.
+and a @var{name} can be assigned for use in monitor commands.
+Optionally, for PCI cards, you can specify the number @var{v} of MSI-X vectors
+that the card should have; this option currently only affects virtio cards; set
+@var{v} = 0 to disable MSI-X. If no @option{-net} option is specified, a single
+NIC is created. Qemu can emulate several different models of network card.
 Valid values for @var{type} are
-@code{i82551}, @code{i82557b}, @code{i82559er},
+@code{virtio}, @code{i82551}, @code{i82557b}, @code{i82559er},
 @code{ne2k_pci}, @code{ne2k_isa}, @code{pcnet}, @code{rtl8139},
 @code{e1000}, @code{smc91c111}, @code{lance} and @code{mcf_fec}.
 Not all devices are supported on all targets.  Use -net nic,model=?
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCHv6 12/12] qemu/net: flag to control the number of vectors a nic has
@ 2009-06-21 16:51   ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:51 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

Add an option to specify the number of MSI-X vectors for PCI NIC cards. This
can also be used to disable MSI-X, for compatibility with old qemu. This
option currently only affects virtio cards.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    5 ++++-
 net.c           |   18 +++++++++++++++++-
 net.h           |    4 ++++
 qemu-options.hx |   14 ++++++++------
 4 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index c8dabbe..4a82348 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -709,7 +709,10 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
-    n->vdev.nvectors = 3;
+    if (dev->nd->nvectors == NIC_NVECTORS_UNSPECIFIED)
+        n->vdev.nvectors = 3;
+    else
+        n->vdev.nvectors = dev->nd->nvectors;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
diff --git a/net.c b/net.c
index af9de73..1a71b75 100644
--- a/net.c
+++ b/net.c
@@ -2103,7 +2103,7 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
     }
     if (!strcmp(device, "nic")) {
         static const char * const nic_params[] = {
-            "vlan", "name", "macaddr", "model", NULL
+            "vlan", "name", "macaddr", "model", "vectors", NULL
         };
         NICInfo *nd;
         uint8_t *macaddr;
@@ -2138,6 +2138,22 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
         if (get_param_value(buf, sizeof(buf), "model", p)) {
             nd->model = strdup(buf);
         }
+        nd->nvectors = NIC_NVECTORS_UNSPECIFIED;
+        if (get_param_value(buf, sizeof(buf), "vectors", p)) {
+            char *endptr;
+            long vectors = strtol(buf, &endptr, 0);
+            if (*endptr) {
+                config_error(mon, "invalid syntax for # of vectors\n");
+                ret = -1;
+                goto out;
+            }
+            if (vectors < 0 || vectors > 0x7ffffff) {
+                config_error(mon, "invalid # of vectors\n");
+                ret = -1;
+                goto out;
+            }
+            nd->nvectors = vectors;
+        }
         nd->vlan = vlan;
         nd->name = name;
         nd->used = 1;
diff --git a/net.h b/net.h
index 89e7706..f8b9eac 100644
--- a/net.h
+++ b/net.h
@@ -83,6 +83,9 @@ int do_set_link(Monitor *mon, const char *name, const char *up_or_down);
 /* NIC info */
 
 #define MAX_NICS 8
+enum {
+	NIC_NVECTORS_UNSPECIFIED = -1
+};
 
 struct NICInfo {
     uint8_t macaddr[6];
@@ -91,6 +94,7 @@ struct NICInfo {
     VLANState *vlan;
     void *private;
     int used;
+    int nvectors;
 };
 
 extern int nb_nics;
diff --git a/qemu-options.hx b/qemu-options.hx
index 9d5e05a..a8a1db4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -733,7 +733,7 @@ STEXI
 ETEXI
 
 DEF("net", HAS_ARG, QEMU_OPTION_net,
-    "-net nic[,vlan=n][,macaddr=addr][,model=type][,name=str]\n"
+    "-net nic[,vlan=n][,macaddr=addr][,model=type][,name=str][,vectors=v]\n"
     "                create a new Network Interface Card and connect it to VLAN 'n'\n"
 #ifdef CONFIG_SLIRP
     "-net user[,vlan=n][,name=str][,hostname=host]\n"
@@ -767,15 +767,17 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "-net none       use it alone to have zero network devices; if no -net option\n"
     "                is provided, the default is '-net nic -net user'\n")
 STEXI
-@item -net nic[,vlan=@var{n}][,macaddr=@var{addr}][,model=@var{type}][,name=@var{name}]
+@item -net nic[,vlan=@var{n}][,macaddr=@var{addr}][,model=@var{type}][,name=@var{name}][,vectors=@var{v}]
 Create a new Network Interface Card and connect it to VLAN @var{n} (@var{n}
 = 0 is the default). The NIC is an ne2k_pci by default on the PC
 target. Optionally, the MAC address can be changed to @var{addr}
-and a @var{name} can be assigned for use in monitor commands. If no
-@option{-net} option is specified, a single NIC is created.
-Qemu can emulate several different models of network card.
+and a @var{name} can be assigned for use in monitor commands.
+Optionally, for PCI cards, you can specify the number @var{v} of MSI-X vectors
+that the card should have; this option currently only affects virtio cards; set
+@var{v} = 0 to disable MSI-X. If no @option{-net} option is specified, a single
+NIC is created. Qemu can emulate several different models of network card.
 Valid values for @var{type} are
-@code{i82551}, @code{i82557b}, @code{i82559er},
+@code{virtio}, @code{i82551}, @code{i82557b}, @code{i82559er},
 @code{ne2k_pci}, @code{ne2k_isa}, @code{pcnet}, @code{rtl8139},
 @code{e1000}, @code{smc91c111}, @code{lance} and @code{mcf_fec}.
 Not all devices are supported on all targets.  Use -net nic,model=?
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCHv6 12/12] qemu/net: flag to control the number of vectors a nic has
       [not found] <cover.1245594586.git.mst@redhat.com>
                   ` (21 preceding siblings ...)
  2009-06-21 16:51 ` Michael S. Tsirkin
@ 2009-06-21 16:51 ` Michael S. Tsirkin
  2009-06-21 16:51   ` [Qemu-devel] " Michael S. Tsirkin
  23 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-21 16:51 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

Add an option to specify the number of MSI-X vectors for PCI NIC cards. This
can also be used to disable MSI-X, for compatibility with old qemu. This
option currently only affects virtio cards.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    5 ++++-
 net.c           |   18 +++++++++++++++++-
 net.h           |    4 ++++
 qemu-options.hx |   14 ++++++++------
 4 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index c8dabbe..4a82348 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -709,7 +709,10 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
-    n->vdev.nvectors = 3;
+    if (dev->nd->nvectors == NIC_NVECTORS_UNSPECIFIED)
+        n->vdev.nvectors = 3;
+    else
+        n->vdev.nvectors = dev->nd->nvectors;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
diff --git a/net.c b/net.c
index af9de73..1a71b75 100644
--- a/net.c
+++ b/net.c
@@ -2103,7 +2103,7 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
     }
     if (!strcmp(device, "nic")) {
         static const char * const nic_params[] = {
-            "vlan", "name", "macaddr", "model", NULL
+            "vlan", "name", "macaddr", "model", "vectors", NULL
         };
         NICInfo *nd;
         uint8_t *macaddr;
@@ -2138,6 +2138,22 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
         if (get_param_value(buf, sizeof(buf), "model", p)) {
             nd->model = strdup(buf);
         }
+        nd->nvectors = NIC_NVECTORS_UNSPECIFIED;
+        if (get_param_value(buf, sizeof(buf), "vectors", p)) {
+            char *endptr;
+            long vectors = strtol(buf, &endptr, 0);
+            if (*endptr) {
+                config_error(mon, "invalid syntax for # of vectors\n");
+                ret = -1;
+                goto out;
+            }
+            if (vectors < 0 || vectors > 0x7ffffff) {
+                config_error(mon, "invalid # of vectors\n");
+                ret = -1;
+                goto out;
+            }
+            nd->nvectors = vectors;
+        }
         nd->vlan = vlan;
         nd->name = name;
         nd->used = 1;
diff --git a/net.h b/net.h
index 89e7706..f8b9eac 100644
--- a/net.h
+++ b/net.h
@@ -83,6 +83,9 @@ int do_set_link(Monitor *mon, const char *name, const char *up_or_down);
 /* NIC info */
 
 #define MAX_NICS 8
+enum {
+	NIC_NVECTORS_UNSPECIFIED = -1
+};
 
 struct NICInfo {
     uint8_t macaddr[6];
@@ -91,6 +94,7 @@ struct NICInfo {
     VLANState *vlan;
     void *private;
     int used;
+    int nvectors;
 };
 
 extern int nb_nics;
diff --git a/qemu-options.hx b/qemu-options.hx
index 9d5e05a..a8a1db4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -733,7 +733,7 @@ STEXI
 ETEXI
 
 DEF("net", HAS_ARG, QEMU_OPTION_net,
-    "-net nic[,vlan=n][,macaddr=addr][,model=type][,name=str]\n"
+    "-net nic[,vlan=n][,macaddr=addr][,model=type][,name=str][,vectors=v]\n"
     "                create a new Network Interface Card and connect it to VLAN 'n'\n"
 #ifdef CONFIG_SLIRP
     "-net user[,vlan=n][,name=str][,hostname=host]\n"
@@ -767,15 +767,17 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "-net none       use it alone to have zero network devices; if no -net option\n"
     "                is provided, the default is '-net nic -net user'\n")
 STEXI
-@item -net nic[,vlan=@var{n}][,macaddr=@var{addr}][,model=@var{type}][,name=@var{name}]
+@item -net nic[,vlan=@var{n}][,macaddr=@var{addr}][,model=@var{type}][,name=@var{name}][,vectors=@var{v}]
 Create a new Network Interface Card and connect it to VLAN @var{n} (@var{n}
 = 0 is the default). The NIC is an ne2k_pci by default on the PC
 target. Optionally, the MAC address can be changed to @var{addr}
-and a @var{name} can be assigned for use in monitor commands. If no
-@option{-net} option is specified, a single NIC is created.
-Qemu can emulate several different models of network card.
+and a @var{name} can be assigned for use in monitor commands.
+Optionally, for PCI cards, you can specify the number @var{v} of MSI-X vectors
+that the card should have; this option currently only affects virtio cards; set
+@var{v} = 0 to disable MSI-X. If no @option{-net} option is specified, a single
+NIC is created. Qemu can emulate several different models of network card.
 Valid values for @var{type} are
-@code{i82551}, @code{i82557b}, @code{i82559er},
+@code{virtio}, @code{i82551}, @code{i82557b}, @code{i82559er},
 @code{ne2k_pci}, @code{ne2k_isa}, @code{pcnet}, @code{rtl8139},
 @code{e1000}, @code{smc91c111}, @code{lance} and @code{mcf_fec}.
 Not all devices are supported on all targets.  Use -net nic,model=?
-- 
1.6.2.2

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCHv6 05/12] qemu/pci: MSI-X support functions
  2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-26 14:26     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-26 14:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell, vi

On Sun, Jun 21, 2009 at 07:49:54PM +0300, Michael S. Tsirkin wrote:
> +    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
> +                                                  msix_mmio_write, dev);


The signature for cpu_register_io_memory changed meanwhile so this now
needs to be:

+    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
+                                                  msix_mmio_write, dev);

Anthony, I guess you figured this out, or do you want me to respin the series
with this minor fix?

-- 
MST

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [Qemu-devel] Re: [PATCHv6 05/12] qemu/pci: MSI-X support functions
@ 2009-06-26 14:26     ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-26 14:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Anthony Liguori, Glauber Costa

On Sun, Jun 21, 2009 at 07:49:54PM +0300, Michael S. Tsirkin wrote:
> +    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
> +                                                  msix_mmio_write, dev);


The signature for cpu_register_io_memory changed meanwhile so this now
needs to be:

+    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
+                                                  msix_mmio_write, dev);

Anthony, I guess you figured this out, or do you want me to respin the series
with this minor fix?

-- 
MST

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCHv6 05/12] qemu/pci: MSI-X support functions
  2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
  (?)
  (?)
@ 2009-06-26 14:26   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2009-06-26 14:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm, Rusty Russell

On Sun, Jun 21, 2009 at 07:49:54PM +0300, Michael S. Tsirkin wrote:
> +    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
> +                                                  msix_mmio_write, dev);


The signature for cpu_register_io_memory changed meanwhile so this now
needs to be:

+    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
+                                                  msix_mmio_write, dev);

Anthony, I guess you figured this out, or do you want me to respin the series
with this minor fix?

-- 
MST

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCHv6 05/12] qemu/pci: MSI-X support functions
  2009-06-26 14:26     ` [Qemu-devel] " Michael S. Tsirkin
@ 2009-06-26 16:01       ` Anthony Liguori
  -1 siblings, 0 replies; 42+ messages in thread
From: Anthony Liguori @ 2009-06-26 16:01 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	Glauber Costa

Michael S. Tsirkin wrote:
> On Sun, Jun 21, 2009 at 07:49:54PM +0300, Michael S. Tsirkin wrote:
>   
>> +    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
>> +                                                  msix_mmio_write, dev);
>>     
>
>
> The signature for cpu_register_io_memory changed meanwhile so this now
> needs to be:
>
> +    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
> +                                                  msix_mmio_write, dev);
>
> Anthony, I guess you figured this out, or do you want me to respin the series
> with this minor fix?
>   

It's already changed in staging.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [Qemu-devel] Re: [PATCHv6 05/12] qemu/pci: MSI-X support functions
@ 2009-06-26 16:01       ` Anthony Liguori
  0 siblings, 0 replies; 42+ messages in thread
From: Anthony Liguori @ 2009-06-26 16:01 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, Rusty Russell, qemu-devel,
	virtualization, Blue Swirl, Christian Borntraeger, Paul Brook,
	Avi Kivity

Michael S. Tsirkin wrote:
> On Sun, Jun 21, 2009 at 07:49:54PM +0300, Michael S. Tsirkin wrote:
>   
>> +    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
>> +                                                  msix_mmio_write, dev);
>>     
>
>
> The signature for cpu_register_io_memory changed meanwhile so this now
> needs to be:
>
> +    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
> +                                                  msix_mmio_write, dev);
>
> Anthony, I guess you figured this out, or do you want me to respin the series
> with this minor fix?
>   

It's already changed in staging.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCHv6 05/12] qemu/pci: MSI-X support functions
  2009-06-26 14:26     ` [Qemu-devel] " Michael S. Tsirkin
  (?)
@ 2009-06-26 16:01     ` Anthony Liguori
  -1 siblings, 0 replies; 42+ messages in thread
From: Anthony Liguori @ 2009-06-26 16:01 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Glauber Costa, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Michael S. Tsirkin wrote:
> On Sun, Jun 21, 2009 at 07:49:54PM +0300, Michael S. Tsirkin wrote:
>   
>> +    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
>> +                                                  msix_mmio_write, dev);
>>     
>
>
> The signature for cpu_register_io_memory changed meanwhile so this now
> needs to be:
>
> +    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
> +                                                  msix_mmio_write, dev);
>
> Anthony, I guess you figured this out, or do you want me to respin the series
> with this minor fix?
>   

It's already changed in staging.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2009-06-26 16:01 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1245594586.git.mst@redhat.com>
2009-06-21 16:45 ` [PATCHv6 01/12] qemu/pci: make default_write_config use mask table Michael S. Tsirkin
2009-06-21 16:45 ` Michael S. Tsirkin
2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:45 ` [PATCHv6 02/12] qemu/pci: helper routines for pci access Michael S. Tsirkin
2009-06-21 16:45 ` Michael S. Tsirkin
2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:45 ` [PATCHv6 03/12] qemu/pci: add routines to manage PCI capabilities Michael S. Tsirkin
2009-06-21 16:45 ` Michael S. Tsirkin
2009-06-21 16:45   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:49 ` [PATCHv6 04/12] qemu/pci: check constant registers on load Michael S. Tsirkin
2009-06-21 16:49 ` Michael S. Tsirkin
2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:49 ` [PATCHv6 05/12] qemu/pci: MSI-X support functions Michael S. Tsirkin
2009-06-21 16:49 ` Michael S. Tsirkin
2009-06-21 16:49   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-26 14:26   ` Michael S. Tsirkin
2009-06-26 14:26     ` [Qemu-devel] " Michael S. Tsirkin
2009-06-26 16:01     ` Anthony Liguori
2009-06-26 16:01     ` Anthony Liguori
2009-06-26 16:01       ` [Qemu-devel] " Anthony Liguori
2009-06-26 14:26   ` Michael S. Tsirkin
2009-06-21 16:50 ` [PATCHv6 06/12] qemu/apic: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
2009-06-21 16:50 ` Michael S. Tsirkin
2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:50 ` [PATCHv6 07/12] qemu/virtio: virtio support for many interrupt vectors Michael S. Tsirkin
2009-06-21 16:50 ` Michael S. Tsirkin
2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:50 ` [PATCHv6 08/12] qemu/virtio: MSI-X support in virtio PCI Michael S. Tsirkin
2009-06-21 16:50 ` Michael S. Tsirkin
2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:50 ` [PATCHv6 09/12] qemu/virtio: virtio save/load bindings Michael S. Tsirkin
2009-06-21 16:50 ` Michael S. Tsirkin
2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:50 ` [PATCHv6 10/12] qemu/pci: add pci_get/set_byte Michael S. Tsirkin
2009-06-21 16:50   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:50 ` Michael S. Tsirkin
2009-06-21 16:51 ` [PATCHv6 11/12] qemu/net: request 3 vectors in virtio-net Michael S. Tsirkin
2009-06-21 16:51   ` [Qemu-devel] " Michael S. Tsirkin
2009-06-21 16:51 ` Michael S. Tsirkin
2009-06-21 16:51 ` [PATCHv6 12/12] qemu/net: flag to control the number of vectors a nic has Michael S. Tsirkin
2009-06-21 16:51 ` Michael S. Tsirkin
2009-06-21 16:51   ` [Qemu-devel] " Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.