kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/6][v3] Userspace support for MSI
@ 2009-01-07 10:49 Sheng Yang
  2009-01-07 10:49 ` [PATCH 1/6] kvm: Replace force type convert with container_of() Sheng Yang
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm

Update from v2:
Change API to gsi_route.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] kvm: Replace force type convert with container_of()
  2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
@ 2009-01-07 10:49 ` Sheng Yang
  2009-01-07 10:49 ` [PATCH 2/6] Make device assignment depend on libpci Sheng Yang
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index d5eb7b2..f357d17 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -144,7 +144,7 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
 static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
                                    uint32_t e_phys, uint32_t e_size, int type)
 {
-    AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
+    AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     uint32_t old_ephys = region->e_physbase;
     uint32_t old_esize = region->e_size;
@@ -178,7 +178,7 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
                                     uint32_t addr, uint32_t size, int type)
 {
-    AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
+    AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     int first_map = (region->e_size == 0);
     CPUState *env;
@@ -227,6 +227,7 @@ static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
 {
     int fd;
     ssize_t ret;
+    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
 
     DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
           ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
@@ -248,7 +249,7 @@ static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
           ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
           (uint16_t) address, val, len);
 
-    fd = ((AssignedDevice *)d)->real_device.config_fd;
+    fd = pci_dev->real_device.config_fd;
 
 again:
     ret = pwrite(fd, &val, len, address);
@@ -269,6 +270,7 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
     uint32_t val = 0;
     int fd;
     ssize_t ret;
+    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
 
     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
         address == 0x3c || address == 0x3d) {
@@ -282,7 +284,7 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
     if (address == 0xFC)
         goto do_log;
 
-    fd = ((AssignedDevice *)d)->real_device.config_fd;
+    fd = pci_dev->real_device.config_fd;
 
 again:
     ret = pread(fd, &val, len, address);
@@ -539,16 +541,18 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
 {
     int r;
     AssignedDevice *dev;
+    PCIDevice *pci_dev;
     uint8_t e_device, e_intx;
     struct kvm_assigned_pci_dev assigned_dev_data;
 
     DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n",
           adev->name, adev->bus, adev->dev, adev->func);
 
-    dev = (AssignedDevice *)
-        pci_register_device(bus, adev->name, sizeof(AssignedDevice),
-                            -1, assigned_dev_pci_read_config,
-                            assigned_dev_pci_write_config);
+    pci_dev = pci_register_device(bus, adev->name,
+              sizeof(AssignedDevice), -1, assigned_dev_pci_read_config,
+              assigned_dev_pci_write_config);
+    dev = container_of(pci_dev, AssignedDevice, dev);
+
     if (NULL == dev) {
         fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
                 __func__, adev->name);
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] Make device assignment depend on libpci
  2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
  2009-01-07 10:49 ` [PATCH 1/6] kvm: Replace force type convert with container_of() Sheng Yang
@ 2009-01-07 10:49 ` Sheng Yang
  2009-01-07 10:49 ` [PATCH 3/6] kvm: ioctl for gsi_route Sheng Yang
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, Sheng Yang

libpci is used later for capability detection.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/Makefile.target |    1 +
 qemu/configure       |   20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index f58015b..a58f31d 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -696,6 +696,7 @@ OBJS += device-hotplug.o
 
 ifeq ($(USE_KVM_DEVICE_ASSIGNMENT), 1)
 OBJS+= device-assignment.o
+LIBS+=-lpci
 endif
 
 ifeq ($(TARGET_BASE_ARCH), i386)
diff --git a/qemu/configure b/qemu/configure
index 6eb12ae..f5d3f89 100755
--- a/qemu/configure
+++ b/qemu/configure
@@ -780,6 +780,26 @@ EOF
     fi
 fi
 
+# libpci probe for kvm_cap_device_assignment
+if test $kvm_cap_device_assignment = "yes" ; then
+cat > $TMPC << EOF
+#include <pci/pci.h>
+#ifndef PCI_VENDOR_ID
+#error NO LIBPCI
+#endif
+int main(void) { return 0; }
+EOF
+    if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $TMPC 2>/dev/null ; then
+        :
+    else
+        echo
+        echo "Error: libpci check failed"
+        echo "Disable KVM Device Assignment capability."
+        echo
+        kvm_cap_device_assignment="no"
+    fi
+fi
+
 ##########################################
 # zlib check
 
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] kvm: ioctl for gsi_route
  2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
  2009-01-07 10:49 ` [PATCH 1/6] kvm: Replace force type convert with container_of() Sheng Yang
  2009-01-07 10:49 ` [PATCH 2/6] Make device assignment depend on libpci Sheng Yang
@ 2009-01-07 10:49 ` Sheng Yang
  2009-01-07 10:49 ` [PATCH 4/6] Figure out device capability Sheng Yang
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 libkvm/libkvm.c |   27 +++++++++++++++++++++++++++
 libkvm/libkvm.h |    8 ++++++++
 2 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index 0408fdb..6d53f38 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -1164,3 +1164,30 @@ int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
 #endif
 	return -ENOSYS;
 }
+
+#ifdef KVM_CAP_GSI_ROUTE
+int kvm_request_gsi_route(kvm_context_t kvm,
+			  struct kvm_gsi_route_guest *route)
+{
+        int ret;
+
+        ret = ioctl(kvm->vm_fd, KVM_REQUEST_GSI_ROUTE, route);
+        if (ret < 0)
+                return -errno;
+
+        return ret;
+}
+
+int kvm_free_gsi_route(kvm_context_t kvm,
+		       struct kvm_gsi_route_guest *route)
+{
+        int ret;
+
+        ret = ioctl(kvm->vm_fd, KVM_FREE_GSI_ROUTE, route);
+        if (ret < 0)
+                return -errno;
+
+        return ret;
+}
+
+#endif
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index ee1ba68..2bfcfe3 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -720,4 +720,12 @@ int kvm_assign_irq(kvm_context_t kvm,
  */
 int kvm_destroy_memory_region_works(kvm_context_t kvm);
 #endif
+
+#ifdef KVM_CAP_GSI_ROUTE
+int kvm_request_gsi_route(kvm_context_t kvm,
+			  struct kvm_gsi_route_guest *route);
+int kvm_free_gsi_route(kvm_context_t kvm,
+		       struct kvm_gsi_route_guest *route);
+#endif
+
 #endif
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] Figure out device capability
  2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
                   ` (2 preceding siblings ...)
  2009-01-07 10:49 ` [PATCH 3/6] kvm: ioctl for gsi_route Sheng Yang
@ 2009-01-07 10:49 ` Sheng Yang
  2009-01-07 10:49 ` [PATCH 5/6] Support for " Sheng Yang
  2009-01-07 10:49 ` [PATCH 6/6] kvm: expose MSI capability to guest Sheng Yang
  5 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, Sheng Yang, Allen Kay

Try to figure out device capabilities in update_dev_cap(). For now we only care
about the MSI capability.

The function pci_find_cap_offset() was originally written by Allen for Xen.
Note that the function needs root privileges to work, and it depends on libpci.

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |   29 +++++++++++++++++++++++++++++
 qemu/hw/device-assignment.h |    1 +
 2 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index f357d17..169357f 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -222,6 +222,35 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
                           (r_dev->v_addrs + region_num));
 }
 
+static uint8_t pci_find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
+{
+    int id;
+    int max_cap = 48;
+    int pos = PCI_CAPABILITY_LIST;
+    int status;
+
+    status = pci_read_byte(pci_dev, PCI_STATUS);
+    if ((status & PCI_STATUS_CAP_LIST) == 0)
+        return 0;
+
+    while (max_cap--) {
+        pos = pci_read_byte(pci_dev, pos);
+        if (pos < 0x40)
+            break;
+
+        pos &= ~3;
+        id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);
+
+        if (id == 0xff)
+            break;
+        if (id == cap)
+            return pos;
+
+        pos += PCI_CAP_LIST_NEXT;
+    }
+    return 0;
+}
+
 static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
                                           uint32_t val, int len)
 {
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index a565948..2d83566 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -29,6 +29,7 @@
 #define __DEVICE_ASSIGNMENT_H__
 
 #include <sys/mman.h>
+#include <pci/pci.h>
 #include "qemu-common.h"
 #include "sys-queue.h"
 #include "pci.h"
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] Support for device capability
  2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
                   ` (3 preceding siblings ...)
  2009-01-07 10:49 ` [PATCH 4/6] Figure out device capability Sheng Yang
@ 2009-01-07 10:49 ` Sheng Yang
  2009-01-07 10:49 ` [PATCH 6/6] kvm: expose MSI capability to guest Sheng Yang
  5 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, Sheng Yang

This framework can easily be extended to support device capabilities such as
MSI/MSI-X.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/pci.c |   85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/pci.h |   30 ++++++++++++++++++++
 2 files changed, 115 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index 8589dfa..d755516 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -351,11 +351,65 @@ static void pci_update_mappings(PCIDevice *d)
     }
 }
 
+int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len)
+{
+    if (pci_dev->cap.supported && address >= pci_dev->cap.start &&
+            (address + len) < pci_dev->cap.start + pci_dev->cap.length)
+        return 1;
+    return 0;
+}
+
+uint32_t pci_default_cap_read_config(PCIDevice *pci_dev,
+                                     uint32_t address, int len)
+{
+    uint32_t val = 0;
+
+    if (pci_access_cap_config(pci_dev, address, len)) {
+        switch(len) {
+        default:
+        case 4:
+            if (address < pci_dev->cap.start + pci_dev->cap.length - 4) {
+                val = le32_to_cpu(*(uint32_t *)(pci_dev->cap.config
+                            + address - pci_dev->cap.start));
+                break;
+            }
+            /* fall through */
+        case 2:
+            if (address < pci_dev->cap.start + pci_dev->cap.length - 2) {
+                val = le16_to_cpu(*(uint16_t *)(pci_dev->cap.config
+                            + address - pci_dev->cap.start));
+                break;
+            }
+            /* fall through */
+        case 1:
+            val = pci_dev->cap.config[address - pci_dev->cap.start];
+            break;
+        }
+    }
+    return val;
+}
+
+void pci_default_cap_write_config(PCIDevice *pci_dev,
+                                  uint32_t address, uint32_t val, int len)
+{
+    if (pci_access_cap_config(pci_dev, address, len)) {
+        int i;
+        for (i = 0; i < len; i++) {
+            pci_dev->cap.config[address + i - pci_dev->cap.start] = val;
+            val >>= 8;
+        }
+        return;
+    }
+}
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len)
 {
     uint32_t val;
 
+    if (pci_access_cap_config(d, address, len))
+        return d->cap.config_read(d, address, len);
+
     switch(len) {
     default:
     case 4:
@@ -409,6 +463,11 @@ void pci_default_write_config(PCIDevice *d,
         return;
     }
  default_config:
+    if (pci_access_cap_config(d, address, len)) {
+        d->cap.config_write(d, address, val, len);
+        return;
+    }
+
     /* not efficient, but simple */
     addr = address;
     for(i = 0; i < len; i++) {
@@ -828,3 +887,29 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,
     s->bus = pci_register_secondary_bus(&s->dev, map_irq);
     return s->bus;
 }
+
+void pci_enable_capability_support(PCIDevice *pci_dev,
+                                   uint32_t config_start,
+                                   PCICapConfigReadFunc *config_read,
+                                   PCICapConfigWriteFunc *config_write,
+                                   PCICapConfigInitFunc *config_init)
+{
+    if (!pci_dev)
+        return;
+
+    if (config_start >= 0x40 && config_start < 0xff)
+        pci_dev->cap.start = config_start;
+    else
+        pci_dev->cap.start = PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR;
+    if (config_read)
+        pci_dev->cap.config_read = config_read;
+    else
+        pci_dev->cap.config_read = pci_default_cap_read_config;
+    if (config_write)
+        pci_dev->cap.config_write = config_write;
+    else
+        pci_dev->cap.config_write = pci_default_cap_write_config;
+    pci_dev->cap.supported = 1;
+    pci_dev->config[0x34] = pci_dev->cap.start;
+    config_init(pci_dev);
+}
diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
index 1f33819..f2a622c 100644
--- a/qemu/hw/pci.h
+++ b/qemu/hw/pci.h
@@ -28,6 +28,12 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 uint32_t addr, uint32_t size, int type);
 typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
 
+typedef void PCICapConfigWriteFunc(PCIDevice *pci_dev,
+                                   uint32_t address, uint32_t val, int len);
+typedef uint32_t PCICapConfigReadFunc(PCIDevice *pci_dev,
+                                      uint32_t address, int len);
+typedef void PCICapConfigInitFunc(PCIDevice *pci_dev);
+
 #define PCI_ADDRESS_SPACE_MEM		0x00
 #define PCI_ADDRESS_SPACE_IO		0x01
 #define PCI_ADDRESS_SPACE_MEM_PREFETCH	0x08
@@ -78,6 +84,10 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+#define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60
+#define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40
+#define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
+
 struct PCIDevice {
     /* PCI config space */
     uint8_t config[256];
@@ -100,6 +110,15 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Device capability configuration space */
+    struct {
+        int supported;
+        uint8_t config[PCI_CAPABILITY_CONFIG_MAX_LENGTH];
+        unsigned int start, length;
+        PCICapConfigReadFunc *config_read;
+        PCICapConfigWriteFunc *config_write;
+    } cap;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
@@ -113,6 +132,12 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+void pci_enable_capability_support(PCIDevice *pci_dev,
+                                   uint32_t config_start,
+                                   PCICapConfigReadFunc *config_read,
+                                   PCICapConfigWriteFunc *config_write,
+                                   PCICapConfigInitFunc *config_init);
+
 int pci_map_irq(PCIDevice *pci_dev, int pin);
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
@@ -120,6 +145,11 @@ void pci_default_write_config(PCIDevice *d,
                               uint32_t address, uint32_t val, int len);
 void pci_device_save(PCIDevice *s, QEMUFile *f);
 int pci_device_load(PCIDevice *s, QEMUFile *f);
+uint32_t pci_default_cap_read_config(PCIDevice *pci_dev,
+                                     uint32_t address, int len);
+void pci_default_cap_write_config(PCIDevice *pci_dev,
+                                  uint32_t address, uint32_t val, int len);
+int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len);
 
 typedef void (*pci_set_irq_fn)(qemu_irq *pic, int irq_num, int level);
 typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] kvm: expose MSI capability to guest
  2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
                   ` (4 preceding siblings ...)
  2009-01-07 10:49 ` [PATCH 5/6] Support for " Sheng Yang
@ 2009-01-07 10:49 ` Sheng Yang
  5 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-01-07 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |  111 ++++++++++++++++++++++++++++++++++++++++---
 qemu/hw/device-assignment.h |    7 +++
 2 files changed, 111 insertions(+), 7 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 169357f..4c08b00 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -268,7 +268,8 @@ static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
     }
 
     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
-        address == 0x3c || address == 0x3d) {
+        address == 0x3c || address == 0x3d ||
+        pci_access_cap_config(d, address, len)) {
         /* used for update-mappings (BAR emulation) */
         pci_default_write_config(d, address, val, len);
         return;
@@ -302,7 +303,8 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
     AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
 
     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
-        address == 0x3c || address == 0x3d) {
+        address == 0x3c || address == 0x3d ||
+        pci_access_cap_config(d, address, len)) {
         val = pci_default_read_config(d, address, len);
         DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
               (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
@@ -331,11 +333,13 @@ do_log:
     DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
           (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
 
-    /* kill the special capabilities */
-    if (address == 4 && len == 4)
-        val &= ~0x100000;
-    else if (address == 6)
-        val &= ~0x10;
+    if (!pci_dev->cap.available) {
+        /* kill the special capabilities */
+        if (address == 4 && len == 4)
+            val &= ~0x100000;
+        else if (address == 6)
+            val &= ~0x10;
+    }
 
     return val;
 }
@@ -566,6 +570,95 @@ void assigned_dev_update_irq(PCIDevice *d)
     }
 }
 
+#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_GSI_ROUTE)
+static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
+{
+    struct kvm_assigned_irq assigned_irq_data;
+    struct kvm_gsi_route_guest gsi_route;
+    struct kvm_gsi_route_entry_guest gsi_entry[1];
+    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+    uint8_t ctrl_byte = pci_dev->cap.config[ctrl_pos];
+
+    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
+    assigned_irq_data.assigned_dev_id  =
+            calc_assigned_dev_id(assigned_dev->h_busnr,
+                    (uint8_t)assigned_dev->h_devfn);
+
+    if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
+	gsi_route.entries_nr = 1;
+        gsi_entry[0].msi.addr_lo = *(uint32_t *)(pci_dev->cap.config +
+                                            PCI_MSI_ADDRESS_LO);
+        gsi_entry[0].msi.data = *(uint16_t *)(pci_dev->cap.config +
+                                         PCI_MSI_DATA_32);
+        gsi_entry[0].type = KVM_GSI_ROUTE_MSI;
+        gsi_route.entries = gsi_entry;
+        if (kvm_request_gsi_route(kvm_context, &gsi_route) < 0) {
+            perror("assigned_dev_enable_msi: kvm_request_gsi_route");
+            assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
+            return;
+        }
+        assigned_irq_data.guest_irq = gsi_entry[0].gsi;
+        assigned_irq_data.flags = KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+    } else
+	assigned_irq_data.guest_irq = assigned_dev->girq;
+
+    if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
+        perror("assigned_dev_enable_msi");
+    if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+        assigned_dev->cap.state |= ASSIGNED_DEVICE_MSI_ENABLED;
+        pci_dev->cap.config[ctrl_pos] |= PCI_MSI_FLAGS_ENABLE;
+    } else {
+        assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
+        pci_dev->cap.config[ctrl_pos] &= ~PCI_MSI_FLAGS_ENABLE;
+    }
+}
+#endif
+
+void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
+                                          uint32_t val, int len)
+{
+    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+    unsigned int pos = pci_dev->cap.start, ctrl_pos;
+
+    pci_default_cap_write_config(pci_dev, address, val, len);
+#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_GSI_ROUTE)
+    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
+        ctrl_pos = pos + PCI_MSI_FLAGS;
+        if (address <= ctrl_pos && address + len > ctrl_pos)
+            assigned_dev_update_msi(pci_dev, ctrl_pos - pci_dev->cap.start);
+        pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    }
+#endif
+    return;
+}
+
+static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
+{
+    AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
+    int next_cap_pt;
+    struct pci_access *pacc;
+    int h_bus, h_dev, h_func;
+
+    pci_dev->cap.length = 0;
+    h_bus = dev->h_busnr;
+    h_dev = dev->h_devfn >> 3;
+    h_func = dev->h_devfn & 0x07;
+    pacc = pci_alloc();
+    pci_init(pacc);
+    dev->pdev = pci_get_dev(pacc, 0, h_bus, h_dev, h_func);
+    pci_cleanup(pacc);
+#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_GSI_ROUTE)
+    /* Expose MSI capability
+     * MSI capability is the 1st capability in cap.config */
+    if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) {
+        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
+        pci_dev->cap.config[pci_dev->cap.length] = PCI_CAP_ID_MSI;
+        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+        next_cap_pt = 1;
+    }
+#endif
+}
+
 struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
 {
     int r;
@@ -633,6 +726,10 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
 	return NULL;
     }
 
+    pci_enable_capability_support(pci_dev, 0, NULL,
+                    assigned_device_pci_cap_write_config,
+                    assigned_device_pci_cap_init);
+
     return &dev->dev;
 }
 
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index 2d83566..292ab32 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -81,6 +81,13 @@ typedef struct {
     unsigned char h_busnr;
     unsigned int h_devfn;
     int bound;
+    struct pci_dev *pdev;
+    struct {
+#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
+        uint32_t available;
+#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
+        uint32_t state;
+    } cap;
 } AssignedDevice;
 
 typedef struct AssignedDevInfo AssignedDevInfo;
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-01-07 10:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-07 10:49 [PATCH 0/6][v3] Userspace support for MSI Sheng Yang
2009-01-07 10:49 ` [PATCH 1/6] kvm: Replace force type convert with container_of() Sheng Yang
2009-01-07 10:49 ` [PATCH 2/6] Make device assignment depend on libpci Sheng Yang
2009-01-07 10:49 ` [PATCH 3/6] kvm: ioctl for gsi_route Sheng Yang
2009-01-07 10:49 ` [PATCH 4/6] Figure out device capability Sheng Yang
2009-01-07 10:49 ` [PATCH 5/6] Support for " Sheng Yang
2009-01-07 10:49 ` [PATCH 6/6] kvm: expose MSI capability to guest Sheng Yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).