All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3][v3] Userspace for MSI-X enabling
@ 2009-02-11  8:14 Sheng Yang
  2009-02-11  8:14 ` [PATCH 1/3] Add MSI-X related macro to pci.c Sheng Yang
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Sheng Yang @ 2009-02-11  8:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Anthony Liguori

Update from v2:

Change the interface to irq routing method.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] Add MSI-X related macro to pci.c
  2009-02-11  8:14 [PATCH 0/3][v3] Userspace for MSI-X enabling Sheng Yang
@ 2009-02-11  8:14 ` Sheng Yang
  2009-02-11  8:14 ` [PATCH 2/3] kvm: add ioctl KVM_SET_MSIX_ENTRY_NR and KVM_SET_MSIX_ENTRY Sheng Yang
  2009-02-11  8:14 ` [PATCH 3/3] kvm: enable MSI-X capabilty for assigned device Sheng Yang
  2 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-02-11  8:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Anthony Liguori, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/pci.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
index b6fc330..8466cf8 100644
--- a/qemu/hw/pci.h
+++ b/qemu/hw/pci.h
@@ -185,6 +185,7 @@ typedef struct PCIIORegion {
 #define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60
 #define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40
 #define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
+#define PCI_CAPABILITY_CONFIG_MSIX_LENGTH 0x10
 
 struct PCIDevice {
     /* PCI config space */
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] kvm: add ioctl KVM_SET_MSIX_ENTRY_NR and KVM_SET_MSIX_ENTRY
  2009-02-11  8:14 [PATCH 0/3][v3] Userspace for MSI-X enabling Sheng Yang
  2009-02-11  8:14 ` [PATCH 1/3] Add MSI-X related macro to pci.c Sheng Yang
@ 2009-02-11  8:14 ` Sheng Yang
  2009-02-11  8:14 ` [PATCH 3/3] kvm: enable MSI-X capabilty for assigned device Sheng Yang
  2 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-02-11  8:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Anthony Liguori, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 libkvm/libkvm.c |   25 +++++++++++++++++++++++++
 libkvm/libkvm.h |    7 +++++++
 2 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index b4433bd..ef0066a 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -1342,3 +1342,28 @@ int kvm_get_irq_route_gsi(kvm_context_t kvm)
 		kvm->max_used_gsi : KVM_IOAPIC_NUM_PINS;
 }
 
+#ifdef KVM_CAP_DEVICE_MSIX
+int kvm_set_msix_nr(kvm_context_t kvm,
+                    struct kvm_assigned_msix_nr *msix_nr)
+{
+        int ret;
+
+        ret = ioctl(kvm->vm_fd, KVM_SET_MSIX_NR, msix_nr);
+        if (ret < 0)
+                return -errno;
+
+        return ret;
+}
+
+int kvm_set_msix_entry(kvm_context_t kvm,
+                       struct kvm_assigned_msix_entry *entry)
+{
+        int ret;
+
+        ret = ioctl(kvm->vm_fd, KVM_SET_MSIX_ENTRY, entry);
+        if (ret < 0)
+                return -errno;
+
+        return ret;
+}
+#endif
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index 3c09be1..e2f6787 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -828,4 +828,11 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  * \param kvm Pointer to the current kvm_context
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
+
+#ifdef KVM_CAP_DEVICE_MSIX
+int kvm_set_msix_nr(kvm_context_t kvm,
+                    struct kvm_assigned_msix_nr *msix_nr);
+int kvm_set_msix_entry(kvm_context_t kvm,
+                       struct kvm_assigned_msix_entry *entry);
+#endif
 #endif
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] kvm: enable MSI-X capabilty for assigned device
  2009-02-11  8:14 [PATCH 0/3][v3] Userspace for MSI-X enabling Sheng Yang
  2009-02-11  8:14 ` [PATCH 1/3] Add MSI-X related macro to pci.c Sheng Yang
  2009-02-11  8:14 ` [PATCH 2/3] kvm: add ioctl KVM_SET_MSIX_ENTRY_NR and KVM_SET_MSIX_ENTRY Sheng Yang
@ 2009-02-11  8:14 ` Sheng Yang
  2009-02-12 10:14   ` Sheng Yang
  2 siblings, 1 reply; 7+ messages in thread
From: Sheng Yang @ 2009-02-11  8:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Anthony Liguori, Sheng Yang

The most important part here, is we emulate a page of MMIO region using a
page of memory. That's because MSI-X table was put in the region and we have to
intercept it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |  287 ++++++++++++++++++++++++++++++++++++++++++-
 qemu/hw/device-assignment.h |    6 +
 2 files changed, 287 insertions(+), 6 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 3ff8fde..efdf2ca 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -146,6 +146,7 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 {
     AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+    PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     uint32_t old_ephys = region->e_physbase;
     uint32_t old_esize = region->e_size;
     int first_map = (region->e_size == 0);
@@ -161,10 +162,27 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 	kvm_destroy_phys_mem(kvm_context, old_ephys,
                              TARGET_PAGE_ALIGN(old_esize));
 
-    if (e_size > 0)
+    if (e_size > 0) {
+        /* deal with MSI-X MMIO page */
+        if (real_region->base_addr <= r_dev->msix_table_addr &&
+                real_region->base_addr + real_region->size >=
+                r_dev->msix_table_addr) {
+            int offset = r_dev->msix_table_addr - real_region->base_addr;
+            ret = munmap(region->u.r_virtbase + offset, TARGET_PAGE_SIZE);
+            if (ret == 0)
+                DEBUG("munmap done, virt_base 0x%p\n",
+                        region->u.r_virtbase + offset);
+            else {
+                fprintf(stderr, "%s: fail munmap msix table!\n", __func__);
+                exit(1);
+            }
+            cpu_register_physical_memory(e_phys + offset,
+                    TARGET_PAGE_SIZE, r_dev->mmio_index);
+        }
 	ret = kvm_register_phys_mem(kvm_context, e_phys,
                                     region->u.r_virtbase,
                                     TARGET_PAGE_ALIGN(e_size), 0);
+    }
 
     if (ret != 0) {
 	fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
@@ -568,7 +586,9 @@ void assigned_dev_update_irqs()
     }
 }
 
-#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_IRQ_ROUTING)
+#ifdef KVM_CAP_IRQ_ROUTING
+
+#ifdef KVM_CAP_DEVICE_MSI
 static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
@@ -613,14 +633,147 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 }
 #endif
 
-void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
+#ifdef KVM_CAP_DEVICE_MSIX
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    u16 entries_nr = 0, entries_max_nr;
+    int pos = 0, i, r = 0;
+    u32 msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    struct kvm_irq_routing_entry routing_entry;
+    void *va = adev->msix_table_page;
+
+    if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
+        pos = PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    entries_max_nr = pci_dev->cap.config[pos + 2];
+    entries_max_nr &= PCI_MSIX_TABSIZE;
+
+    /* Get the usable entry number for allocating */
+    for (i = 0; i < entries_max_nr; i++) {
+        memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+        /* 0x1 is mask bit for per vector */
+        if (msg_ctrl & 0x1)
+            continue;
+        memcpy(&msg_data, va + i * 16 + 8, 4);
+        /* Ignore unused entry even it's unmasked */
+        if (msg_data == 0)
+            continue;
+        entries_nr ++;
+    }
+
+    if (entries_nr == 0) {
+        fprintf(stderr, "MSI-X entry number is zero!\n");
+        return -EINVAL;
+    }
+    msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
+                                          (uint8_t)adev->h_devfn);
+    msix_nr.entry_nr = entries_nr;
+    r = kvm_set_msix_nr(kvm_context, &msix_nr);
+    if (r != 0) {
+        fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
+			strerror(-r));
+        return r;
+    }
+
+    msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
+    entries_nr = 0;
+    for (i = 0; i < entries_max_nr; i++) {
+        if (entries_nr >= msix_nr.entry_nr)
+            break;
+        memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+        if (msg_ctrl & 0x1)
+            continue;
+        memcpy(&msg_data, va + i * 16 + 8, 4);
+        if (msg_data == 0)
+            continue;
+
+        memcpy(&msg_addr, va + i * 16, 4);
+        memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
+
+        routing_entry.gsi = kvm_get_irq_route_gsi(kvm_context);
+        routing_entry.type = KVM_IRQ_ROUTING_MSI;
+        routing_entry.flags = 0;
+        routing_entry.u.msi.address_lo = msg_addr;
+        routing_entry.u.msi.address_hi = msg_upper_addr;
+        routing_entry.u.msi.data = msg_data;
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
+	kvm_add_routing_entry(kvm_context, &routing_entry);
+
+	msix_entry.gsi = routing_entry.gsi;
+        msix_entry.entry = i;
+        msix_entry.pos = entries_nr;
+        r = kvm_set_msix_entry(kvm_context, &msix_entry);
+        if (r) {
+            fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
+            break;
+        }
+        DEBUG("MSI-X entry gsi 0x%x, entry %d, pos %d\n!",
+                msix_entry.gsi, msix_entry.entry, msix_entry.pos);
+        entries_nr ++;
+    }
+
+    if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) {
+	    perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
+	    return -EINVAL;
+    }
+
+    return r;
+}
+
+static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
+{
+    struct kvm_assigned_irq assigned_irq_data;
+    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+    uint16_t *ctrl_word = (uint16_t *)(pci_dev->cap.config + ctrl_pos);
+
+    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
+    assigned_irq_data.assigned_dev_id  =
+            calc_assigned_dev_id(assigned_dev->h_busnr,
+                    (uint8_t)assigned_dev->h_devfn);
+
+    if (*ctrl_word & PCI_MSIX_ENABLE) {
+        assigned_irq_data.flags = KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
+        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+            perror("assigned_dev_update_msix_mmio");
+        }
+    }
+    if (*ctrl_word & PCI_MSIX_MASK)
+        assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_MASK_MSIX;
+
+    if ((*ctrl_word & PCI_MSIX_ENABLE) !=
+		(assigned_dev->cap.state & ASSIGNED_DEVICE_MSIX_ENABLED))
+	    if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
+		    perror("assigned_dev_enable_msix");
+
+    if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX) {
+        assigned_dev->cap.state |= ASSIGNED_DEVICE_MSIX_ENABLED;
+        *ctrl_word |= PCI_MSIX_ENABLE;
+    } else {
+        assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSIX_ENABLED;
+        *ctrl_word &= ~PCI_MSIX_ENABLE;
+    }
+    if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) {
+        assigned_dev->cap.state |= ASSIGNED_DEVICE_MSIX_MASKED;
+        *ctrl_word |= PCI_MSIX_MASK;
+    } else {
+        assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSIX_MASKED;
+        *ctrl_word &= ~PCI_MSIX_MASK;
+    }
+}
+#endif
+#endif
+
+static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
                                           uint32_t val, int len)
 {
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
     unsigned int pos = pci_dev->cap.start, ctrl_pos;
 
     pci_default_cap_write_config(pci_dev, address, val, len);
-#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_IRQ_ROUTING)
+#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef KVM_CAP_DEVICE_MSI
     if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
         ctrl_pos = pos + PCI_MSI_FLAGS;
         if (address <= ctrl_pos && address + len > ctrl_pos)
@@ -628,13 +781,25 @@ void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
         pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
     }
 #endif
+#ifdef KVM_CAP_DEVICE_MSIX
+    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
+        ctrl_pos = pos + 3;
+        if (address <= ctrl_pos && address + len > ctrl_pos) {
+            ctrl_pos--; /* control is word long */
+            assigned_dev_update_msix(pci_dev, ctrl_pos - pci_dev->cap.start);
+	}
+        pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
+    }
+#endif
+#endif
     return;
 }
 
 static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
 {
     AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
-    int next_cap_pt;
+    PCIRegion *pci_region = dev->real_device.regions;
+    int next_cap_pt = 0;
     struct pci_access *pacc;
     int h_bus, h_dev, h_func;
 
@@ -646,7 +811,8 @@ static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
     pci_init(pacc);
     dev->pdev = pci_get_dev(pacc, 0, h_bus, h_dev, h_func);
     pci_cleanup(pacc);
-#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_IRQ_ROUTING)
+#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef KVM_CAP_DEVICE_MSI
     /* Expose MSI capability
      * MSI capability is the 1st capability in cap.config */
     if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) {
@@ -656,6 +822,110 @@ static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
         next_cap_pt = 1;
     }
 #endif
+#ifdef KVM_CAP_DEVICE_MSIX
+    /* Expose MSI-X capability */
+    if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX)) {
+        int pos, entry_nr, bar_nr;
+        u32 msix_table_entry;
+        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
+        pos = pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX);
+        entry_nr = pci_read_word(dev->pdev, pos + 2) & PCI_MSIX_TABSIZE;
+        pci_dev->cap.config[pci_dev->cap.length] = 0x11;
+        pci_dev->cap.config[pci_dev->cap.length + 2] = entry_nr;
+        msix_table_entry = pci_read_long(dev->pdev, pos + PCI_MSIX_TABLE);
+        *(uint32_t *)(pci_dev->cap.config +
+                      pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry;
+        *(uint32_t *)(pci_dev->cap.config +
+                      pci_dev->cap.length + PCI_MSIX_PBA) =
+                    pci_read_long(dev->pdev, pos + PCI_MSIX_PBA);
+        bar_nr = msix_table_entry & PCI_MSIX_BIR;
+        msix_table_entry &= ~PCI_MSIX_BIR;
+        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        if (next_cap_pt != 0) {
+            pci_dev->cap.config[next_cap_pt] =
+                pci_dev->cap.start + pci_dev->cap.length;
+            next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+        } else
+            next_cap_pt = 1;
+        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
+    }
+#endif
+#endif
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    AssignedDevice *adev = opaque;
+    unsigned int offset = addr & 0xfff;
+    void *page = adev->msix_table_page;
+    uint32_t val = 0;
+
+    DEBUG("read from MSI-X entry table mmio 0x%lx", addr);
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    return ((msix_mmio_readl(opaque, addr & ~3)) >>
+            (8 * (addr & 3))) & 0xff;
+}
+
+static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    return ((msix_mmio_readl(opaque, addr & ~3)) >>
+            (8 * (addr & 3))) & 0xffff;
+}
+
+static void msix_mmio_writel(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    AssignedDevice *adev = opaque;
+    unsigned int offset = addr & 0xfff;
+    void *page = adev->msix_table_page;
+
+    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
+		    addr, val);
+    memcpy((void *)((char *)page + offset), &val, 4);
+}
+
+static void msix_mmio_writew(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    msix_mmio_writel(opaque, addr & ~3,
+                     (val & 0xffff) << (8*(addr & 3)));
+}
+
+static void msix_mmio_writeb(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    msix_mmio_writel(opaque, addr & ~3,
+                     (val & 0xff) << (8*(addr & 3)));
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_writeb,	msix_mmio_writew,	msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_readb,	msix_mmio_readw,	msix_mmio_readl
+};
+
+static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
+{
+    dev->msix_table_page = mmap(NULL, 0x1000,
+                                PROT_READ|PROT_WRITE,
+                                MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+    memset(dev->msix_table_page, 0, 0x1000);
+    if (dev->msix_table_page == MAP_FAILED) {
+        fprintf(stderr, "fail allocate msix_table_page! %s\n",
+                strerror(errno));
+        return -EFAULT;
+    }
+    dev->mmio_index = cpu_register_io_memory(0,
+                        msix_mmio_read, msix_mmio_write, dev);
+    return 0;
 }
 
 struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
@@ -729,6 +999,11 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
                     assigned_device_pci_cap_write_config,
                     assigned_device_pci_cap_init);
 
+    /* intercept MSI-X entry page in the MMIO */
+    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
+        if (assigned_dev_register_msix_mmio(dev))
+            return NULL;
+
     return &dev->dev;
 }
 
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index cc7164f..4697e82 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -84,10 +84,16 @@ typedef struct {
     struct pci_dev *pdev;
     struct {
 #define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
+#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1)
         uint32_t available;
 #define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
+#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1)
+#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
+    void *msix_table_page;
+    target_phys_addr_t msix_table_addr;
+    int mmio_index;
 } AssignedDevice;
 
 typedef struct AssignedDevInfo AssignedDevInfo;
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] kvm: enable MSI-X capabilty for assigned device
  2009-02-11  8:14 ` [PATCH 3/3] kvm: enable MSI-X capabilty for assigned device Sheng Yang
@ 2009-02-12 10:14   ` Sheng Yang
  0 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2009-02-12 10:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Sheng Yang

The most important part here, is we emulate a page of MMIO region using a
page of memory. That's because MSI-X table was put in the region and we have to
intercept it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |  286 ++++++++++++++++++++++++++++++++++++++++++-
 qemu/hw/device-assignment.h |    6 +
 2 files changed, 286 insertions(+), 6 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 3ff8fde..e51d0a2 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -146,6 +146,7 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 {
     AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+    PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     uint32_t old_ephys = region->e_physbase;
     uint32_t old_esize = region->e_size;
     int first_map = (region->e_size == 0);
@@ -161,10 +162,27 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 	kvm_destroy_phys_mem(kvm_context, old_ephys,
                              TARGET_PAGE_ALIGN(old_esize));
 
-    if (e_size > 0)
+    if (e_size > 0) {
+        /* deal with MSI-X MMIO page */
+        if (real_region->base_addr <= r_dev->msix_table_addr &&
+                real_region->base_addr + real_region->size >=
+                r_dev->msix_table_addr) {
+            int offset = r_dev->msix_table_addr - real_region->base_addr;
+            ret = munmap(region->u.r_virtbase + offset, TARGET_PAGE_SIZE);
+            if (ret == 0)
+                DEBUG("munmap done, virt_base 0x%p\n",
+                        region->u.r_virtbase + offset);
+            else {
+                fprintf(stderr, "%s: fail munmap msix table!\n", __func__);
+                exit(1);
+            }
+            cpu_register_physical_memory(e_phys + offset,
+                    TARGET_PAGE_SIZE, r_dev->mmio_index);
+        }
 	ret = kvm_register_phys_mem(kvm_context, e_phys,
                                     region->u.r_virtbase,
                                     TARGET_PAGE_ALIGN(e_size), 0);
+    }
 
     if (ret != 0) {
 	fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
@@ -568,7 +586,9 @@ void assigned_dev_update_irqs()
     }
 }
 
-#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_IRQ_ROUTING)
+#ifdef KVM_CAP_IRQ_ROUTING
+
+#ifdef KVM_CAP_DEVICE_MSI
 static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
@@ -613,14 +633,146 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 }
 #endif
 
-void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
+#ifdef KVM_CAP_DEVICE_MSIX
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    u16 entries_nr = 0, entries_max_nr;
+    int pos = 0, i, r = 0;
+    u32 msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    struct kvm_irq_routing_entry routing_entry;
+    void *va = adev->msix_table_page;
+
+    if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
+        pos = PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    entries_max_nr = pci_dev->cap.config[pos + 2];
+    entries_max_nr &= PCI_MSIX_TABSIZE;
+
+    /* Get the usable entry number for allocating */
+    for (i = 0; i < entries_max_nr; i++) {
+        memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+        /* 0x1 is mask bit for per vector */
+        if (msg_ctrl & 0x1)
+            continue;
+        memcpy(&msg_data, va + i * 16 + 8, 4);
+        /* Ignore unused entry even it's unmasked */
+        if (msg_data == 0)
+            continue;
+        entries_nr ++;
+    }
+
+    if (entries_nr == 0) {
+        fprintf(stderr, "MSI-X entry number is zero!\n");
+        return -EINVAL;
+    }
+    msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
+                                          (uint8_t)adev->h_devfn);
+    msix_nr.entry_nr = entries_nr;
+    r = kvm_set_msix_nr(kvm_context, &msix_nr);
+    if (r != 0) {
+        fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
+			strerror(-r));
+        return r;
+    }
+
+    msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
+    entries_nr = 0;
+    for (i = 0; i < entries_max_nr; i++) {
+        if (entries_nr >= msix_nr.entry_nr)
+            break;
+        memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+        if (msg_ctrl & 0x1)
+            continue;
+        memcpy(&msg_data, va + i * 16 + 8, 4);
+        if (msg_data == 0)
+            continue;
+
+        memcpy(&msg_addr, va + i * 16, 4);
+        memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
+
+        routing_entry.gsi = kvm_get_irq_route_gsi(kvm_context);
+        routing_entry.type = KVM_IRQ_ROUTING_MSI;
+        routing_entry.flags = 0;
+        routing_entry.u.msi.address_lo = msg_addr;
+        routing_entry.u.msi.address_hi = msg_upper_addr;
+        routing_entry.u.msi.data = msg_data;
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
+        kvm_add_routing_entry(kvm_context, &routing_entry);
+
+        msix_entry.gsi = routing_entry.gsi;
+        msix_entry.entry = i;
+        r = kvm_set_msix_entry(kvm_context, &msix_entry);
+        if (r) {
+            fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
+            break;
+        }
+        DEBUG("MSI-X entry gsi 0x%x, entry %d, pos %d\n!",
+                msix_entry.gsi, msix_entry.entry, msix_entry.pos);
+        entries_nr ++;
+    }
+
+    if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) {
+	    perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
+	    return -EINVAL;
+    }
+
+    return r;
+}
+
+static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
+{
+    struct kvm_assigned_irq assigned_irq_data;
+    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+    uint16_t *ctrl_word = (uint16_t *)(pci_dev->cap.config + ctrl_pos);
+
+    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
+    assigned_irq_data.assigned_dev_id  =
+            calc_assigned_dev_id(assigned_dev->h_busnr,
+                    (uint8_t)assigned_dev->h_devfn);
+
+    if (*ctrl_word & PCI_MSIX_ENABLE) {
+        assigned_irq_data.flags = KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
+        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+            perror("assigned_dev_update_msix_mmio");
+        }
+    }
+    if (*ctrl_word & PCI_MSIX_MASK)
+        assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_MASK_MSIX;
+
+    if ((*ctrl_word & PCI_MSIX_ENABLE) !=
+		(assigned_dev->cap.state & ASSIGNED_DEVICE_MSIX_ENABLED))
+	    if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
+		    perror("assigned_dev_enable_msix");
+
+    if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX) {
+        assigned_dev->cap.state |= ASSIGNED_DEVICE_MSIX_ENABLED;
+        *ctrl_word |= PCI_MSIX_ENABLE;
+    } else {
+        assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSIX_ENABLED;
+        *ctrl_word &= ~PCI_MSIX_ENABLE;
+    }
+    if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) {
+        assigned_dev->cap.state |= ASSIGNED_DEVICE_MSIX_MASKED;
+        *ctrl_word |= PCI_MSIX_MASK;
+    } else {
+        assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSIX_MASKED;
+        *ctrl_word &= ~PCI_MSIX_MASK;
+    }
+}
+#endif
+#endif
+
+static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
                                           uint32_t val, int len)
 {
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
     unsigned int pos = pci_dev->cap.start, ctrl_pos;
 
     pci_default_cap_write_config(pci_dev, address, val, len);
-#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_IRQ_ROUTING)
+#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef KVM_CAP_DEVICE_MSI
     if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
         ctrl_pos = pos + PCI_MSI_FLAGS;
         if (address <= ctrl_pos && address + len > ctrl_pos)
@@ -628,13 +780,25 @@ void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
         pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
     }
 #endif
+#ifdef KVM_CAP_DEVICE_MSIX
+    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
+        ctrl_pos = pos + 3;
+        if (address <= ctrl_pos && address + len > ctrl_pos) {
+            ctrl_pos--; /* control is word long */
+            assigned_dev_update_msix(pci_dev, ctrl_pos - pci_dev->cap.start);
+	}
+        pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
+    }
+#endif
+#endif
     return;
 }
 
 static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
 {
     AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
-    int next_cap_pt;
+    PCIRegion *pci_region = dev->real_device.regions;
+    int next_cap_pt = 0;
     struct pci_access *pacc;
     int h_bus, h_dev, h_func;
 
@@ -646,7 +810,8 @@ static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
     pci_init(pacc);
     dev->pdev = pci_get_dev(pacc, 0, h_bus, h_dev, h_func);
     pci_cleanup(pacc);
-#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_IRQ_ROUTING)
+#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef KVM_CAP_DEVICE_MSI
     /* Expose MSI capability
      * MSI capability is the 1st capability in cap.config */
     if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) {
@@ -656,6 +821,110 @@ static void assigned_device_pci_cap_init(PCIDevice *pci_dev)
         next_cap_pt = 1;
     }
 #endif
+#ifdef KVM_CAP_DEVICE_MSIX
+    /* Expose MSI-X capability */
+    if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX)) {
+        int pos, entry_nr, bar_nr;
+        u32 msix_table_entry;
+        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
+        pos = pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX);
+        entry_nr = pci_read_word(dev->pdev, pos + 2) & PCI_MSIX_TABSIZE;
+        pci_dev->cap.config[pci_dev->cap.length] = 0x11;
+        pci_dev->cap.config[pci_dev->cap.length + 2] = entry_nr;
+        msix_table_entry = pci_read_long(dev->pdev, pos + PCI_MSIX_TABLE);
+        *(uint32_t *)(pci_dev->cap.config +
+                      pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry;
+        *(uint32_t *)(pci_dev->cap.config +
+                      pci_dev->cap.length + PCI_MSIX_PBA) =
+                    pci_read_long(dev->pdev, pos + PCI_MSIX_PBA);
+        bar_nr = msix_table_entry & PCI_MSIX_BIR;
+        msix_table_entry &= ~PCI_MSIX_BIR;
+        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        if (next_cap_pt != 0) {
+            pci_dev->cap.config[next_cap_pt] =
+                pci_dev->cap.start + pci_dev->cap.length;
+            next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+        } else
+            next_cap_pt = 1;
+        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
+    }
+#endif
+#endif
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    AssignedDevice *adev = opaque;
+    unsigned int offset = addr & 0xfff;
+    void *page = adev->msix_table_page;
+    uint32_t val = 0;
+
+    DEBUG("read from MSI-X entry table mmio 0x%lx", addr);
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    return ((msix_mmio_readl(opaque, addr & ~3)) >>
+            (8 * (addr & 3))) & 0xff;
+}
+
+static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    return ((msix_mmio_readl(opaque, addr & ~3)) >>
+            (8 * (addr & 3))) & 0xffff;
+}
+
+static void msix_mmio_writel(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    AssignedDevice *adev = opaque;
+    unsigned int offset = addr & 0xfff;
+    void *page = adev->msix_table_page;
+
+    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
+		    addr, val);
+    memcpy((void *)((char *)page + offset), &val, 4);
+}
+
+static void msix_mmio_writew(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    msix_mmio_writel(opaque, addr & ~3,
+                     (val & 0xffff) << (8*(addr & 3)));
+}
+
+static void msix_mmio_writeb(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    msix_mmio_writel(opaque, addr & ~3,
+                     (val & 0xff) << (8*(addr & 3)));
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_writeb,	msix_mmio_writew,	msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_readb,	msix_mmio_readw,	msix_mmio_readl
+};
+
+static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
+{
+    dev->msix_table_page = mmap(NULL, 0x1000,
+                                PROT_READ|PROT_WRITE,
+                                MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+    memset(dev->msix_table_page, 0, 0x1000);
+    if (dev->msix_table_page == MAP_FAILED) {
+        fprintf(stderr, "fail allocate msix_table_page! %s\n",
+                strerror(errno));
+        return -EFAULT;
+    }
+    dev->mmio_index = cpu_register_io_memory(0,
+                        msix_mmio_read, msix_mmio_write, dev);
+    return 0;
 }
 
 struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
@@ -729,6 +998,11 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
                     assigned_device_pci_cap_write_config,
                     assigned_device_pci_cap_init);
 
+    /* intercept MSI-X entry page in the MMIO */
+    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
+        if (assigned_dev_register_msix_mmio(dev))
+            return NULL;
+
     return &dev->dev;
 }
 
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index cc7164f..4697e82 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -84,10 +84,16 @@ typedef struct {
     struct pci_dev *pdev;
     struct {
 #define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
+#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1)
         uint32_t available;
 #define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
+#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1)
+#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
+    void *msix_table_page;
+    target_phys_addr_t msix_table_addr;
+    int mmio_index;
 } AssignedDevice;
 
 typedef struct AssignedDevInfo AssignedDevInfo;
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 1/3] Add MSI-X related macro to pci.c
  2008-12-30  6:29 [Patch 0/3][v2] Userspace for MSI-X Sheng Yang
@ 2008-12-30  6:29 ` Sheng Yang
  0 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2008-12-30  6:29 UTC (permalink / raw)
  To: Avi Kivity, Anthony Liguori; +Cc: kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/pci.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
index f2a622c..22c5de1 100644
--- a/qemu/hw/pci.h
+++ b/qemu/hw/pci.h
@@ -87,6 +87,7 @@ typedef struct PCIIORegion {
 #define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60
 #define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40
 #define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
+#define PCI_CAPABILITY_CONFIG_MSIX_LENGTH 0x10
 
 struct PCIDevice {
     /* PCI config space */
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 1/3] Add MSI-X related macro to pci.c
  2008-12-23  8:32 [PATCH 0/3] Userspace for MSI-X Sheng Yang
@ 2008-12-23  8:32 ` Sheng Yang
  0 siblings, 0 replies; 7+ messages in thread
From: Sheng Yang @ 2008-12-23  8:32 UTC (permalink / raw)
  To: Avi Kivity, Anthony Liguori; +Cc: kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/pci.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
index 3a0e9c2..8b9ac5f 100644
--- a/qemu/hw/pci.h
+++ b/qemu/hw/pci.h
@@ -67,6 +67,7 @@ typedef struct PCIIORegion {
 #define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60
 #define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40
 #define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
+#define PCI_CAPABILITY_CONFIG_MSIX_LENGTH 0x10
 
 struct PCIDevice {
     /* PCI config space */
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-02-12 10:14 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-11  8:14 [PATCH 0/3][v3] Userspace for MSI-X enabling Sheng Yang
2009-02-11  8:14 ` [PATCH 1/3] Add MSI-X related macro to pci.c Sheng Yang
2009-02-11  8:14 ` [PATCH 2/3] kvm: add ioctl KVM_SET_MSIX_ENTRY_NR and KVM_SET_MSIX_ENTRY Sheng Yang
2009-02-11  8:14 ` [PATCH 3/3] kvm: enable MSI-X capabilty for assigned device Sheng Yang
2009-02-12 10:14   ` Sheng Yang
  -- strict thread matches above, loose matches on Subject: below --
2008-12-30  6:29 [Patch 0/3][v2] Userspace for MSI-X Sheng Yang
2008-12-30  6:29 ` [PATCH 1/3] Add MSI-X related macro to pci.c Sheng Yang
2008-12-23  8:32 [PATCH 0/3] Userspace for MSI-X Sheng Yang
2008-12-23  8:32 ` [PATCH 1/3] Add MSI-X related macro to pci.c Sheng Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.