All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices
@ 2010-12-22  9:25 Sheng Yang
  2010-12-22  9:25 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Sheng Yang @ 2010-12-22  9:25 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang

BTW: the first patch can be applied alone.

Sheng Yang (4):
  qemu-kvm: device assignment: Enabling MSI-X according to the entries'
    mask bit
  qemu-kvm: Ioctl for MSIX MMIO support
  qemu-kvm: Header file update for MSI-X MMIO support
  qemu-kvm: MSI-X MMIO support for assigned device

 hw/device-assignment.c  |  325 +++++++++++++++++++++++++++++++++++++++++------
 hw/device-assignment.h  |    9 +-
 kvm/include/linux/kvm.h |   22 +++
 qemu-kvm.c              |   50 +++++++
 qemu-kvm.h              |   18 +++
 5 files changed, 382 insertions(+), 42 deletions(-)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit
  2010-12-22  9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
@ 2010-12-22  9:25 ` Sheng Yang
  2010-12-22  9:25 ` [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support Sheng Yang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2010-12-22  9:25 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang

The old MSI-X enabling method assumes the entries are written before MSI-X
is enabled, but some OSes do not obey this, e.g. FreeBSD. This patch fixes
this by enabling MSI-X according to the entries' mask bits.

Also, according to the PCI spec, the mask bit of each MSI-X table entry
should be set after reset.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 hw/device-assignment.c |  188 +++++++++++++++++++++++++++++++++++++++++-------
 hw/device-assignment.h |    2 +-
 2 files changed, 162 insertions(+), 28 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 832c236..ed0b491 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1111,15 +1111,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 #endif
 
 #ifdef KVM_CAP_DEVICE_MSIX
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+
+#define PCI_MSIX_CTRL_MASKBIT	1ul
+static int get_msix_entries_max_nr(AssignedDevice *adev)
 {
-    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t entries_nr = 0, entries_max_nr;
-    int pos = 0, i, r = 0;
-    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
-    struct kvm_assigned_msix_nr msix_nr;
-    struct kvm_assigned_msix_entry msix_entry;
-    void *va = adev->msix_table_page;
+    int pos, entries_max_nr;
+    PCIDevice *pci_dev = &adev->dev;
 
     pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
@@ -1127,20 +1124,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     entries_max_nr &= PCI_MSIX_TABSIZE;
     entries_max_nr += 1;
 
+    return entries_max_nr;
+}
+
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    uint32_t msg_ctrl;
+    void *va = adev->msix_table_page;
+
+    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
+static int get_msix_valid_entries_nr(AssignedDevice *adev,
+				     uint16_t entries_max_nr)
+{
+    void *va = adev->msix_table_page;
+    uint32_t msg_ctrl;
+    uint16_t entries_nr = 0;
+    int i;
+
     /* Get the usable entry number for allocating */
     for (i = 0; i < entries_max_nr; i++) {
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
         /* Ignore unused entry even it's unmasked */
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
         entries_nr ++;
     }
+    return entries_nr;
+}
+
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
+                                         uint16_t entries_nr,
+                                         uint16_t entries_max_nr)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    int i, r = 0;
+    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    void *va = adev->msix_table_page;
 
-    if (entries_nr == 0) {
-        fprintf(stderr, "MSI-X entry number is zero!\n");
-        return -EINVAL;
-    }
     msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
@@ -1152,6 +1177,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1165,10 +1192,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         if (entries_nr >= msix_nr.entry_nr)
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
 
+        memcpy(&msg_data, va + i * 16 + 8, 4);
         memcpy(&msg_addr, va + i * 16, 4);
         memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
 
@@ -1182,17 +1209,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         adev->entry[entries_nr].u.msi.address_lo = msg_addr;
         adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
         adev->entry[entries_nr].u.msi.data = msg_data;
-        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
-	kvm_add_routing_entry(&adev->entry[entries_nr]);
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+        kvm_add_routing_entry(&adev->entry[entries_nr]);
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
             break;
         }
-        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
+        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
                 msix_entry.gsi, msix_entry.entry);
         entries_nr ++;
     }
@@ -1209,20 +1237,24 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+    uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos);
     int r;
+    uint16_t entries_nr, entries_max_nr;
+    int enable_msix;
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
             calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                     (uint8_t)assigned_dev->h_devfn);
 
+    enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
+                  !(ctrl_word & PCI_MSIX_MASK));
+
     /* Some guests gratuitously disable MSIX even if they're not using it,
      * try to catch this by only deassigning irqs if the guest is using
      * MSIX or intends to start. */
     if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
-        (*ctrl_word & PCI_MSIX_ENABLE)) {
-
+            enable_msix) {
         assigned_irq_data.flags = assigned_dev->irq_requested_type;
         free_dev_irq_entries(assigned_dev);
         r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
@@ -1231,16 +1263,30 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
-    if (*ctrl_word & PCI_MSIX_ENABLE) {
-        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
-                                  KVM_DEV_IRQ_GUEST_MSIX;
-
-        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+    entries_max_nr = assigned_dev->max_msix_entries_nr;
+    if (entries_max_nr == 0) {
+        fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
+        return;
+    }
+    /*
+     * Guest may try to enable MSI-X before setting MSI-X entry done, so
+     * let's wait until guest unmask the entries.
+     */
+    entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
+    if (entries_nr == 0)
+        return;
+    if (enable_msix) {
+        if (assigned_dev_update_msix_mmio(pci_dev,
+                    entries_nr, entries_max_nr) < 0) {
             perror("assigned_dev_update_msix_mmio");
             return;
         }
+        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+                                  KVM_DEV_IRQ_GUEST_MSIX;
         if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
             perror("assigned_dev_enable_msix: assign irq");
             return;
@@ -1341,6 +1387,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 #endif
@@ -1378,10 +1425,90 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+    int ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+    struct PCIDevice *pci_dev = &adev->dev;
+    uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    entries_max_nr = adev->max_msix_entries_nr;
+
+    /*
+     * Find the index of routing entry, it can be different from 'index' if
+     * empty entry existed in between
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1408,6 +1535,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
 
 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
+    int i;
     dev->msix_table_page = mmap(NULL, 0x1000,
                                 PROT_READ|PROT_WRITE,
                                 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1417,8 +1545,12 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
         return -EFAULT;
     }
     memset(dev->msix_table_page, 0, 0x1000);
+    for (i = 0; i < 0x1000; i += 0x10)
+        *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1435,6 +1567,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index c94a730..754e5c0 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,7 +104,7 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
-    int irq_entries_nr;
+    int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support
  2010-12-22  9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
  2010-12-22  9:25 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
@ 2010-12-22  9:25 ` Sheng Yang
  2010-12-22  9:25 ` [PATCH 3/4] qemu-kvm: Header file update for MSI-X " Sheng Yang
  2010-12-22  9:25 ` [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
  3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2010-12-22  9:25 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu-kvm.c |   14 ++++++++++++++
 qemu-kvm.h |    7 +++++++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 471306b..956b62a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1050,6 +1050,20 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm,
 }
 #endif
 
+#ifdef KVM_CAP_MSIX_MMIO
+int kvm_register_msix_mmio(kvm_context_t kvm,
+                           struct kvm_msix_mmio_user *mmio_user)
+{
+    return kvm_vm_ioctl(kvm_state, KVM_REGISTER_MSIX_MMIO, mmio_user);
+}
+
+int kvm_unregister_msix_mmio(kvm_context_t kvm,
+                             struct kvm_msix_mmio_user *mmio_user)
+{
+    return kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_MSIX_MMIO, mmio_user);
+}
+#endif
+
 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
 
 #include <sys/eventfd.h>
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 7e6edfb..86799e6 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -602,6 +602,13 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm,
                               struct kvm_assigned_msix_entry *entry);
 #endif
 
+#ifdef KVM_CAP_MSIX_MMIO
+int kvm_register_msix_mmio(kvm_context_t kvm,
+                           struct kvm_msix_mmio_user *mmio_user);
+int kvm_unregister_msix_mmio(kvm_context_t kvm,
+                             struct kvm_msix_mmio_user *mmio_user);
+#endif
+
 #else                           /* !CONFIG_KVM */
 
 typedef struct kvm_context *kvm_context_t;
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/4] qemu-kvm: Header file update for MSI-X MMIO support
  2010-12-22  9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
  2010-12-22  9:25 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
  2010-12-22  9:25 ` [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support Sheng Yang
@ 2010-12-22  9:25 ` Sheng Yang
  2010-12-22  9:25 ` [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
  3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2010-12-22  9:25 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 kvm/include/linux/kvm.h |   22 ++++++++++++++++++++++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index e46729e..e11d2b2 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
+#define KVM_EXIT_MSIX_ROUTING_UPDATE 19
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -530,6 +531,7 @@ struct kvm_enable_cap {
 #ifdef __KVM_HAVE_XCRS
 #define KVM_CAP_XCRS 56
 #endif
+#define KVM_CAP_MSIX_MMIO 60
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -660,6 +662,9 @@ struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MMIO */
+#define KVM_REGISTER_MSIX_MMIO    _IOW(KVMIO, 0x7d, struct kvm_msix_mmio_user)
+#define KVM_UNREGISTER_MSIX_MMIO  _IOW(KVMIO, 0x7e, struct kvm_msix_mmio_user)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
@@ -781,4 +786,21 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV	    (1 << 0)
+
+#define KVM_MSIX_MMIO_TYPE_BASE_TABLE	    (1 << 8)
+#define KVM_MSIX_MMIO_TYPE_BASE_PBA	    (1 << 9)
+
+#define KVM_MSIX_MMIO_TYPE_DEV_MASK	    0x00ff
+#define KVM_MSIX_MMIO_TYPE_BASE_MASK	    0xff00
+struct kvm_msix_mmio_user {
+	__u32 dev_id;
+	__u16 type;
+	__u16 max_entries_nr;
+	__u64 base_addr;
+	__u64 base_va;
+	__u64 flags;
+	__u64 reserved[4];
+};
+
 #endif /* __LINUX_KVM_H */
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device
  2010-12-22  9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
                   ` (2 preceding siblings ...)
  2010-12-22  9:25 ` [PATCH 3/4] qemu-kvm: Header file update for MSI-X " Sheng Yang
@ 2010-12-22  9:25 ` Sheng Yang
  3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2010-12-22  9:25 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 hw/device-assignment.c |  143 +++++++++++++++++++++++++++++++++++++++++++-----
 hw/device-assignment.h |    7 ++-
 qemu-kvm.c             |   36 ++++++++++++
 qemu-kvm.h             |   11 ++++
 4 files changed, 180 insertions(+), 17 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index ed0b491..0aec1f4 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -67,6 +67,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
                                                  uint32_t address,
                                                  uint32_t val, int len);
 
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -269,6 +274,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     int ret = 0;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+    struct kvm_msix_mmio_user msix_mmio;
+#endif
 
     DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
           e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -287,6 +296,45 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 
             cpu_register_physical_memory(e_phys + offset,
                     TARGET_PAGE_SIZE, r_dev->mmio_index);
+#ifdef KVM_CAP_MSIX_MMIO
+            if (cap_mask) {
+                r_dev->guest_msix_table_addr = e_phys + offset;
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+				KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.base_va = (unsigned long)r_dev->msix_table_page;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                msix_mmio.flags = 0;
+                ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+                if (ret)
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+            }
+#endif
+        if (real_region->base_addr <= r_dev->msix_pba_addr &&
+                real_region->base_addr + real_region->size >=
+                r_dev->msix_pba_addr) {
+#ifdef KVM_CAP_MSIX_MMIO
+            int offset = r_dev->msix_pba_addr - real_region->base_addr;
+            if (cap_mask) {
+                r_dev->guest_msix_pba_addr = e_phys + offset;
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+				KVM_MSIX_MMIO_TYPE_BASE_PBA;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.base_va = (unsigned long)r_dev->msix_pba_page;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                msix_mmio.flags = 0;
+                ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+                if (ret)
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+            }
+#endif
+         }
         }
     }
 
@@ -822,11 +870,6 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
-    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
 static void assign_failed_examine(AssignedDevice *dev)
 {
     char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1233,6 +1276,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
     return r;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr);
+
 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
@@ -1368,8 +1413,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
 #ifdef KVM_CAP_DEVICE_MSIX
     /* Expose MSI-X capability */
     if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX))) {
-        int bar_nr;
-        uint32_t msix_table_entry;
+        int table_bar_nr, pba_bar_nr;
+        uint32_t msix_table_entry, msix_pba_entry;
 
         dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
         pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos,
@@ -1384,9 +1429,17 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
                      PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
 
         msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
-        bar_nr = msix_table_entry & PCI_MSIX_BIR;
+        table_bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
-        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->msix_table_addr = pci_region[table_bar_nr].base_addr +
+                               msix_table_entry;
+
+        msix_pba_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_PBA);
+        pba_bar_nr = msix_pba_entry & PCI_MSIX_BIR;
+        msix_pba_entry &= ~PCI_MSIX_BIR;
+        dev->msix_pba_addr = pci_region[pba_bar_nr].base_addr +
+                               msix_pba_entry;
+
         dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
@@ -1419,8 +1472,7 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
             (8 * (addr & 3))) & 0xffff;
 }
 
-static void msix_mmio_writel(void *opaque,
-                             target_phys_addr_t addr, uint32_t val)
+static void assigned_dev_update_routing(void *opaque, unsigned long addr)
 {
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
@@ -1432,10 +1484,6 @@ static void msix_mmio_writel(void *opaque,
     struct PCIDevice *pci_dev = &adev->dev;
     uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
-    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
-		    addr, val);
-    memcpy((void *)((char *)page + offset), &val, 4);
-
     index = offset / 16;
 
     /* Check if mask bit is being accessed */
@@ -1511,6 +1559,41 @@ static void msix_mmio_writel(void *opaque,
     adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr)
+{
+    AssignedDevice *adev = opaque;
+
+    if (addr >= adev->guest_msix_table_addr &&
+            addr < adev->guest_msix_table_addr + adev->max_msix_entries_nr * 16) {
+        assigned_dev_update_routing(opaque, addr);
+        return 0;
+    }
+    return -EINVAL;
+}
+
+static void msix_mmio_writel(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    AssignedDevice *adev = opaque;
+    void *page = adev->msix_table_page;
+    unsigned int offset = addr & 0xfff;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+#else
+    int cap_mask = 0;
+#endif
+
+    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
+            addr, val);
+    if (!cap_mask) {
+        memcpy((void *)((char *)page + offset), &val, 4);
+    } else {
+        fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n");
+    }
+
+    assigned_dev_update_routing(opaque, addr);
+}
+
 static void msix_mmio_writew(void *opaque,
                              target_phys_addr_t addr, uint32_t val)
 {
@@ -1547,11 +1630,32 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
     memset(dev->msix_table_page, 0, 0x1000);
     for (i = 0; i < 0x1000; i += 0x10)
         *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
+    dev->msix_pba_page = mmap(NULL, 0x1000,
+                                PROT_READ|PROT_WRITE,
+                                MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+    if (dev->msix_pba_page == MAP_FAILED) {
+        fprintf(stderr, "fail allocate msix_table_page! %s\n",
+                strerror(errno));
+	goto out;
+    }
+    memset(dev->msix_pba_page, 0, 0x1000);
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
     dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
                                         sizeof *dev->dev.msix_entry_used);
+    dev->routing_updater_entry =
+        kvm_add_routing_updater(assigned_dev_update_routing_handler, dev);
+    if (!dev->routing_updater_entry) {
+        perror("kvm_add_routing_updater");
+	goto out2;
+    }
     return 0;
+out2:
+    free(dev->dev.msix_entry_used);
+    munmap(dev->msix_pba_page, 0x1000);
+out:
+    munmap(dev->msix_table_page, 0x1000);
+    return -EFAULT;
 }
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
@@ -1567,6 +1671,15 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    if (munmap(dev->msix_pba_page, 0x1000) == -1) {
+        fprintf(stderr, "error unmapping msix_table_page! %s\n",
+                strerror(errno));
+    }
+    if (dev->routing_updater_entry) {
+        kvm_del_routing_updater(dev->routing_updater_entry);
+        dev->routing_updater_entry = NULL;
+    }
+    dev->msix_pba_page = NULL;
     free(dev->dev.msix_entry_used);
     dev->dev.msix_entry_used = NULL;
 }
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 754e5c0..9288753 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -32,6 +32,7 @@
 #include "qemu-common.h"
 #include "qemu-queue.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* From include/linux/pci.h in the kernel sources */
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -106,11 +107,13 @@ typedef struct AssignedDevice {
     } cap;
     int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
-    void *msix_table_page;
-    target_phys_addr_t msix_table_addr;
+    void *msix_table_page, *msix_pba_page;
+    target_phys_addr_t msix_table_addr, msix_pba_addr;
+    target_phys_addr_t guest_msix_table_addr, guest_msix_pba_addr;
     int mmio_index;
     int need_emulate_cmd;
     char *configfd_name;
+    KVMRoutingUpdateEntry *routing_updater_entry;
     QLIST_ENTRY(AssignedDevice) next;
 } AssignedDevice;
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 956b62a..bee398c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -511,6 +511,38 @@ static int handle_mmio(CPUState *env)
     return 0;
 }
 
+static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head;
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque)
+{
+    KVMRoutingUpdateEntry *e;
+
+    e = qemu_mallocz(sizeof (*e));
+
+    e->cb = cb;
+    e->opaque = opaque;
+    QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, e, entries);
+    return e;
+}
+
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e)
+{
+    QLIST_REMOVE(e, entries);
+    qemu_free(e);
+}
+
+static void kvm_update_msix_routing(CPUState *env)
+{
+    unsigned long addr = env->kvm_run->mmio.phys_addr;
+    KVMRoutingUpdateEntry *e;
+
+    for (e = kvm_routing_update_entry_head.lh_first; e; e = e->entries.le_next) {
+        if (e->cb(e->opaque, addr) == 0)
+            return;
+    }
+    fprintf(stderr, "unhandled MSI-X routing update addr: 0x%lx\n", addr);
+}
+
 int handle_io_window(kvm_context_t kvm)
 {
     return 1;
@@ -647,6 +679,10 @@ int kvm_run(CPUState *env)
             kvm_handle_internal_error(env, run);
             r = 1;
 	    break;
+	case KVM_EXIT_MSIX_ROUTING_UPDATE:
+            kvm_update_msix_routing(env);
+            r = 1;
+            break;
         default:
             if (kvm_arch_run(env)) {
                 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 86799e6..21a3274 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -772,6 +772,17 @@ int kvm_tpr_enable_vapic(CPUState *env);
 unsigned long kvm_get_thread_id(void);
 int kvm_cpu_is_stopped(CPUState *env);
 
+typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry;
+typedef int KVMRoutingUpdateHandler(void *opaque, unsigned long addr);
+
+struct kvm_routing_update_entry {
+    KVMRoutingUpdateHandler *cb;
+    void *opaque;
+    QLIST_ENTRY (kvm_routing_update_entry) entries;
+};
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque);
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e);
 #endif
 
 #endif
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit
  2011-02-18  8:55 [PATCH 0/4 v9] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
@ 2011-02-18  8:55 ` Sheng Yang
  0 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2011-02-18  8:55 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Michael S. Tsirkin, Sheng Yang

The old MSI-X enabling method assumes the entries are written before MSI-X
is enabled, but some OSes, e.g. FreeBSD, do not obey this. This patch fixes
that by enabling MSI-X according to the entries' mask bits.

Also, according to the PCI spec, the mask bit of each MSI-X table entry
should be set after reset.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 hw/device-assignment.c |  188 +++++++++++++++++++++++++++++++++++++++++-------
 hw/device-assignment.h |    2 +-
 2 files changed, 162 insertions(+), 28 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e5205cf..5c162c4 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1146,15 +1146,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 #endif
 
 #ifdef KVM_CAP_DEVICE_MSIX
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+
+#define PCI_MSIX_CTRL_MASKBIT	1ul
+static int get_msix_entries_max_nr(AssignedDevice *adev)
 {
-    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t entries_nr = 0, entries_max_nr;
-    int pos = 0, i, r = 0;
-    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
-    struct kvm_assigned_msix_nr msix_nr;
-    struct kvm_assigned_msix_entry msix_entry;
-    void *va = adev->msix_table_page;
+    int pos, entries_max_nr;
+    PCIDevice *pci_dev = &adev->dev;
 
     pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
@@ -1162,20 +1159,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     entries_max_nr &= PCI_MSIX_TABSIZE;
     entries_max_nr += 1;
 
+    return entries_max_nr;
+}
+
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    uint32_t msg_ctrl;
+    void *va = adev->msix_table_page;
+
+    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
+static int get_msix_valid_entries_nr(AssignedDevice *adev,
+				     uint16_t entries_max_nr)
+{
+    void *va = adev->msix_table_page;
+    uint32_t msg_ctrl;
+    uint16_t entries_nr = 0;
+    int i;
+
     /* Get the usable entry number for allocating */
     for (i = 0; i < entries_max_nr; i++) {
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
         /* Ignore unused entry even it's unmasked */
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
         entries_nr ++;
     }
+    return entries_nr;
+}
+
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
+                                         uint16_t entries_nr,
+                                         uint16_t entries_max_nr)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    int i, r = 0;
+    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    void *va = adev->msix_table_page;
 
-    if (entries_nr == 0) {
-        fprintf(stderr, "MSI-X entry number is zero!\n");
-        return -EINVAL;
-    }
     msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
@@ -1187,6 +1212,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1200,10 +1227,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         if (entries_nr >= msix_nr.entry_nr)
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
 
+        memcpy(&msg_data, va + i * 16 + 8, 4);
         memcpy(&msg_addr, va + i * 16, 4);
         memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
 
@@ -1217,17 +1244,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         adev->entry[entries_nr].u.msi.address_lo = msg_addr;
         adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
         adev->entry[entries_nr].u.msi.data = msg_data;
-        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
-	kvm_add_routing_entry(&adev->entry[entries_nr]);
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+        kvm_add_routing_entry(&adev->entry[entries_nr]);
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
             break;
         }
-        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
+        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
                 msix_entry.gsi, msix_entry.entry);
         entries_nr ++;
     }
@@ -1244,20 +1272,24 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+    uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos);
     int r;
+    uint16_t entries_nr, entries_max_nr;
+    int enable_msix;
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
             calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                     (uint8_t)assigned_dev->h_devfn);
 
+    enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
+                  !(ctrl_word & PCI_MSIX_MASK));
+
     /* Some guests gratuitously disable MSIX even if they're not using it,
      * try to catch this by only deassigning irqs if the guest is using
      * MSIX or intends to start. */
     if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
-        (*ctrl_word & PCI_MSIX_ENABLE)) {
-
+            enable_msix) {
         assigned_irq_data.flags = assigned_dev->irq_requested_type;
         free_dev_irq_entries(assigned_dev);
         r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
@@ -1266,16 +1298,30 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
-    if (*ctrl_word & PCI_MSIX_ENABLE) {
-        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
-                                  KVM_DEV_IRQ_GUEST_MSIX;
-
-        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+    entries_max_nr = assigned_dev->max_msix_entries_nr;
+    if (entries_max_nr == 0) {
+        fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
+        return;
+    }
+    /*
+     * Guest may try to enable MSI-X before setting MSI-X entry done, so
+     * let's wait until guest unmask the entries.
+     */
+    entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
+    if (entries_nr == 0)
+        return;
+    if (enable_msix) {
+        if (assigned_dev_update_msix_mmio(pci_dev,
+                    entries_nr, entries_max_nr) < 0) {
             perror("assigned_dev_update_msix_mmio");
             return;
         }
+        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+                                  KVM_DEV_IRQ_GUEST_MSIX;
         if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
             perror("assigned_dev_enable_msix: assign irq");
             return;
@@ -1449,6 +1495,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 #endif
@@ -1637,10 +1684,90 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+    int ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+    struct PCIDevice *pci_dev = &adev->dev;
+    uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    entries_max_nr = adev->max_msix_entries_nr;
+
+    /*
+     * Find the index of routing entry, it can be different from 'index' if
+     * empty entry existed in between
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1667,6 +1794,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
 
 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
+    int i;
     dev->msix_table_page = mmap(NULL, 0x1000,
                                 PROT_READ|PROT_WRITE,
                                 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1676,9 +1804,13 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
         return -EFAULT;
     }
     memset(dev->msix_table_page, 0, 0x1000);
+    for (i = 0; i < 0x1000; i += 0x10)
+        *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev,
                         DEVICE_NATIVE_ENDIAN);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1695,6 +1827,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 86af0a9..d92606e 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,7 +104,7 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
-    int irq_entries_nr;
+    int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit
  2011-01-30  5:12 [PATCH 0/4 v8] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
@ 2011-01-30  5:12 ` Sheng Yang
  0 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2011-01-30  5:12 UTC (permalink / raw)
  To: Marcelo Tosatti, Avi Kivity; +Cc: Michael S. Tsirkin, kvm, Sheng Yang

The old MSI-X enabling method assumes the entries are written before MSI-X
is enabled, but some OSes, e.g. FreeBSD, do not obey this. This patch fixes
that by enabling MSI-X according to the entries' mask bits.

Also, according to the PCI spec, the mask bit of each MSI-X table entry
should be set after reset.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 hw/device-assignment.c |  188 +++++++++++++++++++++++++++++++++++++++++-------
 hw/device-assignment.h |    2 +-
 2 files changed, 162 insertions(+), 28 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 8446cd4..f81050f 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1141,15 +1141,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 #endif
 
 #ifdef KVM_CAP_DEVICE_MSIX
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+
+#define PCI_MSIX_CTRL_MASKBIT	1ul
+static int get_msix_entries_max_nr(AssignedDevice *adev)
 {
-    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t entries_nr = 0, entries_max_nr;
-    int pos = 0, i, r = 0;
-    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
-    struct kvm_assigned_msix_nr msix_nr;
-    struct kvm_assigned_msix_entry msix_entry;
-    void *va = adev->msix_table_page;
+    int pos, entries_max_nr;
+    PCIDevice *pci_dev = &adev->dev;
 
     pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
@@ -1157,20 +1154,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     entries_max_nr &= PCI_MSIX_TABSIZE;
     entries_max_nr += 1;
 
+    return entries_max_nr;
+}
+
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    uint32_t msg_ctrl;
+    void *va = adev->msix_table_page;
+
+    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
+static int get_msix_valid_entries_nr(AssignedDevice *adev,
+				     uint16_t entries_max_nr)
+{
+    void *va = adev->msix_table_page;
+    uint32_t msg_ctrl;
+    uint16_t entries_nr = 0;
+    int i;
+
     /* Get the usable entry number for allocating */
     for (i = 0; i < entries_max_nr; i++) {
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
         /* Ignore unused entry even it's unmasked */
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
         entries_nr ++;
     }
+    return entries_nr;
+}
+
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
+                                         uint16_t entries_nr,
+                                         uint16_t entries_max_nr)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    int i, r = 0;
+    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    void *va = adev->msix_table_page;
 
-    if (entries_nr == 0) {
-        fprintf(stderr, "MSI-X entry number is zero!\n");
-        return -EINVAL;
-    }
     msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
@@ -1182,6 +1207,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1195,10 +1222,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         if (entries_nr >= msix_nr.entry_nr)
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
 
+        memcpy(&msg_data, va + i * 16 + 8, 4);
         memcpy(&msg_addr, va + i * 16, 4);
         memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
 
@@ -1212,17 +1239,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         adev->entry[entries_nr].u.msi.address_lo = msg_addr;
         adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
         adev->entry[entries_nr].u.msi.data = msg_data;
-        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
-	kvm_add_routing_entry(&adev->entry[entries_nr]);
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+        kvm_add_routing_entry(&adev->entry[entries_nr]);
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
             break;
         }
-        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
+        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
                 msix_entry.gsi, msix_entry.entry);
         entries_nr ++;
     }
@@ -1239,20 +1267,24 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+    uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos);
     int r;
+    uint16_t entries_nr, entries_max_nr;
+    int enable_msix;
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
             calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                     (uint8_t)assigned_dev->h_devfn);
 
+    enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
+                  !(ctrl_word & PCI_MSIX_MASK));
+
     /* Some guests gratuitously disable MSIX even if they're not using it,
      * try to catch this by only deassigning irqs if the guest is using
      * MSIX or intends to start. */
     if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
-        (*ctrl_word & PCI_MSIX_ENABLE)) {
-
+            enable_msix) {
         assigned_irq_data.flags = assigned_dev->irq_requested_type;
         free_dev_irq_entries(assigned_dev);
         r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
@@ -1261,16 +1293,30 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
-    if (*ctrl_word & PCI_MSIX_ENABLE) {
-        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
-                                  KVM_DEV_IRQ_GUEST_MSIX;
-
-        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+    entries_max_nr = assigned_dev->max_msix_entries_nr;
+    if (entries_max_nr == 0) {
+        fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
+        return;
+    }
+    /*
+     * Guest may try to enable MSI-X before setting MSI-X entry done, so
+     * let's wait until guest unmask the entries.
+     */
+    entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
+    if (entries_nr == 0)
+        return;
+    if (enable_msix) {
+        if (assigned_dev_update_msix_mmio(pci_dev,
+                    entries_nr, entries_max_nr) < 0) {
             perror("assigned_dev_update_msix_mmio");
             return;
         }
+        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+                                  KVM_DEV_IRQ_GUEST_MSIX;
         if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
             perror("assigned_dev_enable_msix: assign irq");
             return;
@@ -1441,6 +1487,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 #endif
@@ -1629,10 +1676,90 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+    int ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+    struct PCIDevice *pci_dev = &adev->dev;
+    uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    entries_max_nr = adev->max_msix_entries_nr;
+
+    /*
+     * Find the index of routing entry, it can be different from 'index' if
+     * empty entry existed in between
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1659,6 +1786,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
 
 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
+    int i;
     dev->msix_table_page = mmap(NULL, 0x1000,
                                 PROT_READ|PROT_WRITE,
                                 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1668,8 +1796,12 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
         return -EFAULT;
     }
     memset(dev->msix_table_page, 0, 0x1000);
+    for (i = 0; i < 0x1000; i += 0x10)
+        *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1686,6 +1818,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index c94a730..754e5c0 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,7 +104,7 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
-    int irq_entries_nr;
+    int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit
  2011-01-06 10:24 [PATCH 0/4 v7] " Sheng Yang
@ 2011-01-06 10:24 ` Sheng Yang
  0 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2011-01-06 10:24 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang

The old MSI-X enabling method assumes the entries are written before MSI-X
is enabled, but some OSes, e.g. FreeBSD, do not obey this. This patch fixes
that by enabling MSI-X according to the entries' mask bits.

Also, according to the PCI spec, the mask bit of each MSI-X table entry
should be set after reset.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 hw/device-assignment.c |  188 +++++++++++++++++++++++++++++++++++++++++-------
 hw/device-assignment.h |    2 +-
 2 files changed, 162 insertions(+), 28 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 8446cd4..f81050f 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1141,15 +1141,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 #endif
 
 #ifdef KVM_CAP_DEVICE_MSIX
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+
+#define PCI_MSIX_CTRL_MASKBIT	1ul
+static int get_msix_entries_max_nr(AssignedDevice *adev)
 {
-    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t entries_nr = 0, entries_max_nr;
-    int pos = 0, i, r = 0;
-    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
-    struct kvm_assigned_msix_nr msix_nr;
-    struct kvm_assigned_msix_entry msix_entry;
-    void *va = adev->msix_table_page;
+    int pos, entries_max_nr;
+    PCIDevice *pci_dev = &adev->dev;
 
     pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
@@ -1157,20 +1154,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     entries_max_nr &= PCI_MSIX_TABSIZE;
     entries_max_nr += 1;
 
+    return entries_max_nr;
+}
+
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    uint32_t msg_ctrl;
+    void *va = adev->msix_table_page;
+
+    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
+static int get_msix_valid_entries_nr(AssignedDevice *adev,
+				     uint16_t entries_max_nr)
+{
+    void *va = adev->msix_table_page;
+    uint32_t msg_ctrl;
+    uint16_t entries_nr = 0;
+    int i;
+
     /* Get the usable entry number for allocating */
     for (i = 0; i < entries_max_nr; i++) {
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
         /* Ignore unused entry even it's unmasked */
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
         entries_nr ++;
     }
+    return entries_nr;
+}
+
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
+                                         uint16_t entries_nr,
+                                         uint16_t entries_max_nr)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    int i, r = 0;
+    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    void *va = adev->msix_table_page;
 
-    if (entries_nr == 0) {
-        fprintf(stderr, "MSI-X entry number is zero!\n");
-        return -EINVAL;
-    }
     msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
@@ -1182,6 +1207,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1195,10 +1222,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         if (entries_nr >= msix_nr.entry_nr)
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
 
+        memcpy(&msg_data, va + i * 16 + 8, 4);
         memcpy(&msg_addr, va + i * 16, 4);
         memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
 
@@ -1212,17 +1239,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         adev->entry[entries_nr].u.msi.address_lo = msg_addr;
         adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
         adev->entry[entries_nr].u.msi.data = msg_data;
-        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
-	kvm_add_routing_entry(&adev->entry[entries_nr]);
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+        kvm_add_routing_entry(&adev->entry[entries_nr]);
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
             break;
         }
-        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
+        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
                 msix_entry.gsi, msix_entry.entry);
         entries_nr ++;
     }
@@ -1239,20 +1267,24 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+    uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos);
     int r;
+    uint16_t entries_nr, entries_max_nr;
+    int enable_msix;
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
             calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                     (uint8_t)assigned_dev->h_devfn);
 
+    enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
+                  !(ctrl_word & PCI_MSIX_MASK));
+
     /* Some guests gratuitously disable MSIX even if they're not using it,
      * try to catch this by only deassigning irqs if the guest is using
      * MSIX or intends to start. */
     if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
-        (*ctrl_word & PCI_MSIX_ENABLE)) {
-
+            enable_msix) {
         assigned_irq_data.flags = assigned_dev->irq_requested_type;
         free_dev_irq_entries(assigned_dev);
         r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
@@ -1261,16 +1293,30 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
-    if (*ctrl_word & PCI_MSIX_ENABLE) {
-        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
-                                  KVM_DEV_IRQ_GUEST_MSIX;
-
-        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+    entries_max_nr = assigned_dev->max_msix_entries_nr;
+    if (entries_max_nr == 0) {
+        fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
+        return;
+    }
+    /*
+     * Guest may try to enable MSI-X before setting MSI-X entry done, so
+     * let's wait until guest unmask the entries.
+     */
+    entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
+    if (entries_nr == 0)
+        return;
+    if (enable_msix) {
+        if (assigned_dev_update_msix_mmio(pci_dev,
+                    entries_nr, entries_max_nr) < 0) {
             perror("assigned_dev_update_msix_mmio");
             return;
         }
+        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+                                  KVM_DEV_IRQ_GUEST_MSIX;
         if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
             perror("assigned_dev_enable_msix: assign irq");
             return;
@@ -1441,6 +1487,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 #endif
@@ -1629,10 +1676,90 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+    int ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+    struct PCIDevice *pci_dev = &adev->dev;
+    uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    entries_max_nr = adev->max_msix_entries_nr;
+
+    /*
+     * Find the index of routing entry, it can be different from 'index' if
+     * empty entry existed in between
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1659,6 +1786,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
 
 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
+    int i;
     dev->msix_table_page = mmap(NULL, 0x1000,
                                 PROT_READ|PROT_WRITE,
                                 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1668,8 +1796,12 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
         return -EFAULT;
     }
     memset(dev->msix_table_page, 0, 0x1000);
+    for (i = 0; i < 0x1000; i += 0x10)
+        *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1686,6 +1818,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index c94a730..754e5c0 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,7 +104,7 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
-    int irq_entries_nr;
+    int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-02-18  8:53 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-12-22  9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
2010-12-22  9:25 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
2010-12-22  9:25 ` [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support Sheng Yang
2010-12-22  9:25 ` [PATCH 3/4] qemu-kvm: Header file update for MSI-X " Sheng Yang
2010-12-22  9:25 ` [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
2011-01-06 10:24 [PATCH 0/4 v7] " Sheng Yang
2011-01-06 10:24 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
2011-01-30  5:12 [PATCH 0/4 v8] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
2011-01-30  5:12 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
2011-02-18  8:55 [PATCH 0/4 v9] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
2011-02-18  8:55 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.