All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] x86/IOMMU: multi-vector MSI
@ 2013-04-19 10:50 Jan Beulich
  2013-04-19 10:57 ` [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 10:50 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

1: AMD IOMMU: allocate IRTE entries instead of using a static mapping
2: AMD IOMMU: untie remap and vector maps
3: VT-d: enable for multi-vector MSI
4: AMD IOMMU: enable for multi-vector MSI
5: x86: enable multi-vector MSI
6: pciif: add multi-vector-MSI command

The first two patches were posted before, and reportedly still don't
fully work (which means the series isn't meant to be applied as is).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
@ 2013-04-19 10:57 ` Jan Beulich
  2013-04-23 13:21   ` Suravee Suthikulpanit
  2013-04-26 17:13   ` Suravee Suthikulpanit
  2013-04-19 10:57 ` [PATCH 2/6] AMD IOMMU: untie remap and vector maps Jan Beulich
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 10:57 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

[-- Attachment #1: Type: text/plain, Size: 22429 bytes --]

For multi-vector MSI, where we surely don't want to be forced to
allocate contiguous vectors, and where we want to be able to set the
affinities of the individual vectors separately, we need to drop the
use of the tuple of vector and delivery mode to determine the IRTE to
use, and instead allocate IRTEs (which, in my opinion, should have
been done from the beginning).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
One thing I surely need confirmation on is whether this

        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);

in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
clear to me how to properly set up things for affected devices, as we
would need an identical index allocated for two different remap table
instances (which can hardly be expected to work out well).

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
          /* allocate per-device interrupt remapping table */
          if ( amd_iommu_perdev_intremap )
              ivrs_mappings[alias_id].intremap_table =
-                amd_iommu_alloc_intremap_table();
+                amd_iommu_alloc_intremap_table(
+                    &ivrs_mappings[alias_id].intremap_inuse);
          else
          {
              if ( shared_intremap_table == NULL  )
-                 shared_intremap_table = amd_iommu_alloc_intremap_table();
+                 shared_intremap_table = amd_iommu_alloc_intremap_table(
+                     &shared_intremap_inuse);
              ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+             ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
          }
     }
     /* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
             if ( IO_APIC_ID(apic) != special->handle )
                 continue;
 
-            if ( ioapic_sbdf[special->handle].pin_setup )
+            if ( ioapic_sbdf[special->handle].pin_2_idx )
             {
                 if ( ioapic_sbdf[special->handle].bdf == bdf &&
                      ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
                 ioapic_sbdf[special->handle].bdf = bdf;
                 ioapic_sbdf[special->handle].seg = seg;
 
-                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
-                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+                    u16, nr_ioapic_entries[apic]);
                 if ( nr_ioapic_entries[apic] &&
-                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
                 {
                     printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                     return 0;
                 }
+                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                       nr_ioapic_entries[apic]);
             }
             break;
         }
@@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
     for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
     {
         if ( !nr_ioapic_entries[apic] ||
-             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+             ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             continue;
 
         printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
             error = -ENXIO;
         else
         {
-            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
-                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
-            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+                u16, nr_ioapic_entries[apic]);
+            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                 error = -ENOMEM;
             }
+            memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                   nr_ioapic_entries[apic]);
         }
     }
 
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
 struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
 struct hpet_sbdf hpet_sbdf;
 void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
 static DEFINE_SPINLOCK(shared_intremap_lock);
 
 static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
     return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
 }
 
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
 {
-    int offset = 0;
-    offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
-    offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & 
-        INT_REMAP_INDEX_VECTOR_MASK;
-    return offset;
+    unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+    unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+    if ( slot < INTREMAP_ENTRIES )
+        __set_bit(slot, inuse);
+    return slot;
 }
 
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
 {
-    u8 *table;
+    u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
 
-    table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
     ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
 
-    return (u8*) (table + offset);
+    return table + offset;
 }
 
 static void free_intremap_entry(int seg, int bdf, int offset)
 {
-    u32* entry;
-    entry = (u32*)get_intremap_entry(seg, bdf, offset);
+    u32 *entry = get_intremap_entry(seg, bdf, offset);
+
     memset(entry, 0, sizeof(u32));
+    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
 
 static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,24 @@ static void update_intremap_entry(u32* e
                             INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
 }
 
-static void update_intremap_entry_from_ioapic(
+static void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+    rte->vector = (u8)offset;
+    rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
-    const struct IO_APIC_route_entry *rte,
-    const struct IO_APIC_route_entry *old_rte)
+    struct IO_APIC_route_entry *rte,
+    u16 *index)
 {
     unsigned long flags;
     u32* entry;
     u8 delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +129,20 @@ static void update_intremap_entry_from_i
 
     spin_lock_irqsave(lock, flags);
 
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( old_rte )
+    offset = *index;
+    if ( offset >= INTREMAP_ENTRIES )
     {
-        int old_offset = get_intremap_offset(old_rte->vector,
-                                             old_rte->delivery_mode);
-
-        if ( offset != old_offset )
-            free_intremap_entry(iommu->seg, bdf, old_offset);
+        offset = alloc_intremap_entry(iommu->seg, req_id);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            rte->mask = 1;
+            return -ENOSPC;
+        }
+        *index = offset;
     }
-    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+
+    entry = get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
@@ -141,6 +153,10 @@ static void update_intremap_entry_from_i
         amd_iommu_flush_intremap(iommu, req_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    set_rte_index(rte, offset);
+
+    return 0;
 }
 
 int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp
     u16 seg, bdf, req_id;
     struct amd_iommu *iommu;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +200,23 @@ int __init amd_iommu_setup_ioapic_remapp
             dest = rte.dest.logical.logical_dest;
 
             spin_lock_irqsave(lock, flags);
-            offset = get_intremap_offset(vector, delivery_mode);
-            entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+            offset = alloc_intremap_entry(seg, req_id);
+            BUG_ON(offset >= INTREMAP_ENTRIES);
+            entry = get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
                                   delivery_mode, dest_mode, dest);
             spin_unlock_irqrestore(lock, flags);
 
+            set_rte_index(&rte, offset);
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+            __ioapic_write_entry(apic, pin, 1, rte);
+
             if ( iommu->enabled )
             {
                 spin_lock_irqsave(&iommu->lock, flags);
                 amd_iommu_flush_intremap(iommu, req_id);
                 spin_unlock_irqrestore(&iommu->lock, flags);
             }
-            set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
         }
     }
     return 0;
@@ -209,7 +229,7 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf;
+    int saved_mask, seg, bdf, rc;
     struct amd_iommu *iommu;
 
     if ( !iommu_intremap )
@@ -247,7 +267,7 @@ void amd_iommu_ioapic_update_ire(
     }
 
     if ( new_rte.mask &&
-         !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+         ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
         __io_apic_write(apic, reg, value);
@@ -262,14 +282,19 @@ void amd_iommu_ioapic_update_ire(
     }
 
     /* Update interrupt remapping entry */
-    update_intremap_entry_from_ioapic(
-        bdf, iommu, &new_rte,
-        test_and_set_bit(pin,
-                         ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
-                                                                  : NULL);
+    rc = update_intremap_entry_from_ioapic(
+             bdf, iommu, &new_rte,
+             &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
 
-    /* Forward write access to IO-APIC RTE */
-    __io_apic_write(apic, reg, value);
+    __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+    if ( rc )
+    {
+        /* Keep the entry masked. */
+        printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+               IO_APIC_ID(apic), pin, rc);
+        return;
+    }
 
     /* For lower bits access, return directly to avoid double writes */
     if ( reg == rte_lo )
@@ -283,16 +308,41 @@ void amd_iommu_ioapic_update_ire(
     }
 }
 
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg)
+{
+    unsigned int val = __io_apic_read(apic, reg);
+
+    if ( !(reg & 1) )
+    {
+        unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+        u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+        u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+        u16 req_id = get_intremap_requestor_id(seg, bdf);
+        const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+        val &= ~(INTREMAP_ENTRIES - 1);
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_INTTYPE_MASK,
+                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_VECTOR_MASK,
+                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
+    }
+
+    return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
     struct amd_iommu *iommu, u16 bdf,
-    int *remap_index, const struct msi_msg *msg)
+    int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
     u32* entry;
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +353,6 @@ static void update_intremap_entry_from_m
         spin_lock_irqsave(lock, flags);
         free_intremap_entry(iommu->seg, req_id, *remap_index);
         spin_unlock_irqrestore(lock, flags);
-
-        if ( ( req_id != alias_id ) &&
-             get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
-        {
-            lock = get_intremap_lock(iommu->seg, alias_id);
-            spin_lock_irqsave(lock, flags);
-            free_intremap_entry(iommu->seg, alias_id, *remap_index);
-            spin_unlock_irqrestore(lock, flags);
-        }
         goto done;
     }
 
@@ -322,16 +363,24 @@ static void update_intremap_entry_from_m
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( *remap_index < 0)
+    offset = *remap_index;
+    if ( offset >= INTREMAP_ENTRIES )
+    {
+        offset = alloc_intremap_entry(iommu->seg, bdf);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            return -ENOSPC;
+        }
         *remap_index = offset;
-    else
-        BUG_ON(*remap_index != offset);
+    }
 
-    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+    entry = get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
+    *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
     /*
      * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
      * will use alias id to index interrupt remapping table.
@@ -343,10 +392,8 @@ static void update_intremap_entry_from_m
     if ( ( req_id != alias_id ) &&
          get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
     {
-        spin_lock_irqsave(lock, flags);
-        entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
-        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-        spin_unlock_irqrestore(lock, flags);
+        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
 done:
@@ -358,19 +405,22 @@ done:
             amd_iommu_flush_intremap(iommu, alias_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    return 0;
 }
 
 static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
 {
-    struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
-    if ( iommu )
-        return iommu;
+    struct amd_iommu *iommu;
 
     list_for_each_entry ( iommu, &amd_iommu_head, list )
         if ( iommu->seg == seg && iommu->bdf == bdf )
             return NULL;
 
+    iommu = find_iommu_for_device(seg, bdf);
+    if ( iommu )
+        return iommu;
+
     AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
                     seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
     return ERR_PTR(-EINVAL);
@@ -380,8 +430,9 @@ int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct pci_dev *pdev = msi_desc->dev;
-    int bdf, seg;
+    int bdf, seg, rc;
     struct amd_iommu *iommu;
+    u32 data;
 
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +441,12 @@ int amd_iommu_msi_msg_update_ire(
     if ( IS_ERR_OR_NULL(iommu) )
         return PTR_ERR(iommu);
 
-    if ( msi_desc->remap_index >= 0 )
+    if ( msi_desc->remap_index >= 0 && !msg )
     {
         do {
             update_intremap_entry_from_msi_msg(iommu, bdf,
-                                               &msi_desc->remap_index, NULL);
+                                               &msi_desc->remap_index,
+                                               NULL, NULL);
             if ( !pdev || !pdev->phantom_stride )
                 break;
             bdf += pdev->phantom_stride;
@@ -409,19 +461,39 @@ int amd_iommu_msi_msg_update_ire(
         return 0;
 
     do {
-        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
-                                           msg);
-        if ( !pdev || !pdev->phantom_stride )
+        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+                                                &msi_desc->remap_index,
+                                                msg, &data);
+        if ( rc || !pdev || !pdev->phantom_stride )
             break;
         bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
-    return 0;
+    msg->data = data;
+    return rc;
 }
 
 void amd_iommu_read_msi_from_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
+    unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+    const struct pci_dev *pdev = msi_desc->dev;
+    u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+    u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+    const u32 *entry;
+
+    if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+        return;
+
+    entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+    msg->data &= ~(INTREMAP_ENTRIES - 1);
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_INTTYPE_MASK,
+                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_VECTOR_MASK,
+                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -438,12 +510,14 @@ int __init amd_iommu_free_intremap_table
     return 0;
 }
 
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
 {
     void *tb;
     tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
     BUG_ON(tb == NULL);
     memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+    BUG_ON(*inuse_map == NULL);
     return tb;
 }
 
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
     .get_device_group_id = amd_iommu_group_id,
     .update_ire_from_apic = amd_iommu_ioapic_update_ire,
     .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
-    .read_apic_from_ire = __io_apic_read,
+    .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
     .read_msi_from_ire = amd_iommu_read_msi_from_ire,
     .setup_hpet_msi = amd_setup_hpet_msi,
     .suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
 
     /* per device interrupt remapping table */
     void *intremap_table;
+    unsigned long *intremap_inuse;
     spinlock_t intremap_lock;
 
     /* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -458,10 +458,6 @@
 #define MAX_AMD_IOMMUS                  32
 
 /* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK         0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT        10
-#define INT_REMAP_INDEX_VECTOR_MASK     0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT    2
 #define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
 #define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
 #define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
 
 /* interrupt remapping */
 int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
 int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
 void amd_iommu_ioapic_update_ire(
     unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg);
 int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg);
 void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
 
 extern struct ioapic_sbdf {
     u16 bdf, seg;
-    unsigned long *pin_setup;
+    u16 *pin_2_idx;
 } ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
 
 extern struct hpet_sbdf {
     u16 bdf, seg, id;
     struct amd_iommu *iommu;
 } hpet_sbdf;
 
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
 /* power management support */
 void amd_iommu_resume(void);
 void amd_iommu_suspend(void);



[-- Attachment #2: AMD-IOMMU-irte-alloc.patch --]
[-- Type: text/plain, Size: 22495 bytes --]

AMD IOMMU: allocate IRTE entries instead of using a static mapping

For multi-vector MSI, where we surely don't want to be forced to
allocate contiguous vectors, and where we want to be able to set the
affinities of the individual vectors separately, we need to drop the
use of the tuple of vector and delivery mode to determine the IRTE to
use, and instead allocate IRTEs (which, in my opinion, should have
been done from the beginning).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
One thing I surely need confirmation on is whether this

        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);

in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
clear to me how to properly set up things for affected devices, as we
would need an identical index allocated for two different remap table
instances (which can hardly be expected to work out well).

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
          /* allocate per-device interrupt remapping table */
          if ( amd_iommu_perdev_intremap )
              ivrs_mappings[alias_id].intremap_table =
-                amd_iommu_alloc_intremap_table();
+                amd_iommu_alloc_intremap_table(
+                    &ivrs_mappings[alias_id].intremap_inuse);
          else
          {
              if ( shared_intremap_table == NULL  )
-                 shared_intremap_table = amd_iommu_alloc_intremap_table();
+                 shared_intremap_table = amd_iommu_alloc_intremap_table(
+                     &shared_intremap_inuse);
              ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+             ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
          }
     }
     /* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
             if ( IO_APIC_ID(apic) != special->handle )
                 continue;
 
-            if ( ioapic_sbdf[special->handle].pin_setup )
+            if ( ioapic_sbdf[special->handle].pin_2_idx )
             {
                 if ( ioapic_sbdf[special->handle].bdf == bdf &&
                      ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
                 ioapic_sbdf[special->handle].bdf = bdf;
                 ioapic_sbdf[special->handle].seg = seg;
 
-                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
-                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+                    u16, nr_ioapic_entries[apic]);
                 if ( nr_ioapic_entries[apic] &&
-                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
                 {
                     printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                     return 0;
                 }
+                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                       nr_ioapic_entries[apic]);
             }
             break;
         }
@@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
     for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
     {
         if ( !nr_ioapic_entries[apic] ||
-             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+             ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             continue;
 
         printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
             error = -ENXIO;
         else
         {
-            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
-                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
-            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+                u16, nr_ioapic_entries[apic]);
+            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                 error = -ENOMEM;
             }
+            memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                   nr_ioapic_entries[apic]);
         }
     }
 
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
 struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
 struct hpet_sbdf hpet_sbdf;
 void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
 static DEFINE_SPINLOCK(shared_intremap_lock);
 
 static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
     return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
 }
 
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
 {
-    int offset = 0;
-    offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
-    offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & 
-        INT_REMAP_INDEX_VECTOR_MASK;
-    return offset;
+    unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+    unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+    if ( slot < INTREMAP_ENTRIES )
+        __set_bit(slot, inuse);
+    return slot;
 }
 
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
 {
-    u8 *table;
+    u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
 
-    table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
     ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
 
-    return (u8*) (table + offset);
+    return table + offset;
 }
 
 static void free_intremap_entry(int seg, int bdf, int offset)
 {
-    u32* entry;
-    entry = (u32*)get_intremap_entry(seg, bdf, offset);
+    u32 *entry = get_intremap_entry(seg, bdf, offset);
+
     memset(entry, 0, sizeof(u32));
+    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
 
 static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,24 @@ static void update_intremap_entry(u32* e
                             INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
 }
 
-static void update_intremap_entry_from_ioapic(
+static void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+    rte->vector = (u8)offset;
+    rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
-    const struct IO_APIC_route_entry *rte,
-    const struct IO_APIC_route_entry *old_rte)
+    struct IO_APIC_route_entry *rte,
+    u16 *index)
 {
     unsigned long flags;
     u32* entry;
     u8 delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +129,20 @@ static void update_intremap_entry_from_i
 
     spin_lock_irqsave(lock, flags);
 
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( old_rte )
+    offset = *index;
+    if ( offset >= INTREMAP_ENTRIES )
     {
-        int old_offset = get_intremap_offset(old_rte->vector,
-                                             old_rte->delivery_mode);
-
-        if ( offset != old_offset )
-            free_intremap_entry(iommu->seg, bdf, old_offset);
+        offset = alloc_intremap_entry(iommu->seg, req_id);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            rte->mask = 1;
+            return -ENOSPC;
+        }
+        *index = offset;
     }
-    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+
+    entry = get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
@@ -141,6 +153,10 @@ static void update_intremap_entry_from_i
         amd_iommu_flush_intremap(iommu, req_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    set_rte_index(rte, offset);
+
+    return 0;
 }
 
 int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp
     u16 seg, bdf, req_id;
     struct amd_iommu *iommu;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +200,23 @@ int __init amd_iommu_setup_ioapic_remapp
             dest = rte.dest.logical.logical_dest;
 
             spin_lock_irqsave(lock, flags);
-            offset = get_intremap_offset(vector, delivery_mode);
-            entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+            offset = alloc_intremap_entry(seg, req_id);
+            BUG_ON(offset >= INTREMAP_ENTRIES);
+            entry = get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
                                   delivery_mode, dest_mode, dest);
             spin_unlock_irqrestore(lock, flags);
 
+            set_rte_index(&rte, offset);
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+            __ioapic_write_entry(apic, pin, 1, rte);
+
             if ( iommu->enabled )
             {
                 spin_lock_irqsave(&iommu->lock, flags);
                 amd_iommu_flush_intremap(iommu, req_id);
                 spin_unlock_irqrestore(&iommu->lock, flags);
             }
-            set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
         }
     }
     return 0;
@@ -209,7 +229,7 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf;
+    int saved_mask, seg, bdf, rc;
     struct amd_iommu *iommu;
 
     if ( !iommu_intremap )
@@ -247,7 +267,7 @@ void amd_iommu_ioapic_update_ire(
     }
 
     if ( new_rte.mask &&
-         !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+         ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
         __io_apic_write(apic, reg, value);
@@ -262,14 +282,19 @@ void amd_iommu_ioapic_update_ire(
     }
 
     /* Update interrupt remapping entry */
-    update_intremap_entry_from_ioapic(
-        bdf, iommu, &new_rte,
-        test_and_set_bit(pin,
-                         ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
-                                                                  : NULL);
+    rc = update_intremap_entry_from_ioapic(
+             bdf, iommu, &new_rte,
+             &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
 
-    /* Forward write access to IO-APIC RTE */
-    __io_apic_write(apic, reg, value);
+    __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+    if ( rc )
+    {
+        /* Keep the entry masked. */
+        printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+               IO_APIC_ID(apic), pin, rc);
+        return;
+    }
 
     /* For lower bits access, return directly to avoid double writes */
     if ( reg == rte_lo )
@@ -283,16 +308,41 @@ void amd_iommu_ioapic_update_ire(
     }
 }
 
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg)
+{
+    unsigned int val = __io_apic_read(apic, reg);
+
+    if ( !(reg & 1) )
+    {
+        unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+        u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+        u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+        u16 req_id = get_intremap_requestor_id(seg, bdf);
+        const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+        val &= ~(INTREMAP_ENTRIES - 1);
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_INTTYPE_MASK,
+                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_VECTOR_MASK,
+                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
+    }
+
+    return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
     struct amd_iommu *iommu, u16 bdf,
-    int *remap_index, const struct msi_msg *msg)
+    int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
     u32* entry;
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +353,6 @@ static void update_intremap_entry_from_m
         spin_lock_irqsave(lock, flags);
         free_intremap_entry(iommu->seg, req_id, *remap_index);
         spin_unlock_irqrestore(lock, flags);
-
-        if ( ( req_id != alias_id ) &&
-             get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
-        {
-            lock = get_intremap_lock(iommu->seg, alias_id);
-            spin_lock_irqsave(lock, flags);
-            free_intremap_entry(iommu->seg, alias_id, *remap_index);
-            spin_unlock_irqrestore(lock, flags);
-        }
         goto done;
     }
 
@@ -322,16 +363,24 @@ static void update_intremap_entry_from_m
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( *remap_index < 0)
+    offset = *remap_index;
+    if ( offset >= INTREMAP_ENTRIES )
+    {
+        offset = alloc_intremap_entry(iommu->seg, bdf);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            return -ENOSPC;
+        }
         *remap_index = offset;
-    else
-        BUG_ON(*remap_index != offset);
+    }
 
-    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+    entry = get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
+    *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
     /*
      * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
      * will use alias id to index interrupt remapping table.
@@ -343,10 +392,8 @@ static void update_intremap_entry_from_m
     if ( ( req_id != alias_id ) &&
          get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
     {
-        spin_lock_irqsave(lock, flags);
-        entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
-        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-        spin_unlock_irqrestore(lock, flags);
+        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
 done:
@@ -358,19 +405,22 @@ done:
             amd_iommu_flush_intremap(iommu, alias_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    return 0;
 }
 
 static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
 {
-    struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
-    if ( iommu )
-        return iommu;
+    struct amd_iommu *iommu;
 
     list_for_each_entry ( iommu, &amd_iommu_head, list )
         if ( iommu->seg == seg && iommu->bdf == bdf )
             return NULL;
 
+    iommu = find_iommu_for_device(seg, bdf);
+    if ( iommu )
+        return iommu;
+
     AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
                     seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
     return ERR_PTR(-EINVAL);
@@ -380,8 +430,9 @@ int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct pci_dev *pdev = msi_desc->dev;
-    int bdf, seg;
+    int bdf, seg, rc;
     struct amd_iommu *iommu;
+    u32 data;
 
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +441,12 @@ int amd_iommu_msi_msg_update_ire(
     if ( IS_ERR_OR_NULL(iommu) )
         return PTR_ERR(iommu);
 
-    if ( msi_desc->remap_index >= 0 )
+    if ( msi_desc->remap_index >= 0 && !msg )
     {
         do {
             update_intremap_entry_from_msi_msg(iommu, bdf,
-                                               &msi_desc->remap_index, NULL);
+                                               &msi_desc->remap_index,
+                                               NULL, NULL);
             if ( !pdev || !pdev->phantom_stride )
                 break;
             bdf += pdev->phantom_stride;
@@ -409,19 +461,39 @@ int amd_iommu_msi_msg_update_ire(
         return 0;
 
     do {
-        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
-                                           msg);
-        if ( !pdev || !pdev->phantom_stride )
+        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+                                                &msi_desc->remap_index,
+                                                msg, &data);
+        if ( rc || !pdev || !pdev->phantom_stride )
             break;
         bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
-    return 0;
+    msg->data = data;
+    return rc;
 }
 
 void amd_iommu_read_msi_from_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
+    unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+    const struct pci_dev *pdev = msi_desc->dev;
+    u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+    u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+    const u32 *entry;
+
+    if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+        return;
+
+    entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+    msg->data &= ~(INTREMAP_ENTRIES - 1);
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_INTTYPE_MASK,
+                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_VECTOR_MASK,
+                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -438,12 +510,14 @@ int __init amd_iommu_free_intremap_table
     return 0;
 }
 
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
 {
     void *tb;
     tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
     BUG_ON(tb == NULL);
     memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+    BUG_ON(*inuse_map == NULL);
     return tb;
 }
 
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
     .get_device_group_id = amd_iommu_group_id,
     .update_ire_from_apic = amd_iommu_ioapic_update_ire,
     .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
-    .read_apic_from_ire = __io_apic_read,
+    .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
     .read_msi_from_ire = amd_iommu_read_msi_from_ire,
     .setup_hpet_msi = amd_setup_hpet_msi,
     .suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
 
     /* per device interrupt remapping table */
     void *intremap_table;
+    unsigned long *intremap_inuse;
     spinlock_t intremap_lock;
 
     /* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -458,10 +458,6 @@
 #define MAX_AMD_IOMMUS                  32
 
 /* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK         0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT        10
-#define INT_REMAP_INDEX_VECTOR_MASK     0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT    2
 #define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
 #define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
 #define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
 
 /* interrupt remapping */
 int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
 int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
 void amd_iommu_ioapic_update_ire(
     unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg);
 int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg);
 void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
 
 extern struct ioapic_sbdf {
     u16 bdf, seg;
-    unsigned long *pin_setup;
+    u16 *pin_2_idx;
 } ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
 
 extern struct hpet_sbdf {
     u16 bdf, seg, id;
     struct amd_iommu *iommu;
 } hpet_sbdf;
 
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
 /* power management support */
 void amd_iommu_resume(void);
 void amd_iommu_suspend(void);

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 2/6] AMD IOMMU: untie remap and vector maps
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
  2013-04-19 10:57 ` [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
@ 2013-04-19 10:57 ` Jan Beulich
  2013-04-19 10:58 ` [PATCH 3/6] VT-d: enable for multi-vector MSI Jan Beulich
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 10:57 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

[-- Attachment #1: Type: text/plain, Size: 1703 bytes --]

With the specific IRTEs used for an interrupt no longer depending on
the vector, there's no need to tie the remap sharing model to the
vector sharing one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -207,35 +207,6 @@ int __init amd_iov_detect(void)
 
     init_done = 1;
 
-    /*
-     * AMD IOMMUs don't distinguish between vectors destined for
-     * different cpus when doing interrupt remapping.  This means
-     * that interrupts going through the same intremap table
-     * can't share the same vector.
-     *
-     * If irq_vector_map isn't specified, choose a sensible default:
-     * - If we're using per-device interemap tables, per-device
-     *   vector non-sharing maps
-     * - If we're using a global interemap table, global vector
-     *   non-sharing map
-     */
-    if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
-    {
-        if ( amd_iommu_perdev_intremap )
-        {
-            printk("AMD-Vi: Enabling per-device vector maps\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
-        }
-        else
-        {
-            printk("AMD-Vi: Enabling global vector map\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
-        }
-    }
-    else
-    {
-        printk("AMD-Vi: Not overriding irq_vector_map setting\n");
-    }
     if ( !amd_iommu_perdev_intremap )
         printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
     return scan_pci_devices();




[-- Attachment #2: AMD-IOMMU-untie-remap-and-vector-maps.patch --]
[-- Type: text/plain, Size: 1739 bytes --]

AMD IOMMU: untie remap and vector maps

With the specific IRTEs used for an interrupt no longer depending on
the vector, there's no need to tie the remap sharing model to the
vector sharing one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -207,35 +207,6 @@ int __init amd_iov_detect(void)
 
     init_done = 1;
 
-    /*
-     * AMD IOMMUs don't distinguish between vectors destined for
-     * different cpus when doing interrupt remapping.  This means
-     * that interrupts going through the same intremap table
-     * can't share the same vector.
-     *
-     * If irq_vector_map isn't specified, choose a sensible default:
-     * - If we're using per-device interemap tables, per-device
-     *   vector non-sharing maps
-     * - If we're using a global interemap table, global vector
-     *   non-sharing map
-     */
-    if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
-    {
-        if ( amd_iommu_perdev_intremap )
-        {
-            printk("AMD-Vi: Enabling per-device vector maps\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
-        }
-        else
-        {
-            printk("AMD-Vi: Enabling global vector map\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
-        }
-    }
-    else
-    {
-        printk("AMD-Vi: Not overriding irq_vector_map setting\n");
-    }
     if ( !amd_iommu_perdev_intremap )
         printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
     return scan_pci_devices();

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 3/6] VT-d: enable for multi-vector MSI
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
  2013-04-19 10:57 ` [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
  2013-04-19 10:57 ` [PATCH 2/6] AMD IOMMU: untie remap and vector maps Jan Beulich
@ 2013-04-19 10:58 ` Jan Beulich
  2013-04-19 10:59 ` [PATCH 4/6] AMD IOMMU: enable for multi-vector MSI Jan Beulich
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 10:58 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

[-- Attachment #1: Type: text/plain, Size: 6549 bytes --]

The main change being to make alloc_remap_entry() capable of allocating
a block of entries.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
 }
 
 /*
- * Look for a free intr remap entry.
+ * Look for a free intr remap entry (or a contiguous set thereof).
  * Need hold iremap_lock, and setup returned entry before releasing lock.
  */
-static int alloc_remap_entry(struct iommu *iommu)
+static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
 {
     struct iremap_entry *iremap_entries = NULL;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
-    int i;
+    unsigned int i, found;
 
     ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
 
-    for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
+    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
     {
         struct iremap_entry *p;
         if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
@@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
         else
             p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-        if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
+        if ( p->lo_val || p->hi_val ) /* not a free entry */
+            found = 0;
+        else if ( ++found == nr )
             break;
     }
 
@@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
         unmap_vtd_domain_page(iremap_entries);
 
     if ( i < IREMAP_ENTRY_NR ) 
-        ir_ctrl->iremap_num++;
+        ir_ctrl->iremap_num += nr;
     return i;
 }
 
@@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
     index = apic_pin_2_ir_idx[apic][ioapic_pin];
     if ( index < 0 )
     {
-        index = alloc_remap_entry(iommu);
+        index = alloc_remap_entry(iommu, 1);
         if ( index < IREMAP_ENTRY_NR )
             apic_pin_2_ir_idx[apic][ioapic_pin] = index;
     }
@@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
 }
 
 static int remap_entry_to_msi_msg(
-    struct iommu *iommu, struct msi_msg *msg)
+    struct iommu *iommu, struct msi_msg *msg, unsigned int index)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct msi_msg_remap_entry *remap_rte;
-    int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     remap_rte = (struct msi_msg_remap_entry *) msg;
-    index = (remap_rte->address_lo.index_15 << 15) |
+    index += (remap_rte->address_lo.index_15 << 15) |
              remap_rte->address_lo.index_0_14;
 
-    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
+    if ( index >= IREMAP_ENTRY_NR )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: index (%d) for remap table is invalid !\n",
@@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct iremap_entry new_ire;
     struct msi_msg_remap_entry *remap_rte;
-    int index;
+    unsigned int index, i, nr = 1;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    remap_rte = (struct msi_msg_remap_entry *) msg;
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        nr = msi_desc->msi.nvec;
+
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
     if ( msg == NULL )
     {
-        /* Free specified unused IRTE */
-        free_remap_entry(iommu, msi_desc->remap_index);
+        /* Free specified unused IRTEs */
+        for ( i = 0; i < nr; ++i )
+            free_remap_entry(iommu, msi_desc->remap_index + i);
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return 0;
     }
 
     if ( msi_desc->remap_index < 0 )
     {
-        /*
-         * TODO: Multiple-vector MSI requires allocating multiple continuous
-         * entries and configuring addr/data of msi_msg in different way. So
-         * alloca_remap_entry will be changed if enabling multiple-vector MSI
-         * in future.
-         */
-        index = alloc_remap_entry(iommu);
-        msi_desc->remap_index = index;
+        index = alloc_remap_entry(iommu, nr);
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = index + i;
     }
     else
         index = msi_desc->remap_index;
@@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
                 "%s: intremap index (%d) is larger than"
                 " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
-        msi_desc->remap_index = -1;
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = -1;
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
@@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
     new_ire.lo.p = 1;    /* finally, set present bit */
 
     /* now construct new MSI/MSI-X rte entry */
+    remap_rte = (struct msi_msg_remap_entry *)msg;
     remap_rte->address_lo.dontcare = 0;
-    remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
-    remap_rte->address_lo.index_0_14 = index & 0x7fff;
+    i = index;
+    if ( !nr )
+        i -= msi_desc->msi_attrib.entry_nr;
+    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
+    remap_rte->address_lo.index_0_14 = i & 0x7fff;
     remap_rte->address_lo.SHV = 1;
     remap_rte->address_lo.format = 1;
 
     remap_rte->address_hi = 0;
-    remap_rte->data = 0;
+    remap_rte->data = index - i;
 
     memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
     iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
@@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
     if ( drhd )
-        remap_entry_to_msi_msg(drhd->iommu, msg);
+        remap_entry_to_msi_msg(drhd->iommu, msg,
+                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
+                               ? msi_desc->msi_attrib.entry_nr : 0);
 }
 
 int msi_msg_write_remap_rte(
@@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
         return 0;
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
-    msi_desc->remap_index = alloc_remap_entry(iommu);
+    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
     if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
     {
         dprintk(XENLOG_ERR VTDPREFIX,



[-- Attachment #2: VT-d-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 6582 bytes --]

VT-d: enable for multi-vector MSI

The main change being to make alloc_remap_entry() capable of allocating
a block of entries.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
 }
 
 /*
- * Look for a free intr remap entry.
+ * Look for a free intr remap entry (or a contiguous set thereof).
  * Need hold iremap_lock, and setup returned entry before releasing lock.
  */
-static int alloc_remap_entry(struct iommu *iommu)
+static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
 {
     struct iremap_entry *iremap_entries = NULL;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
-    int i;
+    unsigned int i, found;
 
     ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
 
-    for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
+    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
     {
         struct iremap_entry *p;
         if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
@@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
         else
             p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-        if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
+        if ( p->lo_val || p->hi_val ) /* not a free entry */
+            found = 0;
+        else if ( ++found == nr )
             break;
     }
 
@@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
         unmap_vtd_domain_page(iremap_entries);
 
     if ( i < IREMAP_ENTRY_NR ) 
-        ir_ctrl->iremap_num++;
+        ir_ctrl->iremap_num += nr;
     return i;
 }
 
@@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
     index = apic_pin_2_ir_idx[apic][ioapic_pin];
     if ( index < 0 )
     {
-        index = alloc_remap_entry(iommu);
+        index = alloc_remap_entry(iommu, 1);
         if ( index < IREMAP_ENTRY_NR )
             apic_pin_2_ir_idx[apic][ioapic_pin] = index;
     }
@@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
 }
 
 static int remap_entry_to_msi_msg(
-    struct iommu *iommu, struct msi_msg *msg)
+    struct iommu *iommu, struct msi_msg *msg, unsigned int index)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct msi_msg_remap_entry *remap_rte;
-    int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     remap_rte = (struct msi_msg_remap_entry *) msg;
-    index = (remap_rte->address_lo.index_15 << 15) |
+    index += (remap_rte->address_lo.index_15 << 15) |
              remap_rte->address_lo.index_0_14;
 
-    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
+    if ( index >= IREMAP_ENTRY_NR )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: index (%d) for remap table is invalid !\n",
@@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct iremap_entry new_ire;
     struct msi_msg_remap_entry *remap_rte;
-    int index;
+    unsigned int index, i, nr = 1;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    remap_rte = (struct msi_msg_remap_entry *) msg;
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        nr = msi_desc->msi.nvec;
+
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
     if ( msg == NULL )
     {
-        /* Free specified unused IRTE */
-        free_remap_entry(iommu, msi_desc->remap_index);
+        /* Free specified unused IRTEs */
+        for ( i = 0; i < nr; ++i )
+            free_remap_entry(iommu, msi_desc->remap_index + i);
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return 0;
     }
 
     if ( msi_desc->remap_index < 0 )
     {
-        /*
-         * TODO: Multiple-vector MSI requires allocating multiple continuous
-         * entries and configuring addr/data of msi_msg in different way. So
-         * alloca_remap_entry will be changed if enabling multiple-vector MSI
-         * in future.
-         */
-        index = alloc_remap_entry(iommu);
-        msi_desc->remap_index = index;
+        index = alloc_remap_entry(iommu, nr);
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = index + i;
     }
     else
         index = msi_desc->remap_index;
@@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
                 "%s: intremap index (%d) is larger than"
                 " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
-        msi_desc->remap_index = -1;
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = -1;
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
@@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
     new_ire.lo.p = 1;    /* finally, set present bit */
 
     /* now construct new MSI/MSI-X rte entry */
+    remap_rte = (struct msi_msg_remap_entry *)msg;
     remap_rte->address_lo.dontcare = 0;
-    remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
-    remap_rte->address_lo.index_0_14 = index & 0x7fff;
+    i = index;
+    if ( !nr )
+        i -= msi_desc->msi_attrib.entry_nr;
+    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
+    remap_rte->address_lo.index_0_14 = i & 0x7fff;
     remap_rte->address_lo.SHV = 1;
     remap_rte->address_lo.format = 1;
 
     remap_rte->address_hi = 0;
-    remap_rte->data = 0;
+    remap_rte->data = index - i;
 
     memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
     iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
@@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
     if ( drhd )
-        remap_entry_to_msi_msg(drhd->iommu, msg);
+        remap_entry_to_msi_msg(drhd->iommu, msg,
+                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
+                               ? msi_desc->msi_attrib.entry_nr : 0);
 }
 
 int msi_msg_write_remap_rte(
@@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
         return 0;
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
-    msi_desc->remap_index = alloc_remap_entry(iommu);
+    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
     if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
     {
         dprintk(XENLOG_ERR VTDPREFIX,

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 4/6] AMD IOMMU: enable for multi-vector MSI
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
                   ` (2 preceding siblings ...)
  2013-04-19 10:58 ` [PATCH 3/6] VT-d: enable for multi-vector MSI Jan Beulich
@ 2013-04-19 10:59 ` Jan Beulich
  2013-04-19 10:59 ` [PATCH 5/6] x86: enable " Jan Beulich
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 10:59 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

[-- Attachment #1: Type: text/plain, Size: 6182 bytes --]

The main change being to make alloc_intremap_entry() capable of
allocating a block of entries.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -47,13 +47,33 @@ static int get_intremap_requestor_id(int
     return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
 }
 
-static unsigned int alloc_intremap_entry(int seg, int bdf)
+static unsigned int alloc_intremap_entry(int seg, int bdf, unsigned int nr)
 {
     unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
     unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
 
-    if ( slot < INTREMAP_ENTRIES )
-        __set_bit(slot, inuse);
+    for ( ; ; )
+    {
+        unsigned int end;
+
+        if ( slot >= INTREMAP_ENTRIES )
+            break;
+        end = find_next_bit(inuse, INTREMAP_ENTRIES, slot + 1);
+        if ( end > INTREMAP_ENTRIES )
+            end = INTREMAP_ENTRIES;
+        slot = (slot + nr - 1) & ~(nr - 1);
+        if ( slot + nr <= end )
+        {
+            while ( nr-- )
+                __set_bit(slot + nr, inuse);
+            break;
+        }
+        slot = (end + nr) & ~(nr - 1);
+        if ( slot >= INTREMAP_ENTRIES )
+            break;
+        slot = find_next_zero_bit(inuse, INTREMAP_ENTRIES, slot);
+    }
+
     return slot;
 }
 
@@ -132,7 +152,7 @@ static int update_intremap_entry_from_io
     offset = *index;
     if ( offset >= INTREMAP_ENTRIES )
     {
-        offset = alloc_intremap_entry(iommu->seg, req_id);
+        offset = alloc_intremap_entry(iommu->seg, req_id, 1);
         if ( offset >= INTREMAP_ENTRIES )
         {
             spin_unlock_irqrestore(lock, flags);
@@ -200,7 +220,7 @@ int __init amd_iommu_setup_ioapic_remapp
             dest = rte.dest.logical.logical_dest;
 
             spin_lock_irqsave(lock, flags);
-            offset = alloc_intremap_entry(seg, req_id);
+            offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
             entry = get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
@@ -334,7 +354,7 @@ unsigned int amd_iommu_read_ioapic_from_
 }
 
 static int update_intremap_entry_from_msi_msg(
-    struct amd_iommu *iommu, u16 bdf,
+    struct amd_iommu *iommu, u16 bdf, unsigned int nr,
     int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
@@ -342,7 +362,7 @@ static int update_intremap_entry_from_ms
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset;
+    unsigned int offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -351,7 +371,8 @@ static int update_intremap_entry_from_ms
     {
         lock = get_intremap_lock(iommu->seg, req_id);
         spin_lock_irqsave(lock, flags);
-        free_intremap_entry(iommu->seg, req_id, *remap_index);
+        for ( i = 0; i < nr; ++i )
+            free_intremap_entry(iommu->seg, req_id, *remap_index + i);
         spin_unlock_irqrestore(lock, flags);
         goto done;
     }
@@ -366,7 +387,8 @@ static int update_intremap_entry_from_ms
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
-        offset = alloc_intremap_entry(iommu->seg, bdf);
+        ASSERT(nr);
+        offset = alloc_intremap_entry(iommu->seg, bdf, nr);
         if ( offset >= INTREMAP_ENTRIES )
         {
             spin_unlock_irqrestore(lock, flags);
@@ -432,6 +454,7 @@ int amd_iommu_msi_msg_update_ire(
     struct pci_dev *pdev = msi_desc->dev;
     int bdf, seg, rc;
     struct amd_iommu *iommu;
+    unsigned int i, nr = 1;
     u32 data;
 
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
@@ -441,10 +464,13 @@ int amd_iommu_msi_msg_update_ire(
     if ( IS_ERR_OR_NULL(iommu) )
         return PTR_ERR(iommu);
 
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        nr = msi_desc->msi.nvec;
+
     if ( msi_desc->remap_index >= 0 && !msg )
     {
         do {
-            update_intremap_entry_from_msi_msg(iommu, bdf,
+            update_intremap_entry_from_msi_msg(iommu, bdf, nr,
                                                &msi_desc->remap_index,
                                                NULL, NULL);
             if ( !pdev || !pdev->phantom_stride )
@@ -452,7 +478,8 @@ int amd_iommu_msi_msg_update_ire(
             bdf += pdev->phantom_stride;
         } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
-        msi_desc->remap_index = -1;
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = -1;
         if ( pdev )
             bdf = PCI_BDF2(pdev->bus, pdev->devfn);
     }
@@ -461,7 +488,7 @@ int amd_iommu_msi_msg_update_ire(
         return 0;
 
     do {
-        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+        rc = update_intremap_entry_from_msi_msg(iommu, bdf, nr,
                                                 &msi_desc->remap_index,
                                                 msg, &data);
         if ( rc || !pdev || !pdev->phantom_stride )
@@ -469,6 +496,10 @@ int amd_iommu_msi_msg_update_ire(
         bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
+    if ( !rc )
+        for ( i = 1; i < nr; ++i )
+            msi_desc[i].remap_index = msi_desc->remap_index + i;
+
     msg->data = data;
     return rc;
 }
@@ -487,6 +518,14 @@ void amd_iommu_read_msi_from_ire(
 
     entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
 
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        int nr = msi_desc->msi_attrib.entry_nr;
+
+        ASSERT(!(offset & (msi_desc[-nr].msi.nvec - 1)));
+        offset |= nr;
+    }
+
     msg->data &= ~(INTREMAP_ENTRIES - 1);
     msg->data |= get_field_from_reg_u32(*entry,
                                         INT_REMAP_ENTRY_INTTYPE_MASK,



[-- Attachment #2: AMD-IOMMU-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 6220 bytes --]

AMD IOMMU: enable for multi-vector MSI

The main change being to make alloc_intremap_entry() capable of
allocating a block of entries.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -47,13 +47,33 @@ static int get_intremap_requestor_id(int
     return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
 }
 
-static unsigned int alloc_intremap_entry(int seg, int bdf)
+static unsigned int alloc_intremap_entry(int seg, int bdf, unsigned int nr)
 {
     unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
     unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
 
-    if ( slot < INTREMAP_ENTRIES )
-        __set_bit(slot, inuse);
+    for ( ; ; )
+    {
+        unsigned int end;
+
+        if ( slot >= INTREMAP_ENTRIES )
+            break;
+        end = find_next_bit(inuse, INTREMAP_ENTRIES, slot + 1);
+        if ( end > INTREMAP_ENTRIES )
+            end = INTREMAP_ENTRIES;
+        slot = (slot + nr - 1) & ~(nr - 1);
+        if ( slot + nr <= end )
+        {
+            while ( nr-- )
+                __set_bit(slot + nr, inuse);
+            break;
+        }
+        slot = (end + nr) & ~(nr - 1);
+        if ( slot >= INTREMAP_ENTRIES )
+            break;
+        slot = find_next_zero_bit(inuse, INTREMAP_ENTRIES, slot);
+    }
+
     return slot;
 }
 
@@ -132,7 +152,7 @@ static int update_intremap_entry_from_io
     offset = *index;
     if ( offset >= INTREMAP_ENTRIES )
     {
-        offset = alloc_intremap_entry(iommu->seg, req_id);
+        offset = alloc_intremap_entry(iommu->seg, req_id, 1);
         if ( offset >= INTREMAP_ENTRIES )
         {
             spin_unlock_irqrestore(lock, flags);
@@ -200,7 +220,7 @@ int __init amd_iommu_setup_ioapic_remapp
             dest = rte.dest.logical.logical_dest;
 
             spin_lock_irqsave(lock, flags);
-            offset = alloc_intremap_entry(seg, req_id);
+            offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
             entry = get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
@@ -334,7 +354,7 @@ unsigned int amd_iommu_read_ioapic_from_
 }
 
 static int update_intremap_entry_from_msi_msg(
-    struct amd_iommu *iommu, u16 bdf,
+    struct amd_iommu *iommu, u16 bdf, unsigned int nr,
     int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
@@ -342,7 +362,7 @@ static int update_intremap_entry_from_ms
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset;
+    unsigned int offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -351,7 +371,8 @@ static int update_intremap_entry_from_ms
     {
         lock = get_intremap_lock(iommu->seg, req_id);
         spin_lock_irqsave(lock, flags);
-        free_intremap_entry(iommu->seg, req_id, *remap_index);
+        for ( i = 0; i < nr; ++i )
+            free_intremap_entry(iommu->seg, req_id, *remap_index + i);
         spin_unlock_irqrestore(lock, flags);
         goto done;
     }
@@ -366,7 +387,8 @@ static int update_intremap_entry_from_ms
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
-        offset = alloc_intremap_entry(iommu->seg, bdf);
+        ASSERT(nr);
+        offset = alloc_intremap_entry(iommu->seg, bdf, nr);
         if ( offset >= INTREMAP_ENTRIES )
         {
             spin_unlock_irqrestore(lock, flags);
@@ -432,6 +454,7 @@ int amd_iommu_msi_msg_update_ire(
     struct pci_dev *pdev = msi_desc->dev;
     int bdf, seg, rc;
     struct amd_iommu *iommu;
+    unsigned int i, nr = 1;
     u32 data;
 
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
@@ -441,10 +464,13 @@ int amd_iommu_msi_msg_update_ire(
     if ( IS_ERR_OR_NULL(iommu) )
         return PTR_ERR(iommu);
 
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        nr = msi_desc->msi.nvec;
+
     if ( msi_desc->remap_index >= 0 && !msg )
     {
         do {
-            update_intremap_entry_from_msi_msg(iommu, bdf,
+            update_intremap_entry_from_msi_msg(iommu, bdf, nr,
                                                &msi_desc->remap_index,
                                                NULL, NULL);
             if ( !pdev || !pdev->phantom_stride )
@@ -452,7 +478,8 @@ int amd_iommu_msi_msg_update_ire(
             bdf += pdev->phantom_stride;
         } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
-        msi_desc->remap_index = -1;
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = -1;
         if ( pdev )
             bdf = PCI_BDF2(pdev->bus, pdev->devfn);
     }
@@ -461,7 +488,7 @@ int amd_iommu_msi_msg_update_ire(
         return 0;
 
     do {
-        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+        rc = update_intremap_entry_from_msi_msg(iommu, bdf, nr,
                                                 &msi_desc->remap_index,
                                                 msg, &data);
         if ( rc || !pdev || !pdev->phantom_stride )
@@ -469,6 +496,10 @@ int amd_iommu_msi_msg_update_ire(
         bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
+    if ( !rc )
+        for ( i = 1; i < nr; ++i )
+            msi_desc[i].remap_index = msi_desc->remap_index + i;
+
     msg->data = data;
     return rc;
 }
@@ -487,6 +518,14 @@ void amd_iommu_read_msi_from_ire(
 
     entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
 
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        int nr = msi_desc->msi_attrib.entry_nr;
+
+        ASSERT(!(offset & (msi_desc[-nr].msi.nvec - 1)));
+        offset |= nr;
+    }
+
     msg->data &= ~(INTREMAP_ENTRIES - 1);
     msg->data |= get_field_from_reg_u32(*entry,
                                         INT_REMAP_ENTRY_INTTYPE_MASK,

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 5/6] x86: enable multi-vector MSI
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
                   ` (3 preceding siblings ...)
  2013-04-19 10:59 ` [PATCH 4/6] AMD IOMMU: enable for multi-vector MSI Jan Beulich
@ 2013-04-19 10:59 ` Jan Beulich
  2013-04-23  0:55   ` Suravee Suthikulpanit
  2013-04-19 11:00 ` [PATCH 6/6] pciif: add multi-vector-MSI command Jan Beulich
  2013-04-19 14:48 ` [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
  6 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 10:59 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

[-- Attachment #1: Type: text/plain, Size: 23243 bytes --]

This implies
- extending the public interface to have a way to request a block of
  MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
  note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1880,6 +1880,25 @@ int get_free_pirq(struct domain *d, int 
     return -ENOSPC;
 }
 
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+    unsigned int i, found = 0;
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+        if ( is_free_pirq(d, pirq_info(d, i)) )
+        {
+            pirq_get_info(d, i);
+            if ( ++found == nr )
+                return i;
+        }
+        else
+            found = 0;
+
+    return -ENOSPC;
+}
+
 int map_domain_pirq(
     struct domain *d, int pirq, int irq, int type, void *data)
 {
@@ -1935,11 +1954,12 @@ int map_domain_pirq(
 
     desc = irq_to_desc(irq);
 
-    if ( type == MAP_PIRQ_TYPE_MSI )
+    if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
         struct msi_desc *msi_desc;
         struct pci_dev *pdev;
+        unsigned int nr = 0;
 
         ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -1950,7 +1970,14 @@ int map_domain_pirq(
         pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
+        {
+            if ( ret > 0 )
+            {
+                msi->entry_nr = ret;
+                ret = -ENFILE;
+            }
             goto done;
+        }
 
         spin_lock_irqsave(&desc->lock, flags);
 
@@ -1964,25 +1991,73 @@ int map_domain_pirq(
             goto done;
         }
 
-        ret = setup_msi_irq(desc, msi_desc);
-        if ( ret )
+        while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
         {
+            if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+                 !desc->arch.used_vectors )
+            {
+                desc->arch.used_vectors = &pdev->arch.used_vectors;
+                if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+                {
+                    int vector = desc->arch.vector;
+
+                    ASSERT(!test_bit(vector, desc->arch.used_vectors));
+                    set_bit(vector, desc->arch.used_vectors);
+                }
+            }
+            if ( type == MAP_PIRQ_TYPE_MSI ||
+                 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+                 ++nr == msi->entry_nr )
+                break;
+
+            set_domain_irq_pirq(d, irq, info);
             spin_unlock_irqrestore(&desc->lock, flags);
-            pci_disable_msi(msi_desc);
-            goto done;
+
+            info = NULL;
+            irq = create_irq(NUMA_NO_NODE);
+            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+                           : irq;
+            if ( ret )
+                break;
+            msi_desc[nr].irq = irq;
+
+            if ( irq_permit_access(d, irq) != 0 )
+                printk(XENLOG_G_WARNING
+                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+                       d->domain_id, irq, pirq);
+
+            desc = irq_to_desc(irq);
+            spin_lock_irqsave(&desc->lock, flags);
+
+            if ( desc->handler != &no_irq_type )
+            {
+                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+                        d->domain_id, irq, pirq + nr, desc->handler->typename);
+                ret = -EBUSY;
+                break;
+            }
         }
 
-        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
-             && !desc->arch.used_vectors )
+        if ( ret )
         {
-            desc->arch.used_vectors = &pdev->arch.used_vectors;
-            if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+            spin_unlock_irqrestore(&desc->lock, flags);
+            while ( nr-- )
             {
-                int vector = desc->arch.vector;
-                ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
-                set_bit(vector, desc->arch.used_vectors);
+                if ( irq >= 0 )
+                {
+                    if ( irq_deny_access(d, irq) )
+                        printk(XENLOG_G_ERR
+                               "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+                               d->domain_id, irq, pirq);
+                    destroy_irq(irq);
+                }
+                if ( info )
+                    cleanup_domain_irq_pirq(d, irq, info);
+                info = pirq_info(d, pirq + nr);
+                irq = info->arch.irq;
             }
+            pci_disable_msi(msi_desc);
+            goto done;
         }
 
         set_domain_irq_pirq(d, irq, info);
@@ -2013,7 +2088,8 @@ int unmap_domain_pirq(struct domain *d, 
 {
     unsigned long flags;
     struct irq_desc *desc;
-    int irq, ret = 0;
+    int irq, ret = 0, rc;
+    unsigned int i, nr = 1;
     bool_t forced_unbind;
     struct pirq *info;
     struct msi_desc *msi_desc = NULL;
@@ -2035,6 +2111,18 @@ int unmap_domain_pirq(struct domain *d, 
 
     desc = irq_to_desc(irq);
     msi_desc = desc->msi_desc;
+    if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        if ( msi_desc->msi_attrib.entry_nr )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: trying to unmap secondary MSI pirq %d\n",
+                   d->domain_id, pirq);
+            ret = -EBUSY;
+            goto done;
+        }
+        nr = msi_desc->msi.nvec;
+    }
 
     ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
     if ( ret )
@@ -2050,37 +2138,83 @@ int unmap_domain_pirq(struct domain *d, 
 
     spin_lock_irqsave(&desc->lock, flags);
 
-    BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
-    if ( !forced_unbind )
-        clear_domain_irq_pirq(d, irq, info);
-    else
+    for ( i = 0; ; )
     {
-        info->arch.irq = -irq;
-        radix_tree_replace_slot(
-            radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
-            radix_tree_int_to_ptr(-pirq));
+        BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+        if ( !forced_unbind )
+            clear_domain_irq_pirq(d, irq, info);
+        else
+        {
+            info->arch.irq = -irq;
+            radix_tree_replace_slot(
+                radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+                radix_tree_int_to_ptr(-pirq));
+        }
+
+        if ( msi_desc )
+        {
+            desc->handler = &no_irq_type;
+            desc->msi_desc = NULL;
+        }
+
+        if ( ++i == nr )
+            break;
+
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+           cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + i);
+            ret = rc;
+        }
+
+        do {
+            info = pirq_info(d, pirq + i);
+            if ( info && (irq = info->arch.irq) > 0 )
+                break;
+            printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+                   d->domain_id, pirq + i);
+        } while ( ++i < nr );
+
+        if ( i == nr )
+        {
+            desc = NULL;
+            break;
+        }
+
+        desc = irq_to_desc(irq);
+        BUG_ON(desc->msi_desc != msi_desc + i);
+
+        spin_lock_irqsave(&desc->lock, flags);
     }
 
-    if ( msi_desc )
+    if ( desc )
     {
-        desc->handler = &no_irq_type;
-        desc->msi_desc = NULL;
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+            cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + nr - 1);
+            ret = rc;
+        }
     }
 
-    spin_unlock_irqrestore(&desc->lock, flags);
     if (msi_desc)
         msi_free_irq(msi_desc);
 
-    if ( !forced_unbind )
-        cleanup_domain_irq_pirq(d, irq, info);
-
-    ret = irq_deny_access(d, irq);
-    if ( ret )
-        printk(XENLOG_G_ERR
-               "dom%d: could not deny access to IRQ%d (pirq %d)\n",
-               d->domain_id, irq, pirq);
-
  done:
     return ret;
 }
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -238,6 +238,11 @@ static int write_msi_msg(struct msi_desc
         u8 bus = dev->bus;
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
+        int nr = entry->msi_attrib.entry_nr;
+
+        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+        if ( nr )
+            return 0;
 
         pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
@@ -361,8 +366,8 @@ static void msi_set_mask_bit(struct irq_
             u8 func = PCI_FUNC(entry->dev->devfn);
 
             mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
-            mask_bits &= ~(1);
-            mask_bits |= flag;
+            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
             pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
         }
         break;
@@ -386,10 +391,11 @@ static int msi_get_mask_bit(const struct
     case PCI_CAP_ID_MSI:
         if (!entry->dev || !entry->msi_attrib.maskbit)
             break;
-        return pci_conf_read32(entry->dev->seg, entry->dev->bus,
-                               PCI_SLOT(entry->dev->devfn),
-                               PCI_FUNC(entry->dev->devfn),
-                               entry->msi.mpos) & 1;
+        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
+                                PCI_SLOT(entry->dev->devfn),
+                                PCI_FUNC(entry->dev->devfn),
+                                entry->msi.mpos) >>
+                entry->msi_attrib.entry_nr) & 1;
     case PCI_CAP_ID_MSIX:
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
@@ -455,17 +461,20 @@ static hw_irq_controller pci_msi_nonmask
     .set_affinity = set_msi_affinity
 };
 
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
 {
     struct msi_desc *entry;
 
-    entry = xmalloc(struct msi_desc);
+    entry = xmalloc_array(struct msi_desc, nr);
     if ( !entry )
         return NULL;
 
     INIT_LIST_HEAD(&entry->list);
-    entry->dev = NULL;
-    entry->remap_index = -1;
+    while ( nr-- )
+    {
+        entry[nr].dev = NULL;
+        entry[nr].remap_index = -1;
+    }
 
     return entry;
 }
@@ -483,17 +492,24 @@ int setup_msi_irq(struct irq_desc *desc,
 
 int msi_free_irq(struct msi_desc *entry)
 {
-    destroy_irq(entry->irq);
+    unsigned int nr = entry->msi.nvec;
+
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
     {
         unsigned long start;
         start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
         msix_put_fixmap(entry->dev, virt_to_fix(start));
+        nr = 1;
     }
 
-    /* Free the unused IRTE if intr remap enabled */
-    if ( iommu_intremap )
-        iommu_update_ire_from_msi(entry, NULL);
+    while ( nr-- )
+    {
+        destroy_irq(entry[nr].irq);
+
+        /* Free the unused IRTE if intr remap enabled */
+        if ( iommu_intremap )
+            iommu_update_ire_from_msi(entry + nr, NULL);
+    }
 
     list_del(&entry->list);
     xfree(entry);
@@ -526,11 +542,12 @@ static struct msi_desc *find_msi_entry(s
  **/
 static int msi_capability_init(struct pci_dev *dev,
                                int irq,
-                               struct msi_desc **desc)
+                               struct msi_desc **desc,
+                               unsigned int nvec)
 {
     struct msi_desc *entry;
     int pos;
-    unsigned int maxvec, mpos;
+    unsigned int i, maxvec, mpos;
     u16 control, seg = dev->seg;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
@@ -540,27 +557,34 @@ static int msi_capability_init(struct pc
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
     maxvec = multi_msi_capable(control);
+    if ( nvec > maxvec )
+        return maxvec;
     control &= ~PCI_MSI_FLAGS_QSIZE;
+    multi_msi_enable(control, nvec);
 
     /* MSI Entry Initialization */
     msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
 
-    entry = alloc_msi_entry();
+    entry = alloc_msi_entry(nvec);
     if ( !entry )
         return -ENOMEM;
 
-    entry->msi_attrib.type = PCI_CAP_ID_MSI;
-    entry->msi_attrib.is_64 = is_64bit_address(control);
-    entry->msi_attrib.entry_nr = 0;
-    entry->msi_attrib.maskbit = is_mask_bit_support(control);
-    entry->msi_attrib.masked = 1;
-    entry->msi_attrib.pos = pos;
     mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
-    entry->msi.nvec = 1;
+    for ( i = 0; i < nvec; ++i )
+    {
+        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+        entry[i].msi_attrib.is_64 = is_64bit_address(control);
+        entry[i].msi_attrib.entry_nr = i;
+        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+        entry[i].msi_attrib.masked = 1;
+        entry[i].msi_attrib.pos = pos;
+        if ( entry[i].msi_attrib.maskbit )
+            entry[i].msi.mpos = mpos;
+        entry[i].msi.nvec = 0;
+        entry[i].dev = dev;
+    }
+    entry->msi.nvec = nvec;
     entry->irq = irq;
-    if ( is_mask_bit_support(control) )
-        entry->msi.mpos = mpos;
-    entry->dev = dev;
     if ( entry->msi_attrib.maskbit )
     {
         u32 maskbits;
@@ -688,7 +712,7 @@ static int msix_capability_init(struct p
 
     if ( desc )
     {
-        entry = alloc_msi_entry();
+        entry = alloc_msi_entry(1);
         if ( !entry )
             return -ENOMEM;
         ASSERT(msi);
@@ -846,7 +870,6 @@ static int msix_capability_init(struct p
 
 static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
-    int status;
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
@@ -875,8 +898,7 @@ static int __pci_enable_msi(struct msi_i
         pci_disable_msi(old_desc);
     }
 
-    status = msi_capability_init(pdev, msi->irq, desc);
-    return status;
+    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
 }
 
 static void __pci_disable_msi(struct msi_desc *entry)
@@ -1096,6 +1118,8 @@ int pci_restore_msi_state(struct pci_dev
 
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
+        unsigned int i = 0, nr = 1;
+
         irq = entry->irq;
         desc = &irq_desc[irq];
 
@@ -1105,30 +1129,58 @@ int pci_restore_msi_state(struct pci_dev
 
         if (desc->msi_desc != entry)
         {
+    bogus:
             dprintk(XENLOG_ERR,
-                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-                    PCI_FUNC(pdev->devfn));
+                    PCI_FUNC(pdev->devfn), i);
             spin_unlock_irqrestore(&desc->lock, flags);
             return -EINVAL;
         }
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
             msi_set_enable(pdev, 0);
+            nr = entry->msi.nvec;
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 0);
 
         msg = entry->msg;
         write_msi_msg(entry, &msg);
 
-        msi_set_mask_bit(desc, entry->msi_attrib.masked);
+        for ( i = 0; ; )
+        {
+            msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+            spin_unlock_irqrestore(&desc->lock, flags);
+
+            if ( !--nr )
+                break;
+
+            desc = &irq_desc[entry[++i].irq];
+            spin_lock_irqsave(&desc->lock, flags);
+            if ( desc->msi_desc != entry + i )
+                goto bogus;
+        }
+
+        spin_unlock_irqrestore(&desc->lock, flags);
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
+            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+                                          PCI_SLOT(pdev->devfn),
+                                          PCI_FUNC(pdev->devfn), cpos);
+
+            control &= ~PCI_MSI_FLAGS_QSIZE;
+            multi_msi_enable(control, entry->msi.nvec);
+            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                             PCI_FUNC(pdev->devfn), cpos, control);
+
             msi_set_enable(pdev, 1);
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 1);
-
-        spin_unlock_irqrestore(&desc->lock, flags);
     }
 
     return 0;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int 
         break;
 
     case MAP_PIRQ_TYPE_MSI:
+        if ( !msi->table_base )
+            msi->entry_nr = 1;
         irq = *index;
         if ( irq == -1 )
+    case MAP_PIRQ_TYPE_MULTI_MSI:
             irq = create_irq(NUMA_NO_NODE);
 
         if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int 
                 goto done;
             }
         }
+        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+        {
+            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+                ret = -EDOM;
+            else if ( msi->entry_nr != 1 && !iommu_intremap )
+                ret = -EOPNOTSUPP;
+            else
+            {
+                while ( msi->entry_nr & (msi->entry_nr - 1) )
+                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+                pirq = get_free_pirqs(d, msi->entry_nr);
+                if ( pirq < 0 )
+                {
+                    while ( (msi->entry_nr >>= 1) > 1 )
+                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+                            break;
+                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+                            d->domain_id, msi->entry_nr << 1);
+                    ret = pirq;
+                }
+            }
+            if ( ret < 0 )
+                goto done;
+        }
         else
         {
             pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int 
  done:
     spin_unlock(&d->event_lock);
     spin_unlock(&pcidevs_lock);
-    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
-        destroy_irq(irq);
+    if ( ret != 0 )
+        switch ( type )
+        {
+        case MAP_PIRQ_TYPE_MSI:
+            if ( *index == -1 )
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+                destroy_irq(irq);
+            break;
+        }
  free_domain:
     rcu_unlock_domain(d);
     return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&map, arg, 1) != 0 )
             break;
 
-        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+        switch ( map.type )
         {
+        case MAP_PIRQ_TYPE_MSI_SEG:
             map.type = MAP_PIRQ_TYPE_MSI;
             msi.seg = map.bus >> 16;
-        }
-        else
-        {
+            break;
+
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+            if ( map.table_base )
+                return -EINVAL;
+            msi.seg = map.bus >> 16;
+            break;
+
+        default:
             msi.seg = 0;
+            break;
         }
         msi.bus = map.bus;
         msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
                                &msi);
 
+        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+            map.entry_nr = msi.entry_nr;
         if ( __copy_to_guest(arg, &map, 1) )
             ret = -EFAULT;
         break;
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
                            void *data);
 int unmap_domain_pirq(struct domain *d, int pirq);
 int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
 void free_domain_pirqs(struct domain *d);
 int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
 int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -145,7 +145,7 @@ int msi_free_irq(struct msi_desc *entry)
 #define multi_msi_capable(control) \
 	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
-	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
 #define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 #define MAP_PIRQ_TYPE_GSI               0x1
 #define MAP_PIRQ_TYPE_UNKNOWN           0x2
 #define MAP_PIRQ_TYPE_MSI_SEG           0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
 
 #define PHYSDEVOP_map_pirq               13
 struct physdev_map_pirq {
     domid_t domid;
     /* IN */
     int type;
-    /* IN */
+    /* IN (ignored for ..._MULTI_MSI) */
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
     int bus;
     /* IN */
     int devfn;
-    /* IN */
+    /* IN (also OUT for ..._MULTI_MSI) */
     int entry_nr;
     /* IN */
     uint64_t table_base;



[-- Attachment #2: x86-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 23271 bytes --]

x86: enable multi-vector MSI

This implies
- extending the public interface to have a way to request a block of
  MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
  note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1880,6 +1880,25 @@ int get_free_pirq(struct domain *d, int 
     return -ENOSPC;
 }
 
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+    unsigned int i, found = 0;
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+        if ( is_free_pirq(d, pirq_info(d, i)) )
+        {
+            pirq_get_info(d, i);
+            if ( ++found == nr )
+                return i;
+        }
+        else
+            found = 0;
+
+    return -ENOSPC;
+}
+
 int map_domain_pirq(
     struct domain *d, int pirq, int irq, int type, void *data)
 {
@@ -1935,11 +1954,12 @@ int map_domain_pirq(
 
     desc = irq_to_desc(irq);
 
-    if ( type == MAP_PIRQ_TYPE_MSI )
+    if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
         struct msi_desc *msi_desc;
         struct pci_dev *pdev;
+        unsigned int nr = 0;
 
         ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -1950,7 +1970,14 @@ int map_domain_pirq(
         pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
+        {
+            if ( ret > 0 )
+            {
+                msi->entry_nr = ret;
+                ret = -ENFILE;
+            }
             goto done;
+        }
 
         spin_lock_irqsave(&desc->lock, flags);
 
@@ -1964,25 +1991,73 @@ int map_domain_pirq(
             goto done;
         }
 
-        ret = setup_msi_irq(desc, msi_desc);
-        if ( ret )
+        while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
         {
+            if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+                 !desc->arch.used_vectors )
+            {
+                desc->arch.used_vectors = &pdev->arch.used_vectors;
+                if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+                {
+                    int vector = desc->arch.vector;
+
+                    ASSERT(!test_bit(vector, desc->arch.used_vectors));
+                    set_bit(vector, desc->arch.used_vectors);
+                }
+            }
+            if ( type == MAP_PIRQ_TYPE_MSI ||
+                 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+                 ++nr == msi->entry_nr )
+                break;
+
+            set_domain_irq_pirq(d, irq, info);
             spin_unlock_irqrestore(&desc->lock, flags);
-            pci_disable_msi(msi_desc);
-            goto done;
+
+            info = NULL;
+            irq = create_irq(NUMA_NO_NODE);
+            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+                           : irq;
+            if ( ret )
+                break;
+            msi_desc[nr].irq = irq;
+
+            if ( irq_permit_access(d, irq) != 0 )
+                printk(XENLOG_G_WARNING
+                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+                       d->domain_id, irq, pirq);
+
+            desc = irq_to_desc(irq);
+            spin_lock_irqsave(&desc->lock, flags);
+
+            if ( desc->handler != &no_irq_type )
+            {
+                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+                        d->domain_id, irq, pirq + nr, desc->handler->typename);
+                ret = -EBUSY;
+                break;
+            }
         }
 
-        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
-             && !desc->arch.used_vectors )
+        if ( ret )
         {
-            desc->arch.used_vectors = &pdev->arch.used_vectors;
-            if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+            spin_unlock_irqrestore(&desc->lock, flags);
+            while ( nr-- )
             {
-                int vector = desc->arch.vector;
-                ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
-                set_bit(vector, desc->arch.used_vectors);
+                if ( irq >= 0 )
+                {
+                    if ( irq_deny_access(d, irq) )
+                        printk(XENLOG_G_ERR
+                               "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+                               d->domain_id, irq, pirq);
+                    destroy_irq(irq);
+                }
+                if ( info )
+                    cleanup_domain_irq_pirq(d, irq, info);
+                info = pirq_info(d, pirq + nr);
+                irq = info->arch.irq;
             }
+            pci_disable_msi(msi_desc);
+            goto done;
         }
 
         set_domain_irq_pirq(d, irq, info);
@@ -2013,7 +2088,8 @@ int unmap_domain_pirq(struct domain *d, 
 {
     unsigned long flags;
     struct irq_desc *desc;
-    int irq, ret = 0;
+    int irq, ret = 0, rc;
+    unsigned int i, nr = 1;
     bool_t forced_unbind;
     struct pirq *info;
     struct msi_desc *msi_desc = NULL;
@@ -2035,6 +2111,18 @@ int unmap_domain_pirq(struct domain *d, 
 
     desc = irq_to_desc(irq);
     msi_desc = desc->msi_desc;
+    if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        if ( msi_desc->msi_attrib.entry_nr )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: trying to unmap secondary MSI pirq %d\n",
+                   d->domain_id, pirq);
+            ret = -EBUSY;
+            goto done;
+        }
+        nr = msi_desc->msi.nvec;
+    }
 
     ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
     if ( ret )
@@ -2050,37 +2138,83 @@ int unmap_domain_pirq(struct domain *d, 
 
     spin_lock_irqsave(&desc->lock, flags);
 
-    BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
-    if ( !forced_unbind )
-        clear_domain_irq_pirq(d, irq, info);
-    else
+    for ( i = 0; ; )
     {
-        info->arch.irq = -irq;
-        radix_tree_replace_slot(
-            radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
-            radix_tree_int_to_ptr(-pirq));
+        BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+        if ( !forced_unbind )
+            clear_domain_irq_pirq(d, irq, info);
+        else
+        {
+            info->arch.irq = -irq;
+            radix_tree_replace_slot(
+                radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+                radix_tree_int_to_ptr(-pirq));
+        }
+
+        if ( msi_desc )
+        {
+            desc->handler = &no_irq_type;
+            desc->msi_desc = NULL;
+        }
+
+        if ( ++i == nr )
+            break;
+
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+           cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + i);
+            ret = rc;
+        }
+
+        do {
+            info = pirq_info(d, pirq + i);
+            if ( info && (irq = info->arch.irq) > 0 )
+                break;
+            printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+                   d->domain_id, pirq + i);
+        } while ( ++i < nr );
+
+        if ( i == nr )
+        {
+            desc = NULL;
+            break;
+        }
+
+        desc = irq_to_desc(irq);
+        BUG_ON(desc->msi_desc != msi_desc + i);
+
+        spin_lock_irqsave(&desc->lock, flags);
     }
 
-    if ( msi_desc )
+    if ( desc )
     {
-        desc->handler = &no_irq_type;
-        desc->msi_desc = NULL;
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+            cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + nr - 1);
+            ret = rc;
+        }
     }
 
-    spin_unlock_irqrestore(&desc->lock, flags);
     if (msi_desc)
         msi_free_irq(msi_desc);
 
-    if ( !forced_unbind )
-        cleanup_domain_irq_pirq(d, irq, info);
-
-    ret = irq_deny_access(d, irq);
-    if ( ret )
-        printk(XENLOG_G_ERR
-               "dom%d: could not deny access to IRQ%d (pirq %d)\n",
-               d->domain_id, irq, pirq);
-
  done:
     return ret;
 }
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -238,6 +238,11 @@ static int write_msi_msg(struct msi_desc
         u8 bus = dev->bus;
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
+        int nr = entry->msi_attrib.entry_nr;
+
+        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+        if ( nr )
+            return 0;
 
         pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
@@ -361,8 +366,8 @@ static void msi_set_mask_bit(struct irq_
             u8 func = PCI_FUNC(entry->dev->devfn);
 
             mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
-            mask_bits &= ~(1);
-            mask_bits |= flag;
+            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
             pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
         }
         break;
@@ -386,10 +391,11 @@ static int msi_get_mask_bit(const struct
     case PCI_CAP_ID_MSI:
         if (!entry->dev || !entry->msi_attrib.maskbit)
             break;
-        return pci_conf_read32(entry->dev->seg, entry->dev->bus,
-                               PCI_SLOT(entry->dev->devfn),
-                               PCI_FUNC(entry->dev->devfn),
-                               entry->msi.mpos) & 1;
+        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
+                                PCI_SLOT(entry->dev->devfn),
+                                PCI_FUNC(entry->dev->devfn),
+                                entry->msi.mpos) >>
+                entry->msi_attrib.entry_nr) & 1;
     case PCI_CAP_ID_MSIX:
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
@@ -455,17 +461,20 @@ static hw_irq_controller pci_msi_nonmask
     .set_affinity = set_msi_affinity
 };
 
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
 {
     struct msi_desc *entry;
 
-    entry = xmalloc(struct msi_desc);
+    entry = xmalloc_array(struct msi_desc, nr);
     if ( !entry )
         return NULL;
 
     INIT_LIST_HEAD(&entry->list);
-    entry->dev = NULL;
-    entry->remap_index = -1;
+    while ( nr-- )
+    {
+        entry[nr].dev = NULL;
+        entry[nr].remap_index = -1;
+    }
 
     return entry;
 }
@@ -483,17 +492,24 @@ int setup_msi_irq(struct irq_desc *desc,
 
 int msi_free_irq(struct msi_desc *entry)
 {
-    destroy_irq(entry->irq);
+    unsigned int nr = entry->msi.nvec;
+
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
     {
         unsigned long start;
         start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
         msix_put_fixmap(entry->dev, virt_to_fix(start));
+        nr = 1;
     }
 
-    /* Free the unused IRTE if intr remap enabled */
-    if ( iommu_intremap )
-        iommu_update_ire_from_msi(entry, NULL);
+    while ( nr-- )
+    {
+        destroy_irq(entry[nr].irq);
+
+        /* Free the unused IRTE if intr remap enabled */
+        if ( iommu_intremap )
+            iommu_update_ire_from_msi(entry + nr, NULL);
+    }
 
     list_del(&entry->list);
     xfree(entry);
@@ -526,11 +542,12 @@ static struct msi_desc *find_msi_entry(s
  **/
 static int msi_capability_init(struct pci_dev *dev,
                                int irq,
-                               struct msi_desc **desc)
+                               struct msi_desc **desc,
+                               unsigned int nvec)
 {
     struct msi_desc *entry;
     int pos;
-    unsigned int maxvec, mpos;
+    unsigned int i, maxvec, mpos;
     u16 control, seg = dev->seg;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
@@ -540,27 +557,34 @@ static int msi_capability_init(struct pc
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
     maxvec = multi_msi_capable(control);
+    if ( nvec > maxvec )
+        return maxvec;
     control &= ~PCI_MSI_FLAGS_QSIZE;
+    multi_msi_enable(control, nvec);
 
     /* MSI Entry Initialization */
     msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
 
-    entry = alloc_msi_entry();
+    entry = alloc_msi_entry(nvec);
     if ( !entry )
         return -ENOMEM;
 
-    entry->msi_attrib.type = PCI_CAP_ID_MSI;
-    entry->msi_attrib.is_64 = is_64bit_address(control);
-    entry->msi_attrib.entry_nr = 0;
-    entry->msi_attrib.maskbit = is_mask_bit_support(control);
-    entry->msi_attrib.masked = 1;
-    entry->msi_attrib.pos = pos;
     mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
-    entry->msi.nvec = 1;
+    for ( i = 0; i < nvec; ++i )
+    {
+        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+        entry[i].msi_attrib.is_64 = is_64bit_address(control);
+        entry[i].msi_attrib.entry_nr = i;
+        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+        entry[i].msi_attrib.masked = 1;
+        entry[i].msi_attrib.pos = pos;
+        if ( entry[i].msi_attrib.maskbit )
+            entry[i].msi.mpos = mpos;
+        entry[i].msi.nvec = 0;
+        entry[i].dev = dev;
+    }
+    entry->msi.nvec = nvec;
     entry->irq = irq;
-    if ( is_mask_bit_support(control) )
-        entry->msi.mpos = mpos;
-    entry->dev = dev;
     if ( entry->msi_attrib.maskbit )
     {
         u32 maskbits;
@@ -688,7 +712,7 @@ static int msix_capability_init(struct p
 
     if ( desc )
     {
-        entry = alloc_msi_entry();
+        entry = alloc_msi_entry(1);
         if ( !entry )
             return -ENOMEM;
         ASSERT(msi);
@@ -846,7 +870,6 @@ static int msix_capability_init(struct p
 
 static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
-    int status;
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
@@ -875,8 +898,7 @@ static int __pci_enable_msi(struct msi_i
         pci_disable_msi(old_desc);
     }
 
-    status = msi_capability_init(pdev, msi->irq, desc);
-    return status;
+    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
 }
 
 static void __pci_disable_msi(struct msi_desc *entry)
@@ -1096,6 +1118,8 @@ int pci_restore_msi_state(struct pci_dev
 
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
+        unsigned int i = 0, nr = 1;
+
         irq = entry->irq;
         desc = &irq_desc[irq];
 
@@ -1105,30 +1129,58 @@ int pci_restore_msi_state(struct pci_dev
 
         if (desc->msi_desc != entry)
         {
+    bogus:
             dprintk(XENLOG_ERR,
-                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-                    PCI_FUNC(pdev->devfn));
+                    PCI_FUNC(pdev->devfn), i);
             spin_unlock_irqrestore(&desc->lock, flags);
             return -EINVAL;
         }
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
             msi_set_enable(pdev, 0);
+            nr = entry->msi.nvec;
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 0);
 
         msg = entry->msg;
         write_msi_msg(entry, &msg);
 
-        msi_set_mask_bit(desc, entry->msi_attrib.masked);
+        for ( i = 0; ; )
+        {
+            msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+            spin_unlock_irqrestore(&desc->lock, flags);
+
+            if ( !--nr )
+                break;
+
+            desc = &irq_desc[entry[++i].irq];
+            spin_lock_irqsave(&desc->lock, flags);
+            if ( desc->msi_desc != entry + i )
+                goto bogus;
+        }
+
+        spin_unlock_irqrestore(&desc->lock, flags);
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
+            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+                                          PCI_SLOT(pdev->devfn),
+                                          PCI_FUNC(pdev->devfn), cpos);
+
+            control &= ~PCI_MSI_FLAGS_QSIZE;
+            multi_msi_enable(control, entry->msi.nvec);
+            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                             PCI_FUNC(pdev->devfn), cpos, control);
+
             msi_set_enable(pdev, 1);
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 1);
-
-        spin_unlock_irqrestore(&desc->lock, flags);
     }
 
     return 0;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int 
         break;
 
     case MAP_PIRQ_TYPE_MSI:
+        if ( !msi->table_base )
+            msi->entry_nr = 1;
         irq = *index;
         if ( irq == -1 )
+    case MAP_PIRQ_TYPE_MULTI_MSI:
             irq = create_irq(NUMA_NO_NODE);
 
         if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int 
                 goto done;
             }
         }
+        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+        {
+            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+                ret = -EDOM;
+            else if ( msi->entry_nr != 1 && !iommu_intremap )
+                ret = -EOPNOTSUPP;
+            else
+            {
+                while ( msi->entry_nr & (msi->entry_nr - 1) )
+                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+                pirq = get_free_pirqs(d, msi->entry_nr);
+                if ( pirq < 0 )
+                {
+                    while ( (msi->entry_nr >>= 1) > 1 )
+                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+                            break;
+                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+                            d->domain_id, msi->entry_nr << 1);
+                    ret = pirq;
+                }
+            }
+            if ( ret < 0 )
+                goto done;
+        }
         else
         {
             pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int 
  done:
     spin_unlock(&d->event_lock);
     spin_unlock(&pcidevs_lock);
-    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
-        destroy_irq(irq);
+    if ( ret != 0 )
+        switch ( type )
+        {
+        case MAP_PIRQ_TYPE_MSI:
+            if ( *index == -1 )
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+                destroy_irq(irq);
+            break;
+        }
  free_domain:
     rcu_unlock_domain(d);
     return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&map, arg, 1) != 0 )
             break;
 
-        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+        switch ( map.type )
         {
+        case MAP_PIRQ_TYPE_MSI_SEG:
             map.type = MAP_PIRQ_TYPE_MSI;
             msi.seg = map.bus >> 16;
-        }
-        else
-        {
+            break;
+
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+            if ( map.table_base )
+                return -EINVAL;
+            msi.seg = map.bus >> 16;
+            break;
+
+        default:
             msi.seg = 0;
+            break;
         }
         msi.bus = map.bus;
         msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
                                &msi);
 
+        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+            map.entry_nr = msi.entry_nr;
         if ( __copy_to_guest(arg, &map, 1) )
             ret = -EFAULT;
         break;
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
                            void *data);
 int unmap_domain_pirq(struct domain *d, int pirq);
 int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
 void free_domain_pirqs(struct domain *d);
 int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
 int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -145,7 +145,7 @@ int msi_free_irq(struct msi_desc *entry)
 #define multi_msi_capable(control) \
 	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
-	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
 #define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 #define MAP_PIRQ_TYPE_GSI               0x1
 #define MAP_PIRQ_TYPE_UNKNOWN           0x2
 #define MAP_PIRQ_TYPE_MSI_SEG           0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
 
 #define PHYSDEVOP_map_pirq               13
 struct physdev_map_pirq {
     domid_t domid;
     /* IN */
     int type;
-    /* IN */
+    /* IN (ignored for ..._MULTI_MSI) */
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
     int bus;
     /* IN */
     int devfn;
-    /* IN */
+    /* IN (also OUT for ..._MULTI_MSI) */
     int entry_nr;
     /* IN */
     uint64_t table_base;

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 6/6] pciif: add multi-vector-MSI command
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
                   ` (4 preceding siblings ...)
  2013-04-19 10:59 ` [PATCH 5/6] x86: enable " Jan Beulich
@ 2013-04-19 11:00 ` Jan Beulich
  2013-04-19 14:48 ` [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 11:00 UTC (permalink / raw)
  To: xen-devel
  Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit, Konrad Rzeszutek Wilk

[-- Attachment #1: Type: text/plain, Size: 640 bytes --]

The requested vector count is to be passed in struct xen_pci_op's info
field. Upon failure, if a smaller vector count might work, the backend
will pass that smaller count in the value field (which so far is always
being set to zero in the error path).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/public/io/pciif.h
+++ b/xen/include/public/io/pciif.h
@@ -46,6 +46,7 @@
 #define XEN_PCI_OP_aer_resume		(7)
 #define XEN_PCI_OP_aer_mmio		(8)
 #define XEN_PCI_OP_aer_slotreset	(9)
+#define XEN_PCI_OP_enable_multi_msi	(10)
 
 /* xen_pci_op error numbers */
 #define XEN_PCI_ERR_success          (0)




[-- Attachment #2: pciif-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 673 bytes --]

pciif: add multi-vector-MSI command

The requested vector count is to be passed in struct xen_pci_op's info
field. Upon failure, if a smaller vector count might work, the backend
will pass that smaller count in the value field (which so far is always
being set to zero in the error path).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/public/io/pciif.h
+++ b/xen/include/public/io/pciif.h
@@ -46,6 +46,7 @@
 #define XEN_PCI_OP_aer_resume		(7)
 #define XEN_PCI_OP_aer_mmio		(8)
 #define XEN_PCI_OP_aer_slotreset	(9)
+#define XEN_PCI_OP_enable_multi_msi	(10)
 
 /* xen_pci_op error numbers */
 #define XEN_PCI_ERR_success          (0)

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/6] x86/IOMMU: multi-vector MSI
  2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
                   ` (5 preceding siblings ...)
  2013-04-19 11:00 ` [PATCH 6/6] pciif: add multi-vector-MSI command Jan Beulich
@ 2013-04-19 14:48 ` Jan Beulich
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-19 14:48 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 666 bytes --]

>>> On 19.04.13 at 12:50, "Jan Beulich" <JBeulich@suse.com> wrote:
> 1: AMD IOMMU: allocate IRTE entries instead of using a static mapping
> 2: AMD IOMMU: untie remap and vector maps
> 3: VT-d: enable for multi-vector MSI
> 4: AMD IOMMU: enable for multi-vector MSI
> 5: x86: enable multi-vector MSI
> 6: pciif: add multi-vector-MSI command

For reference I'm also sending our kernel side code, as just
discussed on irc. I'm also including the fragment that I created
for pv-ops, which I stopped when seeing that I'd need to play
with xen_bind_pirq_msi_to_irq() in any case.

And please recall - the hypervisor side works only on VT-d so far.

Jan


[-- Attachment #2: xen3-patch-3.9-rc5-multi-vector-MSI-pt.pvops --]
[-- Type: application/octet-stream, Size: 3259 bytes --]

This is incomplete in that
- xen_bind_pirq_msi_to_irq() would need to be passed the vector count from
  xen_setup_msi_irqs()
- it only deals with the PV DomU case so far

--- head.orig/arch/x86/include/asm/xen/pci.h	2011-10-24 09:10:05.000000000 +0200
+++ head/arch/x86/include/asm/xen/pci.h	2013-04-19 15:40:49.000000000 +0200
@@ -44,7 +44,7 @@ static inline int xen_unregister_device_
  * its own functions.
  */
 struct xen_pci_frontend_ops {
-	int (*enable_msi)(struct pci_dev *dev, int vectors[]);
+	int (*enable_msi)(struct pci_dev *dev, int vectors[], int nvec);
 	void (*disable_msi)(struct pci_dev *dev);
 	int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
 	void (*disable_msix)(struct pci_dev *dev);
@@ -53,10 +53,10 @@ struct xen_pci_frontend_ops {
 extern struct xen_pci_frontend_ops *xen_pci_frontend;
 
 static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
-					      int vectors[])
+					      int vectors[], int nvec)
 {
 	if (xen_pci_frontend && xen_pci_frontend->enable_msi)
-		return xen_pci_frontend->enable_msi(dev, vectors);
+		return xen_pci_frontend->enable_msi(dev, vectors, nvec);
 	return -ENODEV;
 }
 static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
--- head.orig/arch/x86/pci/xen.c	2013-03-27 12:13:14.000000000 +0100
+++ head/arch/x86/pci/xen.c	2013-04-19 15:47:24.000000000 +0200
@@ -162,17 +162,15 @@ static int xen_setup_msi_irqs(struct pci
 	struct msi_desc *msidesc;
 	int *v;
 
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
-	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
+        nvec = max(1, nvec);
+	v = kcalloc(sizeof(int), nvec, GFP_KERNEL);
 	if (!v)
 		return -ENOMEM;
 
 	if (type == PCI_CAP_ID_MSIX)
 		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
 	else
-		ret = xen_pci_frontend_enable_msi(dev, v);
+		ret = xen_pci_frontend_enable_msi(dev, v, nvec);
 	if (ret)
 		goto error;
 	i = 0;
--- head.orig/drivers/pci/xen-pcifront.c	2013-02-19 00:58:34.000000000 +0100
+++ head/drivers/pci/xen-pcifront.c	2013-04-19 15:54:05.000000000 +0200
@@ -318,27 +318,35 @@ static void pci_frontend_disable_msix(st
 		dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
 }
 
-static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
+static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[], int nvec)
 {
 	int err;
 	struct xen_pci_op op = {
-		.cmd    = XEN_PCI_OP_enable_msi,
+		.cmd    = nvec > 1 ? XEN_PCI_OP_enable_multi_msi
+				   : XEN_PCI_OP_enable_msi,
 		.domain = pci_domain_nr(dev->bus),
 		.bus = dev->bus->number,
 		.devfn = dev->devfn,
+		.info = nvec,
 	};
 	struct pcifront_sd *sd = dev->bus->sysdata;
 	struct pcifront_device *pdev = pcifront_get_pdev(sd);
 
 	err = do_pci_op(pdev, &op);
 	if (likely(!err)) {
-		vector[0] = op.value;
+		unsigned int i;
+
+		for (i = 0; i < nvec; ++i)
+			vector[i] = op.value + i;
 		if (op.value <= 0) {
 			dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
 				op.value);
 			err = -EINVAL;
-			vector[0] = -1;
+			for (i = 0; i < nvec; ++i)
+				vector[i] = -1;
 		}
+	} else if (nvec > 1) {
+		err = op.info > 1 && op.info < nvec ? op.info : 1;
 	} else {
 		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
 				    "%x:%x\n", op.bus, op.devfn);

[-- Attachment #3: xen3-patch-3.9-rc5-multi-vector-MSI --]
[-- Type: application/octet-stream, Size: 15103 bytes --]

--- head.orig/drivers/pci/msi-xen.c	2013-03-25 09:13:58.000000000 +0100
+++ head/drivers/pci/msi-xen.c	2013-04-05 17:26:35.000000000 +0200
@@ -27,12 +27,13 @@
 #include "pci.h"
 #include "msi.h"
 
-static int pci_msi_enable = 1;
+static bool pci_msi_enable = true;
 #if CONFIG_XEN_COMPAT < 0x040200
-static int pci_seg_supported = 1;
+static bool pci_seg_supported = true;
 #else
-#define pci_seg_supported 1
+#define pci_seg_supported true
 #endif
+static bool msi_multi_vec_supported = true;
 
 static LIST_HEAD(msi_dev_head);
 DEFINE_SPINLOCK(msi_dev_lock);
@@ -203,8 +204,8 @@ int unregister_msi_get_owner(int (*func)
 EXPORT_SYMBOL(unregister_msi_get_owner);
 #endif
 
-static int msi_unmap_pirq(struct pci_dev *dev, int pirq, domid_t owner,
-			  struct kobject *kobj)
+static void msi_unmap_pirq(struct pci_dev *dev, int pirq, unsigned int nr,
+			   domid_t owner, struct kobject *kobj)
 {
 	struct physdev_unmap_pirq unmap;
 	int rc;
@@ -217,10 +218,7 @@ static int msi_unmap_pirq(struct pci_dev
 		? pirq : evtchn_get_xen_pirq(pirq);
 
 	if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap)))
-		dev_warn(&dev->dev, "unmap irq %d failed\n", pirq);
-
-	if (rc < 0)
-		return rc;
+		dev_warn(&dev->dev, "unmap irq %d failed (%d)\n", pirq, rc);
 
 	/*
 	 * Its possible that we get into this path when populate_msi_sysfs()
@@ -233,9 +231,7 @@ static int msi_unmap_pirq(struct pci_dev
 	}
 
 	if (unmap.domid == DOMID_SELF)
-		evtchn_map_pirq(pirq, 0);
-
-	return 0;
+		evtchn_map_pirq(pirq, 0, nr);
 }
 
 static u64 find_table_base(struct pci_dev *dev, int pos)
@@ -264,7 +260,10 @@ static int msi_map_vector(struct pci_dev
 	int rc = -EINVAL;
 
 	map_irq.domid = domid;
-	map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
+	if (table_base || entry_nr <= 1)
+		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
+	else
+		map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
 	map_irq.index = -1;
 	map_irq.pirq = -1;
 	map_irq.bus = dev->bus->number | (pci_domain_nr(dev->bus) << 16);
@@ -274,6 +273,12 @@ static int msi_map_vector(struct pci_dev
 
 	if (pci_seg_supported)
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+	if ((rc == -EINVAL || rc == -EOPNOTSUPP)
+	    && map_irq.type == MAP_PIRQ_TYPE_MULTI_MSI
+	    && map_irq.entry_nr == entry_nr) {
+		msi_multi_vec_supported = false;
+		return rc;
+	}
 #if CONFIG_XEN_COMPAT < 0x040200
 	if (rc == -EINVAL && !pci_domain_nr(dev->bus)) {
 		map_irq.type = MAP_PIRQ_TYPE_MSI;
@@ -282,7 +287,7 @@ static int msi_map_vector(struct pci_dev
 		map_irq.bus = dev->bus->number;
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
 		if (rc != -EINVAL)
-			pci_seg_supported = 0;
+			pci_seg_supported = false;
 	}
 #endif
 	if (rc)
@@ -296,20 +301,35 @@ static int msi_map_vector(struct pci_dev
 
 	BUG_ON(map_irq.pirq <= 0);
 
+	if (table_base || entry_nr <= 0)
+		entry_nr = 1;
+
 	/* If mapping of this particular MSI is on behalf of another domain,
 	 * we do not need to get an irq in dom0. This also implies:
 	 * dev->irq in dom0 will be 'Xen pirq' if this device belongs to
 	 * to another domain, and will be 'Linux irq' if it belongs to dom0.
 	 */
 	if (domid == DOMID_SELF) {
-		rc = evtchn_map_pirq(-1, map_irq.pirq);
-		dev_printk(KERN_DEBUG, &dev->dev,
-			   "irq %d (%d) for MSI/MSI-X\n",
-			   rc, map_irq.pirq);
+		rc = evtchn_map_pirq(-1, map_irq.pirq, entry_nr);
+		if (rc < 0 || entry_nr == 1)
+			dev_printk(KERN_DEBUG, &dev->dev,
+				   "irq %d (%d) for MSI/MSI-X\n",
+				   rc, map_irq.pirq);
+		else
+			dev_printk(KERN_DEBUG, &dev->dev,
+				   "irq %d (%d) ... %d (%d) for MSI\n",
+				   rc, map_irq.pirq, rc + entry_nr - 1,
+				   map_irq.pirq + entry_nr - 1);
 		return rc;
 	}
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for dom%d MSI/MSI-X\n",
-		   map_irq.pirq, domid);
+	if (entry_nr == 1)
+		dev_printk(KERN_DEBUG, &dev->dev,
+			   "irq %d for dom%d MSI/MSI-X\n",
+			   map_irq.pirq, domid);
+	else
+		dev_printk(KERN_DEBUG, &dev->dev,
+			   "irq %d...%d for dom%d MSI\n",
+			   map_irq.pirq, map_irq.pirq + entry_nr - 1, domid);
 	return map_irq.pirq;
 }
 
@@ -518,9 +538,10 @@ static int msi_capability_init(struct pc
 
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 
-	pirq = msi_map_vector(dev, 0, 0, dev_entry->owner);
+	pirq = msi_map_vector(dev, nvec, 0, dev_entry->owner);
 	if (pirq < 0)
-		return -EBUSY;
+		return pirq;
+	dev_entry->e.entry_nr = -nvec;
 
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
@@ -603,7 +624,7 @@ static int msix_capability_init(struct p
 			list_for_each_entry(pirq_entry, &dev->msi_list, list)
 				if (pirq_entry->entry_nr == entries[i].entry)
 					break;
-			msi_unmap_pirq(dev, entries[j].vector,
+			msi_unmap_pirq(dev, entries[j].vector, 1,
 				       msi_dev_entry->owner,
 				       &pirq_entry->kobj);
 			detach_pirq_entry(entries[j].entry, msi_dev_entry);
@@ -700,7 +721,10 @@ int pci_enable_msi_block(struct pci_dev 
 	if (!pos)
 		return -EINVAL;
 	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
-	maxvec = 1 /* XXX << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1) */;
+	if (msi_multi_vec_supported)
+		maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+	else
+		maxvec = 1;
 	if (nvec > maxvec)
 		return maxvec;
 
@@ -712,12 +736,15 @@ int pci_enable_msi_block(struct pci_dev 
 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
 		int ret;
 
+		if (nvec > 1) return 1; /* XXX */
+
 		temp = dev->irq;
 		ret = pci_frontend_enable_msi(dev);
 		if (ret)
 			return ret;
 
-		dev->irq = evtchn_map_pirq(-1, dev->irq);
+		dev->irq = evtchn_map_pirq(-1, dev->irq, nvec);
+		msi_dev_entry->e.entry_nr = -nvec;
 		dev->msi_enabled = 1;
 		msi_dev_entry->default_irq = temp;
 		populate_msi_sysfs(dev);
@@ -739,6 +766,8 @@ int pci_enable_msi_block(struct pci_dev 
 	status = msi_capability_init(dev, nvec);
 	if ( !status )
 		msi_dev_entry->default_irq = temp;
+	else if (nvec > 1)
+		status = 1;
 
 	return status;
 }
@@ -781,7 +810,7 @@ void pci_msi_shutdown(struct pci_dev *de
 
 	if (!is_initial_xendomain()) {
 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
-		evtchn_map_pirq(dev->irq, 0);
+		evtchn_map_pirq(dev->irq, 0, -msi_dev_entry->e.entry_nr);
 		pci_frontend_disable_msi(dev);
 		dev->irq = msi_dev_entry->default_irq;
 		dev->msi_enabled = 0;
@@ -792,8 +821,8 @@ void pci_msi_shutdown(struct pci_dev *de
 	pirq = dev->irq;
 	/* Restore dev->irq to its default pin-assertion vector */
 	dev->irq = msi_dev_entry->default_irq;
-	msi_unmap_pirq(dev, pirq, msi_dev_entry->owner,
-		       &msi_dev_entry->e.kobj);
+	msi_unmap_pirq(dev, pirq, -msi_dev_entry->e.entry_nr,
+		       msi_dev_entry->owner, &msi_dev_entry->e.kobj);
 	msi_dev_entry->owner = DOMID_IO;
 	memset(&msi_dev_entry->e.kobj, 0, sizeof(msi_dev_entry->e.kobj));
 
@@ -884,7 +913,7 @@ int pci_enable_msix(struct pci_dev *dev,
 			}
 			if (mapped)
 				continue;
-			irq = evtchn_map_pirq(-1, entries[i].vector);
+			irq = evtchn_map_pirq(-1, entries[i].vector, 1);
 			attach_pirq_entry(irq, entries[i].entry, msi_dev_entry);
 			entries[i].vector = irq;
 		}
@@ -984,11 +1013,11 @@ void msi_remove_pci_irq_vectors(struct p
 	spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
 	list_for_each_entry_safe(pirq_entry, tmp, &dev->msi_list, list) {
 		if (is_initial_xendomain())
-			msi_unmap_pirq(dev, pirq_entry->pirq,
+			msi_unmap_pirq(dev, pirq_entry->pirq, 1,
 				       msi_dev_entry->owner,
 				       &pirq_entry->kobj);
 		else
-			evtchn_map_pirq(pirq_entry->pirq, 0);
+			evtchn_map_pirq(pirq_entry->pirq, 0, 1);
 		list_del(&pirq_entry->list);
 		kfree(pirq_entry);
 	}
@@ -999,7 +1028,7 @@ void msi_remove_pci_irq_vectors(struct p
 
 void pci_no_msi(void)
 {
-	pci_msi_enable = 0;
+	pci_msi_enable = false;
 }
 
 /**
--- head.orig/drivers/xen/core/evtchn.c	2013-01-30 12:40:26.000000000 +0100
+++ head/drivers/xen/core/evtchn.c	2013-04-05 16:39:31.000000000 +0200
@@ -513,10 +513,11 @@ asmlinkage void __irq_entry evtchn_do_up
 }
 
 static int find_unbound_irq(unsigned int node, struct irq_cfg **pcfg,
-			    struct irq_chip *chip, bool percpu)
+			    struct irq_chip *chip, unsigned int nr)
 {
 	static int warned;
-	int irq;
+	unsigned int count = 0;
+	int irq, result = -ENOSPC;
 
 	for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
 		struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -524,17 +525,23 @@ static int find_unbound_irq(unsigned int
 
 		if (unlikely(!cfg))
 			return -ENOMEM;
-		if (data->chip != &no_irq_chip &&
-		    data->chip != chip)
-			continue;
 
-		if (!cfg->bindcount) {
+		if ((data->chip == &no_irq_chip || data->chip == chip)
+		    && !cfg->bindcount) {
 			irq_flow_handler_t handle;
 			const char *name;
 
+			if (nr > 1) {
+				if (!count)
+					result = irq;
+				if (++count == nr)
+					break;
+				continue;
+			}
+
 			*pcfg = cfg;
 			irq_set_noprobe(irq);
-			if (!percpu) {
+			if (nr) {
 				handle = handle_fasteoi_irq;
 				name = "fasteoi";
 			} else {
@@ -545,6 +552,18 @@ static int find_unbound_irq(unsigned int
 						      handle, name);
 			return irq;
 		}
+		count = 0;
+		result = -ENOSPC;
+	}
+
+	if (nr > 1 && count == nr) {
+		BUG_ON(pcfg);
+		for (irq = result; count--; ++irq) {
+			irq_set_noprobe(irq);
+			irq_set_chip_and_handler_name(irq, chip,
+						      handle_fasteoi_irq, "fasteoi");
+		}
+		return result;
 	}
 
 	if (!warned) {
@@ -567,7 +586,7 @@ static int bind_caller_port_to_irq(unsig
 
 	if ((irq = evtchn_to_irq[caller_port]) == -1) {
 		if ((irq = find_unbound_irq(numa_node_id(), &cfg,
-					    &dynirq_chip, false)) < 0)
+					    &dynirq_chip, 1)) < 0)
 			goto out;
 
 		evtchn_to_irq[caller_port] = irq;
@@ -592,7 +611,7 @@ static int bind_local_port_to_irq(unsign
 	BUG_ON(evtchn_to_irq[local_port] != -1);
 
 	if ((irq = find_unbound_irq(numa_node_id(), &cfg, &dynirq_chip,
-				    false)) < 0) {
+				    1)) < 0) {
 		if (close_evtchn(local_port))
 			BUG();
 		goto out;
@@ -646,7 +665,7 @@ static int bind_virq_to_irq(unsigned int
 
 	if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
 		if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
-					    &dynirq_chip, false)) < 0)
+					    &dynirq_chip, 1)) < 0)
 			goto out;
 
 		bind_virq.virq = virq;
@@ -691,7 +710,7 @@ static int bind_ipi_to_irq(unsigned int 
 
 	if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
 		if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
-					    &dynirq_chip, false)) < 0)
+					    &dynirq_chip, 1)) < 0)
 			goto out;
 
 		bind_ipi.vcpu = cpu;
@@ -1022,7 +1041,7 @@ int bind_virq_to_irqaction(
 		BUG_ON(!retval);
 
 		if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
-					    &dynirq_chip, true)) < 0) {
+					    &dynirq_chip, 0)) < 0) {
 			virq_actions[virq] = cur->next;
 			spin_unlock(&irq_mapping_update_lock);
 			free_percpu_irqaction(new);
@@ -1122,7 +1141,7 @@ int __cpuinit bind_ipi_to_irqaction(
 
 	if (ipi_irq < 0) {
 		if ((ipi_irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
-						&dynirq_chip, true)) < 0) {
+						&dynirq_chip, 0)) < 0) {
 			spin_unlock(&irq_mapping_update_lock);
 			return ipi_irq;
 		}
@@ -1873,19 +1892,28 @@ void evtchn_register_pirq(int irq)
 }
 
 #ifdef CONFIG_PCI_MSI
-int evtchn_map_pirq(int irq, int xen_pirq)
+int evtchn_map_pirq(int irq, unsigned int xen_pirq, unsigned int nr)
 {
 	if (irq < 0) {
 #ifdef CONFIG_SPARSE_IRQ
-		struct irq_cfg *cfg;
+		struct irq_cfg *cfg = NULL;
 
+		if (nr <= 0)
+			return -EINVAL;
 		spin_lock(&irq_mapping_update_lock);
-		irq = find_unbound_irq(numa_node_id(), &cfg, &pirq_chip,
-				       false);
+		irq = find_unbound_irq(numa_node_id(), nr == 1 ? &cfg : NULL,
+				       &pirq_chip, nr);
 		if (irq >= 0) {
-			BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
-			cfg->bindcount++;
-			cfg->info = mk_irq_info(IRQT_PIRQ, xen_pirq, 0);
+			unsigned int i;
+
+			for (i = 0; i < nr; ++i) {
+				if (!cfg || i)
+					cfg = irq_cfg(irq + i);
+				BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
+				cfg->bindcount++;
+				cfg->info = mk_irq_info(IRQT_PIRQ,
+							xen_pirq + i, 0);
+			}
 		}
 		spin_unlock(&irq_mapping_update_lock);
 		if (irq < 0)
@@ -1896,6 +1924,8 @@ int evtchn_map_pirq(int irq, int xen_pir
 #else
 		static DEFINE_SPINLOCK(irq_alloc_lock);
 
+		if (nr > 1)
+			return -EOPNOTSUPP;
 		irq = PIRQ_BASE + nr_pirqs - 1;
 		spin_lock(&irq_alloc_lock);
 		do {
@@ -1922,29 +1952,37 @@ int evtchn_map_pirq(int irq, int xen_pir
 					      handle_fasteoi_irq, "fasteoi");
 #endif
 	} else if (!xen_pirq) {
-		struct irq_cfg *cfg = irq_cfg(irq);
+		while (nr--) {
+			struct irq_cfg *cfg = irq_cfg(irq + nr);
 
-		if (!cfg || unlikely(type_from_irq_cfg(cfg) != IRQT_PIRQ))
-			return -EINVAL;
-		/*
-		 * dynamic_irq_cleanup(irq) would seem to be the correct thing
-		 * here, but cannot be used as we get here also during shutdown
-		 * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
-		 * then causes a warning in dynamic_irq_cleanup().
-		 */
-		irq_set_chip_and_handler(irq, NULL, NULL);
-		cfg->info = IRQ_UNBOUND;
+			if (!cfg
+			    || unlikely(type_from_irq_cfg(cfg) != IRQT_PIRQ))
+				return -EINVAL;
+			/*
+			 * dynamic_irq_cleanup(irq) would seem to be the
+			 * correct thing here, but cannot be used as we get
+			 * here also during shutdown when a driver didn't
+			 * free_irq() its MSI(-X) IRQ(s), which then causes
+			 * a warning in dynamic_irq_cleanup().
+			 */
+			irq_set_chip_and_handler(irq, NULL, NULL);
+			cfg->info = IRQ_UNBOUND;
 #ifdef CONFIG_SPARSE_IRQ
-		cfg->bindcount--;
+			cfg->bindcount--;
 #endif
+		}
 		return 0;
-	} else if (type_from_irq(irq) != IRQT_PIRQ
-		   || index_from_irq(irq) != xen_pirq) {
-		pr_err("IRQ#%d is already mapped to %d:%u - "
-		       "cannot map to PIRQ#%u\n",
-		       irq, type_from_irq(irq), index_from_irq(irq), xen_pirq);
-		return -EINVAL;
-	}
+	} else
+		while (nr--) {
+			if (type_from_irq(irq + nr) == IRQT_PIRQ
+			    && index_from_irq(irq + nr) == xen_pirq + nr)
+				continue;
+			pr_err("IRQ#%u is already mapped to %d:%u - "
+			       "cannot map to PIRQ#%u\n",
+			       irq + nr, type_from_irq(irq + nr),
+			       index_from_irq(irq + nr), xen_pirq + nr);
+			return -EINVAL;
+		}
 	return index_from_irq(irq) ? irq : -EINVAL;
 }
 #endif
--- head.orig/include/xen/evtchn.h	2012-02-10 13:35:11.000000000 +0100
+++ head/include/xen/evtchn.h	2013-04-05 15:51:09.000000000 +0200
@@ -149,7 +149,7 @@ asmlinkage void evtchn_do_upcall(struct 
 /* Mark a PIRQ as unavailable for dynamic allocation. */
 void evtchn_register_pirq(int irq);
 /* Map a Xen-supplied PIRQ to a dynamically allocated one. */
-int evtchn_map_pirq(int irq, int xen_pirq);
+int evtchn_map_pirq(int irq, unsigned int xen_pirq, unsigned int nr);
 /* Look up a Xen-supplied PIRQ for a dynamically allocated one. */
 int evtchn_get_xen_pirq(int irq);
 
--- head.orig/include/xen/interface/physdev.h	2013-04-03 10:55:46.000000000 +0200
+++ head/include/xen/interface/physdev.h	2013-04-05 14:51:35.000000000 +0200
@@ -151,6 +151,7 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 #define MAP_PIRQ_TYPE_GSI		0x1
 #define MAP_PIRQ_TYPE_UNKNOWN		0x2
 #define MAP_PIRQ_TYPE_MSI_SEG		0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
 
 #define PHYSDEVOP_map_pirq		13
 struct physdev_map_pirq {

[-- Attachment #4: xen3-patch-3.9-rc5-multi-vector-MSI-pt --]
[-- Type: application/octet-stream, Size: 7572 bytes --]

--- head.orig/drivers/pci/msi-xen.c	2013-04-05 17:26:35.000000000 +0200
+++ head/drivers/pci/msi-xen.c	2013-04-11 16:18:44.000000000 +0200
@@ -23,6 +23,7 @@
 
 #include <xen/interface/physdev.h>
 #include <xen/evtchn.h>
+#include <xen/pcifront.h>
 
 #include "pci.h"
 #include "msi.h"
@@ -710,7 +711,6 @@ static int pci_msi_check_device(struct p
  * updates the @dev's irq member to the lowest new interrupt number; the
  * other interrupt numbers allocated to this device are consecutive.
  */
-extern int pci_frontend_enable_msi(struct pci_dev *dev);
 int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 {
 	int temp, status, pos, maxvec;
@@ -736,10 +736,8 @@ int pci_enable_msi_block(struct pci_dev 
 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
 		int ret;
 
-		if (nvec > 1) return 1; /* XXX */
-
 		temp = dev->irq;
-		ret = pci_frontend_enable_msi(dev);
+		ret = pci_frontend_enable_msi(dev, nvec);
 		if (ret)
 			return ret;
 
@@ -799,7 +797,6 @@ int pci_enable_msi_block_auto(struct pci
 }
 EXPORT_SYMBOL(pci_enable_msi_block_auto);
 
-extern void pci_frontend_disable_msi(struct pci_dev* dev);
 void pci_msi_shutdown(struct pci_dev *dev)
 {
 	int pirq, pos;
@@ -873,8 +870,6 @@ int pci_msix_table_size(struct pci_dev *
  * of irqs or MSI-X vectors available. Driver should use the returned value to
  * re-send its request.
  **/
-extern int pci_frontend_enable_msix(struct pci_dev *dev,
-		struct msix_entry *entries, int nvec);
 int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 {
 	int status, nr_entries;
@@ -959,7 +954,6 @@ int pci_enable_msix(struct pci_dev *dev,
 }
 EXPORT_SYMBOL(pci_enable_msix);
 
-extern void pci_frontend_disable_msix(struct pci_dev* dev);
 void pci_msix_shutdown(struct pci_dev *dev)
 {
 	if (!pci_msi_enable || !dev || !dev->msix_enabled)
--- head.orig/drivers/xen/pcifront/pci_op.c	2013-01-09 17:02:21.000000000 +0100
+++ head/drivers/xen/pcifront/pci_op.c	2013-04-11 16:13:59.000000000 +0200
@@ -364,14 +364,16 @@ void pci_frontend_disable_msix(struct pc
 		dev_err(&dev->dev, "disable MSI-X -> %d\n", err);
 }
 
-int pci_frontend_enable_msi(struct pci_dev *dev)
+int pci_frontend_enable_msi(struct pci_dev *dev, unsigned int nvec)
 {
 	int err;
 	struct xen_pci_op op = {
-		.cmd    = XEN_PCI_OP_enable_msi,
+		.cmd    = nvec > 1 ? XEN_PCI_OP_enable_multi_msi
+				   : XEN_PCI_OP_enable_msi,
 		.domain = pci_domain_nr(dev->bus),
 		.bus = dev->bus->number,
 		.devfn = dev->devfn,
+		.info = nvec,
 	};
 	struct pcifront_sd *sd = dev->bus->sysdata;
 	struct pcifront_device *pdev = pcifront_get_pdev(sd);
@@ -379,6 +381,8 @@ int pci_frontend_enable_msi(struct pci_d
 	err = do_pci_op(pdev, &op);
 	if (likely(!err))
 		dev->irq = op.value;
+	else if (nvec > 1)
+		err = op.info > 1 && op.info < nvec ? op.info : 1;
 	else {
 		dev_err(&dev->dev, "enable MSI -> %d\n", err);
 		err = -EINVAL;
--- head.orig/drivers/xen/pcifront/pcifront.h	2011-09-09 09:40:06.000000000 +0200
+++ head/drivers/xen/pcifront/pcifront.h	2013-04-11 16:00:38.000000000 +0200
@@ -8,7 +8,6 @@
 
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/pci.h>
 #include <xen/xenbus.h>
 #include <xen/interface/io/pciif.h>
 #include <linux/interrupt.h>
@@ -54,4 +53,37 @@ void pcifront_do_aer(struct work_struct 
 
 irqreturn_t pcifront_handler_aer(int irq, void *dev);
 
+#ifndef __ia64__
+
+#define pcifront_sd pci_sysdata
+
+static inline struct pcifront_device *
+pcifront_get_pdev(struct pcifront_sd *sd)
+{
+	return sd->pdev;
+}
+
+static inline void pcifront_setup_root_resources(struct pci_bus *bus,
+						 struct pcifront_sd *sd)
+{
+}
+
+#else /* __ia64__ */
+
+#define pcifront_sd pci_controller
+
+static inline struct pcifront_device *
+pcifront_get_pdev(struct pcifront_sd *sd)
+{
+	return (struct pcifront_device *)sd->platform_data;
+}
+
+static inline void pcifront_setup_root_resources(struct pci_bus *bus,
+						 struct pcifront_sd *sd)
+{
+	xen_pcibios_setup_root_windows(bus, sd);
+}
+
+#endif /* __ia64__ */
+
 #endif	/* __XEN_PCIFRONT_H__ */
--- head.orig/drivers/xen/xen-pciback/pciback_ops.c	2013-03-21 15:26:21.000000000 +0100
+++ head/drivers/xen/xen-pciback/pciback_ops.c	2013-04-11 15:38:12.000000000 +0200
@@ -144,7 +144,8 @@ void xen_pcibk_reset_device(struct pci_d
 #ifdef CONFIG_PCI_MSI
 static
 int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
-			 struct pci_dev *dev, struct xen_pci_op *op)
+			 struct pci_dev *dev, struct xen_pci_op *op,
+			 unsigned int nvec)
 {
 #ifndef CONFIG_XEN
 	struct xen_pcibk_dev_data *dev_data;
@@ -154,13 +155,13 @@ int xen_pcibk_enable_msi(struct xen_pcib
 	if (unlikely(verbose_request))
 		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
 
-	status = pci_enable_msi(dev);
+	status = pci_enable_msi_block(dev, nvec);
 
 	if (status) {
 		pr_warn_ratelimited(DRV_NAME ": %s: error enabling MSI for guest %u: err %d\n",
 				    pci_name(dev), pdev->xdev->otherend_id,
 				    status);
-		op->value = 0;
+		op->value = status > 0 && status < nvec ? status : 0;
 		return XEN_PCI_ERR_op_failed;
 	}
 
@@ -364,7 +365,11 @@ void xen_pcibk_do_op(struct work_struct 
 			break;
 #ifdef CONFIG_PCI_MSI
 		case XEN_PCI_OP_enable_msi:
-			op->err = xen_pcibk_enable_msi(pdev, dev, op);
+			op->err = xen_pcibk_enable_msi(pdev, dev, op, 1);
+			break;
+		case XEN_PCI_OP_enable_multi_msi:
+			op->err = xen_pcibk_enable_msi(pdev, dev, op,
+						       op->info);
 			break;
 		case XEN_PCI_OP_disable_msi:
 			op->err = xen_pcibk_disable_msi(pdev, dev, op);
--- head.orig/include/xen/interface/io/pciif.h	2013-04-11 16:04:22.000000000 +0200
+++ head/include/xen/interface/io/pciif.h	2013-04-11 15:29:38.000000000 +0200
@@ -46,6 +46,7 @@
 #define	XEN_PCI_OP_aer_resume		(7)
 #define	XEN_PCI_OP_aer_mmio		(8)
 #define	XEN_PCI_OP_aer_slotreset	(9)
+#define	XEN_PCI_OP_enable_multi_msi	(10)
 
 /* xen_pci_op error numbers */
 #define	XEN_PCI_ERR_success		(0)
--- head.orig/include/xen/pcifront.h	2012-04-04 10:19:31.000000000 +0200
+++ head/include/xen/pcifront.h	2013-04-11 16:09:20.000000000 +0200
@@ -6,56 +6,26 @@
 #ifndef __XEN_ASM_PCIFRONT_H__
 #define __XEN_ASM_PCIFRONT_H__
 
-#include <linux/spinlock.h>
-
 #ifdef __KERNEL__
 
-#ifndef __ia64__
-
-#include <asm/pci.h>
+#include <linux/pci.h>
 
-struct pcifront_device;
-struct pci_bus;
-#define pcifront_sd pci_sysdata
-
-static inline struct pcifront_device *
-pcifront_get_pdev(struct pcifront_sd *sd)
-{
-	return sd->pdev;
-}
-
-static inline void pcifront_setup_root_resources(struct pci_bus *bus,
-						 struct pcifront_sd *sd)
-{
-}
+int pci_frontend_enable_msi(struct pci_dev *, unsigned int nvec);
+void pci_frontend_disable_msi(struct pci_dev *);
+int pci_frontend_enable_msix(struct pci_dev *, struct msix_entry *, int nvec);
+void pci_frontend_disable_msix(struct pci_dev *);
 
-#else /* __ia64__ */
+#ifdef __ia64__
 
 #include <linux/acpi.h>
-#include <asm/pci.h>
-#define pcifront_sd pci_controller
 
 extern void xen_add_resource(struct pci_controller *, unsigned int,
 			     unsigned int, struct acpi_resource *);
 extern void xen_pcibios_setup_root_windows(struct pci_bus *,
 					   struct pci_controller *);
 
-static inline struct pcifront_device *
-pcifront_get_pdev(struct pcifront_sd *sd)
-{
-	return (struct pcifront_device *)sd->platform_data;
-}
-
-static inline void pcifront_setup_root_resources(struct pci_bus *bus,
-						 struct pcifront_sd *sd)
-{
-	xen_pcibios_setup_root_windows(bus, sd);
-}
-
 #endif /* __ia64__ */
 
-extern struct rw_semaphore pci_bus_sem;
-
 #endif /* __KERNEL__ */
 
 #endif /* __XEN_ASM_PCIFRONT_H__ */

[-- Attachment #5: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/6] x86: enable multi-vector MSI
  2013-04-19 10:59 ` [PATCH 5/6] x86: enable " Jan Beulich
@ 2013-04-23  0:55   ` Suravee Suthikulanit
  2013-04-23  6:26     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Suravee Suthikulanit @ 2013-04-23  0:55 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

On 4/19/2013 5:59 AM, Jan Beulich wrote:
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -238,6 +238,11 @@ static int write_msi_msg(struct msi_desc
>           u8 bus = dev->bus;
>           u8 slot = PCI_SLOT(dev->devfn);
>           u8 func = PCI_FUNC(dev->devfn);
> +        int nr = entry->msi_attrib.entry_nr;
> +
> +        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
> +        if ( nr )
> +            return 0;
>   
Jan,

This logic seems incorrect.  Do you mean to write --nr?  This causes 
an assertion failure here.  Also, investigation shows the value of nr is 0 here.

Suravee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/6] x86: enable multi-vector MSI
  2013-04-23  0:55   ` Suravee Suthikulanit
@ 2013-04-23  6:26     ` Jan Beulich
  2013-04-26 21:16       ` Suravee Suthikulanit
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2013-04-23  6:26 UTC (permalink / raw)
  To: Suravee Suthikulanit
  Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

>>> On 23.04.13 at 02:55, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
wrote:
> On 4/19/2013 5:59 AM, Jan Beulich wrote:
>> --- a/xen/arch/x86/msi.c
>> +++ b/xen/arch/x86/msi.c
>> @@ -238,6 +238,11 @@ static int write_msi_msg(struct msi_desc
>>           u8 bus = dev->bus;
>>           u8 slot = PCI_SLOT(dev->devfn);
>>           u8 func = PCI_FUNC(dev->devfn);
>> +        int nr = entry->msi_attrib.entry_nr;
>> +
>> +        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
>> +        if ( nr )
>> +            return 0;
> 
> This logic seems incorrect.  Do you mean to write --nr?

No, this indeed has to be -nr (i.e. the "master" entry, which is the
first one in the array).

> This causes an assertion failure here.  Also, investigation shows the
> value of nr is 0 here.

nr being 0 here is perfectly fine, meaning this is the first ("master")
entry of a multi-vector device (it can't be a single-vector one, as in
that case entry[0].msi.nvec == 1, i.e. the & yields zero regardless
of msg->data).

And the assertion should hold, due to

    *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;

in update_intremap_entry_from_msi_msg(), and
alloc_intremap_entry() returning only aligned blocks.

So the question isn't just what value nr there has, but also what
the other involved values are.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-19 10:57 ` [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
@ 2013-04-23 13:21   ` Suravee Suthikulanit
  2013-04-23 15:06     ` Suravee Suthikulanit
  2013-04-26 17:13   ` Suravee Suthikulanit
  1 sibling, 1 reply; 20+ messages in thread
From: Suravee Suthikulanit @ 2013-04-23 13:21 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

I am now reproducing the issue with the USB devices not working with 
this patch again.  I'll continue to investigate more.

Suravee

On 4/19/2013 5:57 AM, Jan Beulich wrote:
> For multi-vector MSI, where we surely don't want to allocate
> contiguous vectors and be able to set affinities of the individual
> vectors separately, we need to drop the use of the tuple of vector and
> delivery mode to determine the IRTE to use, and instead allocate IRTEs
> (which imo should have been done from the beginning).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> One thing I surely need confirmation on is whether this
>
>          BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
>                 get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
>
> in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
> clear to me how to properly set up things for affected devices, as we
> would need an identical index allocated for two different remap table
> instances (which can hardly be expected to work out well).
>
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
> @@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
>            /* allocate per-device interrupt remapping table */
>            if ( amd_iommu_perdev_intremap )
>                ivrs_mappings[alias_id].intremap_table =
> -                amd_iommu_alloc_intremap_table();
> +                amd_iommu_alloc_intremap_table(
> +                    &ivrs_mappings[alias_id].intremap_inuse);
>            else
>            {
>                if ( shared_intremap_table == NULL  )
> -                 shared_intremap_table = amd_iommu_alloc_intremap_table();
> +                 shared_intremap_table = amd_iommu_alloc_intremap_table(
> +                     &shared_intremap_inuse);
>                ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
> +             ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
>            }
>       }
>       /* assgin iommu hardware */
> @@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
>               if ( IO_APIC_ID(apic) != special->handle )
>                   continue;
>   
> -            if ( ioapic_sbdf[special->handle].pin_setup )
> +            if ( ioapic_sbdf[special->handle].pin_2_idx )
>               {
>                   if ( ioapic_sbdf[special->handle].bdf == bdf &&
>                        ioapic_sbdf[special->handle].seg == seg )
> @@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
>                   ioapic_sbdf[special->handle].bdf = bdf;
>                   ioapic_sbdf[special->handle].seg = seg;
>   
> -                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
> -                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
> +                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
> +                    u16, nr_ioapic_entries[apic]);
>                   if ( nr_ioapic_entries[apic] &&
> -                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
>                   {
>                       printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>                       return 0;
>                   }
> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                       nr_ioapic_entries[apic]);
>               }
>               break;
>           }
> @@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
>       for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
>       {
>           if ( !nr_ioapic_entries[apic] ||
> -             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
>               continue;
>   
>           printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
> @@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
>               error = -ENXIO;
>           else
>           {
> -            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
> -                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
> -            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
> +                u16, nr_ioapic_entries[apic]);
> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
>               {
>                   printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>                   error = -ENOMEM;
>               }
> +            memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                   nr_ioapic_entries[apic]);
>           }
>       }
>   
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -31,6 +31,7 @@
>   struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
>   struct hpet_sbdf hpet_sbdf;
>   void *shared_intremap_table;
> +unsigned long *shared_intremap_inuse;
>   static DEFINE_SPINLOCK(shared_intremap_lock);
>   
>   static spinlock_t* get_intremap_lock(int seg, int req_id)
> @@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
>       return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
>   }
>   
> -static int get_intremap_offset(u8 vector, u8 dm)
> +static unsigned int alloc_intremap_entry(int seg, int bdf)
>   {
> -    int offset = 0;
> -    offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
> -    offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) &
> -        INT_REMAP_INDEX_VECTOR_MASK;
> -    return offset;
> +    unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
> +    unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
> +
> +    if ( slot < INTREMAP_ENTRIES )
> +        __set_bit(slot, inuse);
> +    return slot;
>   }
>   
> -static u8 *get_intremap_entry(int seg, int bdf, int offset)
> +static u32 *get_intremap_entry(int seg, int bdf, int offset)
>   {
> -    u8 *table;
> +    u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
>   
> -    table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
>       ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
>   
> -    return (u8*) (table + offset);
> +    return table + offset;
>   }
>   
>   static void free_intremap_entry(int seg, int bdf, int offset)
>   {
> -    u32* entry;
> -    entry = (u32*)get_intremap_entry(seg, bdf, offset);
> +    u32 *entry = get_intremap_entry(seg, bdf, offset);
> +
>       memset(entry, 0, sizeof(u32));
> +    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
>   }
>   
>   static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
> @@ -98,18 +100,24 @@ static void update_intremap_entry(u32* e
>                               INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
>   }
>   
> -static void update_intremap_entry_from_ioapic(
> +static void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
> +{
> +    rte->vector = (u8)offset;
> +    rte->delivery_mode = offset >> 8;
> +}
> +
> +static int update_intremap_entry_from_ioapic(
>       int bdf,
>       struct amd_iommu *iommu,
> -    const struct IO_APIC_route_entry *rte,
> -    const struct IO_APIC_route_entry *old_rte)
> +    struct IO_APIC_route_entry *rte,
> +    u16 *index)
>   {
>       unsigned long flags;
>       u32* entry;
>       u8 delivery_mode, dest, vector, dest_mode;
>       int req_id;
>       spinlock_t *lock;
> -    int offset;
> +    unsigned int offset;
>   
>       req_id = get_intremap_requestor_id(iommu->seg, bdf);
>       lock = get_intremap_lock(iommu->seg, req_id);
> @@ -121,16 +129,20 @@ static void update_intremap_entry_from_i
>   
>       spin_lock_irqsave(lock, flags);
>   
> -    offset = get_intremap_offset(vector, delivery_mode);
> -    if ( old_rte )
> +    offset = *index;
> +    if ( offset >= INTREMAP_ENTRIES )
>       {
> -        int old_offset = get_intremap_offset(old_rte->vector,
> -                                             old_rte->delivery_mode);
> -
> -        if ( offset != old_offset )
> -            free_intremap_entry(iommu->seg, bdf, old_offset);
> +        offset = alloc_intremap_entry(iommu->seg, req_id);
> +        if ( offset >= INTREMAP_ENTRIES )
> +        {
> +            spin_unlock_irqrestore(lock, flags);
> +            rte->mask = 1;
> +            return -ENOSPC;
> +        }
> +        *index = offset;
>       }
> -    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
> +
> +    entry = get_intremap_entry(iommu->seg, req_id, offset);
>       update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
>   
>       spin_unlock_irqrestore(lock, flags);
> @@ -141,6 +153,10 @@ static void update_intremap_entry_from_i
>           amd_iommu_flush_intremap(iommu, req_id);
>           spin_unlock_irqrestore(&iommu->lock, flags);
>       }
> +
> +    set_rte_index(rte, offset);
> +
> +    return 0;
>   }
>   
>   int __init amd_iommu_setup_ioapic_remapping(void)
> @@ -153,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp
>       u16 seg, bdf, req_id;
>       struct amd_iommu *iommu;
>       spinlock_t *lock;
> -    int offset;
> +    unsigned int offset;
>   
>       /* Read ioapic entries and update interrupt remapping table accordingly */
>       for ( apic = 0; apic < nr_ioapics; apic++ )
> @@ -184,19 +200,23 @@ int __init amd_iommu_setup_ioapic_remapp
>               dest = rte.dest.logical.logical_dest;
>   
>               spin_lock_irqsave(lock, flags);
> -            offset = get_intremap_offset(vector, delivery_mode);
> -            entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
> +            offset = alloc_intremap_entry(seg, req_id);
> +            BUG_ON(offset >= INTREMAP_ENTRIES);
> +            entry = get_intremap_entry(iommu->seg, req_id, offset);
>               update_intremap_entry(entry, vector,
>                                     delivery_mode, dest_mode, dest);
>               spin_unlock_irqrestore(lock, flags);
>   
> +            set_rte_index(&rte, offset);
> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
> +            __ioapic_write_entry(apic, pin, 1, rte);
> +
>               if ( iommu->enabled )
>               {
>                   spin_lock_irqsave(&iommu->lock, flags);
>                   amd_iommu_flush_intremap(iommu, req_id);
>                   spin_unlock_irqrestore(&iommu->lock, flags);
>               }
> -            set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
>           }
>       }
>       return 0;
> @@ -209,7 +229,7 @@ void amd_iommu_ioapic_update_ire(
>       struct IO_APIC_route_entry new_rte = { 0 };
>       unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
>       unsigned int pin = (reg - 0x10) / 2;
> -    int saved_mask, seg, bdf;
> +    int saved_mask, seg, bdf, rc;
>       struct amd_iommu *iommu;
>   
>       if ( !iommu_intremap )
> @@ -247,7 +267,7 @@ void amd_iommu_ioapic_update_ire(
>       }
>   
>       if ( new_rte.mask &&
> -         !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
> +         ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
>       {
>           ASSERT(saved_mask);
>           __io_apic_write(apic, reg, value);
> @@ -262,14 +282,19 @@ void amd_iommu_ioapic_update_ire(
>       }
>   
>       /* Update interrupt remapping entry */
> -    update_intremap_entry_from_ioapic(
> -        bdf, iommu, &new_rte,
> -        test_and_set_bit(pin,
> -                         ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
> -                                                                  : NULL);
> +    rc = update_intremap_entry_from_ioapic(
> +             bdf, iommu, &new_rte,
> +             &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
>   
> -    /* Forward write access to IO-APIC RTE */
> -    __io_apic_write(apic, reg, value);
> +    __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
> +
> +    if ( rc )
> +    {
> +        /* Keep the entry masked. */
> +        printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
> +               IO_APIC_ID(apic), pin, rc);
> +        return;
> +    }
>   
>       /* For lower bits access, return directly to avoid double writes */
>       if ( reg == rte_lo )
> @@ -283,16 +308,41 @@ void amd_iommu_ioapic_update_ire(
>       }
>   }
>   
> -static void update_intremap_entry_from_msi_msg(
> +unsigned int amd_iommu_read_ioapic_from_ire(
> +    unsigned int apic, unsigned int reg)
> +{
> +    unsigned int val = __io_apic_read(apic, reg);
> +
> +    if ( !(reg & 1) )
> +    {
> +        unsigned int offset = val & (INTREMAP_ENTRIES - 1);
> +        u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
> +        u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
> +        u16 req_id = get_intremap_requestor_id(seg, bdf);
> +        const u32 *entry = get_intremap_entry(seg, req_id, offset);
> +
> +        val &= ~(INTREMAP_ENTRIES - 1);
> +        val |= get_field_from_reg_u32(*entry,
> +                                      INT_REMAP_ENTRY_INTTYPE_MASK,
> +                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
> +        val |= get_field_from_reg_u32(*entry,
> +                                      INT_REMAP_ENTRY_VECTOR_MASK,
> +                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
> +    }
> +
> +    return val;
> +}
> +
> +static int update_intremap_entry_from_msi_msg(
>       struct amd_iommu *iommu, u16 bdf,
> -    int *remap_index, const struct msi_msg *msg)
> +    int *remap_index, const struct msi_msg *msg, u32 *data)
>   {
>       unsigned long flags;
>       u32* entry;
>       u16 req_id, alias_id;
>       u8 delivery_mode, dest, vector, dest_mode;
>       spinlock_t *lock;
> -    int offset;
> +    unsigned int offset;
>   
>       req_id = get_dma_requestor_id(iommu->seg, bdf);
>       alias_id = get_intremap_requestor_id(iommu->seg, bdf);
> @@ -303,15 +353,6 @@ static void update_intremap_entry_from_m
>           spin_lock_irqsave(lock, flags);
>           free_intremap_entry(iommu->seg, req_id, *remap_index);
>           spin_unlock_irqrestore(lock, flags);
> -
> -        if ( ( req_id != alias_id ) &&
> -             get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
> -        {
> -            lock = get_intremap_lock(iommu->seg, alias_id);
> -            spin_lock_irqsave(lock, flags);
> -            free_intremap_entry(iommu->seg, alias_id, *remap_index);
> -            spin_unlock_irqrestore(lock, flags);
> -        }
>           goto done;
>       }
>   
> @@ -322,16 +363,24 @@ static void update_intremap_entry_from_m
>       delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
>       vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
>       dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
> -    offset = get_intremap_offset(vector, delivery_mode);
> -    if ( *remap_index < 0)
> +    offset = *remap_index;
> +    if ( offset >= INTREMAP_ENTRIES )
> +    {
> +        offset = alloc_intremap_entry(iommu->seg, bdf);
> +        if ( offset >= INTREMAP_ENTRIES )
> +        {
> +            spin_unlock_irqrestore(lock, flags);
> +            return -ENOSPC;
> +        }
>           *remap_index = offset;
> -    else
> -        BUG_ON(*remap_index != offset);
> +    }
>   
> -    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
> +    entry = get_intremap_entry(iommu->seg, req_id, offset);
>       update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
>       spin_unlock_irqrestore(lock, flags);
>   
> +    *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
> +
>       /*
>        * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
>        * will use alias id to index interrupt remapping table.
> @@ -343,10 +392,8 @@ static void update_intremap_entry_from_m
>       if ( ( req_id != alias_id ) &&
>            get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
>       {
> -        spin_lock_irqsave(lock, flags);
> -        entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
> -        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
> -        spin_unlock_irqrestore(lock, flags);
> +        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
> +               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
>       }
>   
>   done:
> @@ -358,19 +405,22 @@ done:
>               amd_iommu_flush_intremap(iommu, alias_id);
>           spin_unlock_irqrestore(&iommu->lock, flags);
>       }
> +
> +    return 0;
>   }
>   
>   static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
>   {
> -    struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
> -
> -    if ( iommu )
> -        return iommu;
> +    struct amd_iommu *iommu;
>   
>       list_for_each_entry ( iommu, &amd_iommu_head, list )
>           if ( iommu->seg == seg && iommu->bdf == bdf )
>               return NULL;
>   
> +    iommu = find_iommu_for_device(seg, bdf);
> +    if ( iommu )
> +        return iommu;
> +
>       AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
>                       seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
>       return ERR_PTR(-EINVAL);
> @@ -380,8 +430,9 @@ int amd_iommu_msi_msg_update_ire(
>       struct msi_desc *msi_desc, struct msi_msg *msg)
>   {
>       struct pci_dev *pdev = msi_desc->dev;
> -    int bdf, seg;
> +    int bdf, seg, rc;
>       struct amd_iommu *iommu;
> +    u32 data;
>   
>       bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
>       seg = pdev ? pdev->seg : hpet_sbdf.seg;
> @@ -390,11 +441,12 @@ int amd_iommu_msi_msg_update_ire(
>       if ( IS_ERR_OR_NULL(iommu) )
>           return PTR_ERR(iommu);
>   
> -    if ( msi_desc->remap_index >= 0 )
> +    if ( msi_desc->remap_index >= 0 && !msg )
>       {
>           do {
>               update_intremap_entry_from_msi_msg(iommu, bdf,
> -                                               &msi_desc->remap_index, NULL);
> +                                               &msi_desc->remap_index,
> +                                               NULL, NULL);
>               if ( !pdev || !pdev->phantom_stride )
>                   break;
>               bdf += pdev->phantom_stride;
> @@ -409,19 +461,39 @@ int amd_iommu_msi_msg_update_ire(
>           return 0;
>   
>       do {
> -        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
> -                                           msg);
> -        if ( !pdev || !pdev->phantom_stride )
> +        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
> +                                                &msi_desc->remap_index,
> +                                                msg, &data);
> +        if ( rc || !pdev || !pdev->phantom_stride )
>               break;
>           bdf += pdev->phantom_stride;
>       } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
>   
> -    return 0;
> +    msg->data = data;
> +    return rc;
>   }
>   
>   void amd_iommu_read_msi_from_ire(
>       struct msi_desc *msi_desc, struct msi_msg *msg)
>   {
> +    unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
> +    const struct pci_dev *pdev = msi_desc->dev;
> +    u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
> +    u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
> +    const u32 *entry;
> +
> +    if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
> +        return;
> +
> +    entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
> +
> +    msg->data &= ~(INTREMAP_ENTRIES - 1);
> +    msg->data |= get_field_from_reg_u32(*entry,
> +                                        INT_REMAP_ENTRY_INTTYPE_MASK,
> +                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
> +    msg->data |= get_field_from_reg_u32(*entry,
> +                                        INT_REMAP_ENTRY_VECTOR_MASK,
> +                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
>   }
>   
>   int __init amd_iommu_free_intremap_table(
> @@ -438,12 +510,14 @@ int __init amd_iommu_free_intremap_table
>       return 0;
>   }
>   
> -void* __init amd_iommu_alloc_intremap_table(void)
> +void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
>   {
>       void *tb;
>       tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
>       BUG_ON(tb == NULL);
>       memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
> +    *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
> +    BUG_ON(*inuse_map == NULL);
>       return tb;
>   }
>   
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
>       .get_device_group_id = amd_iommu_group_id,
>       .update_ire_from_apic = amd_iommu_ioapic_update_ire,
>       .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
> -    .read_apic_from_ire = __io_apic_read,
> +    .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
>       .read_msi_from_ire = amd_iommu_read_msi_from_ire,
>       .setup_hpet_msi = amd_setup_hpet_msi,
>       .suspend = amd_iommu_suspend,
> --- a/xen/include/asm-x86/amd-iommu.h
> +++ b/xen/include/asm-x86/amd-iommu.h
> @@ -119,6 +119,7 @@ struct ivrs_mappings {
>   
>       /* per device interrupt remapping table */
>       void *intremap_table;
> +    unsigned long *intremap_inuse;
>       spinlock_t intremap_lock;
>   
>       /* ivhd device data settings */
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> @@ -458,10 +458,6 @@
>   #define MAX_AMD_IOMMUS                  32
>   
>   /* interrupt remapping table */
> -#define INT_REMAP_INDEX_DM_MASK         0x1C00
> -#define INT_REMAP_INDEX_DM_SHIFT        10
> -#define INT_REMAP_INDEX_VECTOR_MASK     0x3FC
> -#define INT_REMAP_INDEX_VECTOR_SHIFT    2
>   #define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
>   #define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
>   #define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> @@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
>   
>   /* interrupt remapping */
>   int amd_iommu_setup_ioapic_remapping(void);
> -void *amd_iommu_alloc_intremap_table(void);
> +void *amd_iommu_alloc_intremap_table(unsigned long **);
>   int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
>   void amd_iommu_ioapic_update_ire(
>       unsigned int apic, unsigned int reg, unsigned int value);
> +unsigned int amd_iommu_read_ioapic_from_ire(
> +    unsigned int apic, unsigned int reg);
>   int amd_iommu_msi_msg_update_ire(
>       struct msi_desc *msi_desc, struct msi_msg *msg);
>   void amd_iommu_read_msi_from_ire(
> @@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
>   
>   extern struct ioapic_sbdf {
>       u16 bdf, seg;
> -    unsigned long *pin_setup;
> +    u16 *pin_2_idx;
>   } ioapic_sbdf[MAX_IO_APICS];
> -extern void *shared_intremap_table;
>   
>   extern struct hpet_sbdf {
>       u16 bdf, seg, id;
>       struct amd_iommu *iommu;
>   } hpet_sbdf;
>   
> +extern void *shared_intremap_table;
> +extern unsigned long *shared_intremap_inuse;
> +
>   /* power management support */
>   void amd_iommu_resume(void);
>   void amd_iommu_suspend(void);
>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-23 13:21   ` Suravee Suthikulanit
@ 2013-04-23 15:06     ` Suravee Suthikulanit
  2013-04-24 13:34       ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Suravee Suthikulanit @ 2013-04-23 15:06 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

On 4/23/2013 8:21 AM, Suravee Suthikulanit wrote:
> I am now reproducing the issue with the USB devices not working with 
> this patch again.  I'll continue to investigate more.
>
> Suravee 
Ok, I have more updates on the issue.  Below, I include the output from 
"xl debug-key i".  It is showing several IRQs having the same vector "b0".
This is not the case when booting with Xen w/o the patch.

On my system, IRQ 18 is for my USB keyboard.

Suravee

## INTERRUPT BINDING (i)
(XEN) Guest interrupt information:
(XEN)    IRQ:   0 affinity:1 vec:f0 type=IO-APIC-edge status=00000000 
timer_interrupt+0/0x18a
(XEN)    IRQ:   1 affinity:1 vec:30 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:   3 affinity:1 vec:38 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:   4 affinity:1 vec:40 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:   5 affinity:f vec:48 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:   6 affinity:1 vec:50 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:   7 affinity:1 vec:58 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:   8 affinity:1 vec:60 type=IO-APIC-edge status=00000010 
in-flight=0 domain-list=0:  8(----),
(XEN)    IRQ:   9 affinity:1 vec:68 type=IO-APIC-level status=00000010 
in-flight=0 domain-list=0:  9(----),
(XEN)    IRQ:  10 affinity:1 vec:70 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:  11 affinity:1 vec:78 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:  12 affinity:1 vec:88 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:  13 affinity:f vec:90 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:  14 affinity:1 vec:98 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:  15 affinity:1 vec:a0 type=IO-APIC-edge status=00000002 
mapped, unbound
(XEN)    IRQ:  16 affinity:1 vec:b0 type=IO-APIC-level status=00000010 
in-flight=0 domain-list=0: 16(----),
(XEN)    IRQ:  17 affinity:1 vec:b8 type=IO-APIC-level status=00000010 
in-flight=0 domain-list=0: 17(----),
(XEN)    IRQ:  18 affinity:1 vec:a8 type=IO-APIC-level status=00000010 
in-flight=0 domain-list=0: 18(----),
(XEN)    IRQ:  19 affinity:f vec:d8 type=IO-APIC-level status=00000002 
mapped, unbound
(XEN)    IRQ:  24 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 
iommu_interrupt_handler+0/0x57
(XEN)    IRQ:  25 affinity:1 vec:c0 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:279(----),
(XEN)    IRQ:  26 affinity:1 vec:c8 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:278(----),
(XEN)    IRQ:  27 affinity:1 vec:d0 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:277(----),
(XEN)    IRQ:  28 affinity:1 vec:21 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:276(----),
(XEN)    IRQ:  29 affinity:1 vec:29 type=PCI-MSI/-X status=00000010 
in-flight=0 domain-list=0:275(----),
(XEN)    IRQ:  30 affinity:1 vec:31 type=PCI-MSI/-X status=00000010 
in-flight=0 domain-list=0:274(----),
(XEN)    IRQ:  31 affinity:1 vec:39 type=PCI-MSI/-X status=00000010 
in-flight=0 domain-list=0:273(----),
(XEN)    IRQ:  32 affinity:1 vec:41 type=PCI-MSI/-X status=00000010 
in-flight=0 domain-list=0:272(----),
(XEN)    IRQ:  33 affinity:1 vec:49 type=PCI-MSI/-X status=00000010 
in-flight=0 domain-list=0:271(----),
(XEN)    IRQ:  34 affinity:1 vec:51 type=PCI-MSI/-X status=00000010 
in-flight=0 domain-list=0:270(----),
(XEN)    IRQ:  35 affinity:1 vec:59 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:269(----),
(XEN)    IRQ:  36 affinity:1 vec:61 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:268(----),
(XEN)    IRQ:  37 affinity:1 vec:69 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:267(----),
(XEN)    IRQ:  38 affinity:1 vec:71 type=PCI-MSI status=00000010 
in-flight=0 domain-list=0:266(----),
(XEN) IO-APIC interrupt information:
(XEN)     IRQ  0 Vec240:
(XEN)       Apic 0x00, Pin  2: vec=f0 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  1 Vec 48:
(XEN)       Apic 0x00, Pin  1: vec=b0 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  3 Vec 56:
(XEN)       Apic 0x00, Pin  3: vec=38 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  4 Vec 64:
(XEN)       Apic 0x00, Pin  4: vec=40 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  5 Vec 72:
(XEN)       Apic 0x00, Pin  5: vec=48 delivery=LoPri dest=L status=0 
polarity=0 irr=0 trig=E mask=1 dest_id:15
(XEN)     IRQ  6 Vec 80:
(XEN)       Apic 0x00, Pin  6: vec=50 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  7 Vec 88:
(XEN)       Apic 0x00, Pin  7: vec=58 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  8 Vec 96:
(XEN)       Apic 0x00, Pin  8: vec=60 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  9 Vec104:
(XEN)       Apic 0x00, Pin  9: vec=68 delivery=Fixed dest=L status=0 
polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 10 Vec112:
(XEN)       Apic 0x00, Pin 10: vec=70 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 11 Vec120:
(XEN)       Apic 0x00, Pin 11: vec=78 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 12 Vec136:
(XEN)       Apic 0x00, Pin 12: vec=b0 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 13 Vec144:
(XEN)       Apic 0x00, Pin 13: vec=b0 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=1 dest_id:15
(XEN)     IRQ 14 Vec152:
(XEN)       Apic 0x00, Pin 14: vec=b0 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 15 Vec160:
(XEN)       Apic 0x00, Pin 15: vec=b0 delivery=Fixed dest=L status=0 
polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 16 Vec176:
(XEN)       Apic 0x00, Pin 16: vec=b0 delivery=Fixed dest=L status=0 
polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 17 Vec184:
(XEN)       Apic 0x00, Pin 17: vec=b0 delivery=Fixed dest=L status=0 
polarity=1 irr=1 trig=L mask=0 dest_id:1
(XEN)     IRQ 18 Vec168:
(XEN)       Apic 0x00, Pin 18: vec=b0 delivery=Fixed dest=L status=0 
polarity=1 irr=1 trig=L mask=0 dest_id:1
(XEN)     IRQ 19 Vec216:
(XEN)       Apic 0x00, Pin 19: vec=b0 delivery=Fixed dest=L status=0 
polarity=1 irr=0 trig=L mask=1 dest_id:15

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-23 15:06     ` Suravee Suthikulanit
@ 2013-04-24 13:34       ` Jan Beulich
  2013-04-24 21:52         ` suravee suthikulpanit
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2013-04-24 13:34 UTC (permalink / raw)
  To: Suravee Suthikulanit
  Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

[-- Attachment #1: Type: text/plain, Size: 850 bytes --]

>>> On 23.04.13 at 17:06, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
> On 4/23/2013 8:21 AM, Suravee Suthikulanit wrote:
>> I am now reproducing the issue with the USB devices not working with 
>> this patch again.  I'll continue to investigate more.
>>
>> Suravee 
> Ok, I have more updates on the issue.  Below, I include the output from 
> "xl debug-key i".  It is showing several IRQs having the same vector "b0".
> This is not the case when booting with the xen w/o the patch.

Right, and I spotted a bug in the respective code, but I can't
readily connect that bug to the behavior you observed (i.e. I
can't explain why the bad vector would be the same all the time).

Nevertheless, attached a fixed version of the first patch of
the most recent series - let's see how much of a difference this
makes.

Jan


[-- Attachment #2: AMD-IOMMU-irte-alloc.patch --]
[-- Type: text/plain, Size: 23364 bytes --]

AMD IOMMU: allocate IRTE entries instead of using a static mapping

For multi-vector MSI, where we surely don't want to allocate
contiguous vectors and be able to set affinities of the individual
vectors separately, we need to drop the use of the tuple of vector and
delivery mode to determine the IRTE to use, and instead allocate IRTEs
(which imo should have been done from the beginning).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
One thing I surely need confirmation on is whether this

        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);

in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
clear to me how to properly set up things for affected devices, as we
would need an identical index allocated for two different remap table
instances (which can hardly be expected to work out well).

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
          /* allocate per-device interrupt remapping table */
          if ( amd_iommu_perdev_intremap )
              ivrs_mappings[alias_id].intremap_table =
-                amd_iommu_alloc_intremap_table();
+                amd_iommu_alloc_intremap_table(
+                    &ivrs_mappings[alias_id].intremap_inuse);
          else
          {
              if ( shared_intremap_table == NULL  )
-                 shared_intremap_table = amd_iommu_alloc_intremap_table();
+                 shared_intremap_table = amd_iommu_alloc_intremap_table(
+                     &shared_intremap_inuse);
              ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+             ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
          }
     }
     /* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
             if ( IO_APIC_ID(apic) != special->handle )
                 continue;
 
-            if ( ioapic_sbdf[special->handle].pin_setup )
+            if ( ioapic_sbdf[special->handle].pin_2_idx )
             {
                 if ( ioapic_sbdf[special->handle].bdf == bdf &&
                      ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
                 ioapic_sbdf[special->handle].bdf = bdf;
                 ioapic_sbdf[special->handle].seg = seg;
 
-                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
-                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+                    u16, nr_ioapic_entries[apic]);
                 if ( nr_ioapic_entries[apic] &&
-                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
                 {
                     printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                     return 0;
                 }
+                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                       nr_ioapic_entries[apic]);
             }
             break;
         }
@@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
     for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
     {
         if ( !nr_ioapic_entries[apic] ||
-             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+             ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             continue;
 
         printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
             error = -ENXIO;
         else
         {
-            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
-                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
-            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+                u16, nr_ioapic_entries[apic]);
+            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                 error = -ENOMEM;
             }
+            memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                   nr_ioapic_entries[apic]);
         }
     }
 
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
 struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
 struct hpet_sbdf hpet_sbdf;
 void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
 static DEFINE_SPINLOCK(shared_intremap_lock);
 
 static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
     return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
 }
 
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
 {
-    int offset = 0;
-    offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
-    offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & 
-        INT_REMAP_INDEX_VECTOR_MASK;
-    return offset;
+    unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+    unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+    if ( slot < INTREMAP_ENTRIES )
+        __set_bit(slot, inuse);
+    return slot;
 }
 
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
 {
-    u8 *table;
+    u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
 
-    table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
     ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
 
-    return (u8*) (table + offset);
+    return table + offset;
 }
 
 static void free_intremap_entry(int seg, int bdf, int offset)
 {
-    u32* entry;
-    entry = (u32*)get_intremap_entry(seg, bdf, offset);
+    u32 *entry = get_intremap_entry(seg, bdf, offset);
+
     memset(entry, 0, sizeof(u32));
+    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
 
 static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,30 @@ static void update_intremap_entry(u32* e
                             INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
 }
 
-static void update_intremap_entry_from_ioapic(
+static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
+{
+    return rte->vector | (rte->delivery_mode << 8);
+}
+
+static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+    rte->vector = (u8)offset;
+    rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
-    const struct IO_APIC_route_entry *rte,
-    const struct IO_APIC_route_entry *old_rte)
+    struct IO_APIC_route_entry *rte,
+    bool_t lo_update,
+    u16 *index)
 {
     unsigned long flags;
     u32* entry;
     u8 delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +135,35 @@ static void update_intremap_entry_from_i
 
     spin_lock_irqsave(lock, flags);
 
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( old_rte )
+    offset = *index;
+    if ( offset >= INTREMAP_ENTRIES )
     {
-        int old_offset = get_intremap_offset(old_rte->vector,
-                                             old_rte->delivery_mode);
+        offset = alloc_intremap_entry(iommu->seg, req_id);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            rte->mask = 1;
+            return -ENOSPC;
+        }
+        *index = offset;
+        lo_update = 1;
+    }
 
-        if ( offset != old_offset )
-            free_intremap_entry(iommu->seg, bdf, old_offset);
+    entry = get_intremap_entry(iommu->seg, req_id, offset);
+    if ( !lo_update )
+    {
+        /*
+         * Low half of incoming RTE is already in remapped format,
+         * so need to recover vector and delivery mode from IRTE.
+         */
+        ASSERT(get_rte_index(rte) == offset);
+        vector = get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_VECTOR_MASK,
+                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
+        delivery_mode = get_field_from_reg_u32(*entry,
+                                               INT_REMAP_ENTRY_INTTYPE_MASK,
+                                               INT_REMAP_ENTRY_INTTYPE_SHIFT);
     }
-    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
@@ -141,6 +174,10 @@ static void update_intremap_entry_from_i
         amd_iommu_flush_intremap(iommu, req_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    set_rte_index(rte, offset);
+
+    return 0;
 }
 
 int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +190,7 @@ int __init amd_iommu_setup_ioapic_remapp
     u16 seg, bdf, req_id;
     struct amd_iommu *iommu;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +221,23 @@ int __init amd_iommu_setup_ioapic_remapp
             dest = rte.dest.logical.logical_dest;
 
             spin_lock_irqsave(lock, flags);
-            offset = get_intremap_offset(vector, delivery_mode);
-            entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+            offset = alloc_intremap_entry(seg, req_id);
+            BUG_ON(offset >= INTREMAP_ENTRIES);
+            entry = get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
                                   delivery_mode, dest_mode, dest);
             spin_unlock_irqrestore(lock, flags);
 
+            set_rte_index(&rte, offset);
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+            __ioapic_write_entry(apic, pin, 1, rte);
+
             if ( iommu->enabled )
             {
                 spin_lock_irqsave(&iommu->lock, flags);
                 amd_iommu_flush_intremap(iommu, req_id);
                 spin_unlock_irqrestore(&iommu->lock, flags);
             }
-            set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
         }
     }
     return 0;
@@ -209,7 +250,7 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf;
+    int saved_mask, seg, bdf, rc;
     struct amd_iommu *iommu;
 
     if ( !iommu_intremap )
@@ -247,7 +288,7 @@ void amd_iommu_ioapic_update_ire(
     }
 
     if ( new_rte.mask &&
-         !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+         ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
         __io_apic_write(apic, reg, value);
@@ -262,14 +303,19 @@ void amd_iommu_ioapic_update_ire(
     }
 
     /* Update interrupt remapping entry */
-    update_intremap_entry_from_ioapic(
-        bdf, iommu, &new_rte,
-        test_and_set_bit(pin,
-                         ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
-                                                                  : NULL);
+    rc = update_intremap_entry_from_ioapic(
+             bdf, iommu, &new_rte, reg == rte_lo,
+             &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
 
-    /* Forward write access to IO-APIC RTE */
-    __io_apic_write(apic, reg, value);
+    __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+    if ( rc )
+    {
+        /* Keep the entry masked. */
+        printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+               IO_APIC_ID(apic), pin, rc);
+        return;
+    }
 
     /* For lower bits access, return directly to avoid double writes */
     if ( reg == rte_lo )
@@ -283,16 +329,41 @@ void amd_iommu_ioapic_update_ire(
     }
 }
 
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg)
+{
+    unsigned int val = __io_apic_read(apic, reg);
+
+    if ( !(reg & 1) )
+    {
+        unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+        u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+        u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+        u16 req_id = get_intremap_requestor_id(seg, bdf);
+        const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+        val &= ~(INTREMAP_ENTRIES - 1);
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_INTTYPE_MASK,
+                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_VECTOR_MASK,
+                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
+    }
+
+    return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
     struct amd_iommu *iommu, u16 bdf,
-    int *remap_index, const struct msi_msg *msg)
+    int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
     u32* entry;
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +374,6 @@ static void update_intremap_entry_from_m
         spin_lock_irqsave(lock, flags);
         free_intremap_entry(iommu->seg, req_id, *remap_index);
         spin_unlock_irqrestore(lock, flags);
-
-        if ( ( req_id != alias_id ) &&
-             get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
-        {
-            lock = get_intremap_lock(iommu->seg, alias_id);
-            spin_lock_irqsave(lock, flags);
-            free_intremap_entry(iommu->seg, alias_id, *remap_index);
-            spin_unlock_irqrestore(lock, flags);
-        }
         goto done;
     }
 
@@ -322,16 +384,24 @@ static void update_intremap_entry_from_m
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( *remap_index < 0)
+    offset = *remap_index;
+    if ( offset >= INTREMAP_ENTRIES )
+    {
+        offset = alloc_intremap_entry(iommu->seg, bdf);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            return -ENOSPC;
+        }
         *remap_index = offset;
-    else
-        BUG_ON(*remap_index != offset);
+    }
 
-    entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+    entry = get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
+    *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
     /*
      * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
      * will use alias id to index interrupt remapping table.
@@ -343,10 +413,8 @@ static void update_intremap_entry_from_m
     if ( ( req_id != alias_id ) &&
          get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
     {
-        spin_lock_irqsave(lock, flags);
-        entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
-        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-        spin_unlock_irqrestore(lock, flags);
+        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
 done:
@@ -358,19 +426,22 @@ done:
             amd_iommu_flush_intremap(iommu, alias_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    return 0;
 }
 
 static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
 {
-    struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
-    if ( iommu )
-        return iommu;
+    struct amd_iommu *iommu;
 
     list_for_each_entry ( iommu, &amd_iommu_head, list )
         if ( iommu->seg == seg && iommu->bdf == bdf )
             return NULL;
 
+    iommu = find_iommu_for_device(seg, bdf);
+    if ( iommu )
+        return iommu;
+
     AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
                     seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
     return ERR_PTR(-EINVAL);
@@ -380,8 +451,9 @@ int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct pci_dev *pdev = msi_desc->dev;
-    int bdf, seg;
+    int bdf, seg, rc;
     struct amd_iommu *iommu;
+    u32 data;
 
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +462,12 @@ int amd_iommu_msi_msg_update_ire(
     if ( IS_ERR_OR_NULL(iommu) )
         return PTR_ERR(iommu);
 
-    if ( msi_desc->remap_index >= 0 )
+    if ( msi_desc->remap_index >= 0 && !msg )
     {
         do {
             update_intremap_entry_from_msi_msg(iommu, bdf,
-                                               &msi_desc->remap_index, NULL);
+                                               &msi_desc->remap_index,
+                                               NULL, NULL);
             if ( !pdev || !pdev->phantom_stride )
                 break;
             bdf += pdev->phantom_stride;
@@ -409,19 +482,39 @@ int amd_iommu_msi_msg_update_ire(
         return 0;
 
     do {
-        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
-                                           msg);
-        if ( !pdev || !pdev->phantom_stride )
+        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+                                                &msi_desc->remap_index,
+                                                msg, &data);
+        if ( rc || !pdev || !pdev->phantom_stride )
             break;
         bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
-    return 0;
+    msg->data = data;
+    return rc;
 }
 
 void amd_iommu_read_msi_from_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
+    unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+    const struct pci_dev *pdev = msi_desc->dev;
+    u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+    u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+    const u32 *entry;
+
+    if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+        return;
+
+    entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+    msg->data &= ~(INTREMAP_ENTRIES - 1);
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_INTTYPE_MASK,
+                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_VECTOR_MASK,
+                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -438,12 +531,14 @@ int __init amd_iommu_free_intremap_table
     return 0;
 }
 
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
 {
     void *tb;
     tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
     BUG_ON(tb == NULL);
     memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+    BUG_ON(*inuse_map == NULL);
     return tb;
 }
 
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
     .get_device_group_id = amd_iommu_group_id,
     .update_ire_from_apic = amd_iommu_ioapic_update_ire,
     .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
-    .read_apic_from_ire = __io_apic_read,
+    .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
     .read_msi_from_ire = amd_iommu_read_msi_from_ire,
     .setup_hpet_msi = amd_setup_hpet_msi,
     .suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
 
     /* per device interrupt remapping table */
     void *intremap_table;
+    unsigned long *intremap_inuse;
     spinlock_t intremap_lock;
 
     /* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -458,10 +458,6 @@
 #define MAX_AMD_IOMMUS                  32
 
 /* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK         0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT        10
-#define INT_REMAP_INDEX_VECTOR_MASK     0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT    2
 #define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
 #define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
 #define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
 
 /* interrupt remapping */
 int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
 int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
 void amd_iommu_ioapic_update_ire(
     unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg);
 int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg);
 void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
 
 extern struct ioapic_sbdf {
     u16 bdf, seg;
-    unsigned long *pin_setup;
+    u16 *pin_2_idx;
 } ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
 
 extern struct hpet_sbdf {
     u16 bdf, seg, id;
     struct amd_iommu *iommu;
 } hpet_sbdf;
 
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
 /* power management support */
 void amd_iommu_resume(void);
 void amd_iommu_suspend(void);

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-24 13:34       ` Jan Beulich
@ 2013-04-24 21:52         ` suravee suthikulpanit
  2013-04-26 10:39           ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: suravee suthikulpanit @ 2013-04-24 21:52 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

[-- Attachment #1: Type: text/plain, Size: 1341 bytes --]

On 04/24/2013 08:34 AM, Jan Beulich wrote:
>>>> On 23.04.13 at 17:06, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
>> On 4/23/2013 8:21 AM, Suravee Suthikulanit wrote:
>>> I am now reproducing the issue with the USB devices not working with
>>> this patch again.  I'll continue to investigate more.
>>>
>>> Suravee
>> Ok, I have more updates on the issue.  Below, I include the output from
>> "xl debug-key i".  It is showing several IRQs having the same vector "b0".
>> This is not the case when booting with the xen w/o the patch.
> Right, and I spotted a bug in the respective code, but I can't
> readily connect that bug to the behavior you observed (i.e. I
> can't explain why the bad vector would be the same all the time).
>
> Nevertheless, attached a fixed version of the first patch of
> the most recent series - let's see how much of a difference this
> makes.
>
> Jan
>
Jan,

I have finally root caused the issue.  Here are the two patches that should help fix the issue.
The first patch fixes the issue with the mouse/keyboard not working, while the second patch
adds a debug-key to help dump the IRTE, which I used to debug the issue.

I have also included the xl dmesg from both "before" patching and "after" applying patches 1, 2, and the fixes.
These also include the output from debug keys "M,z,i,j"



Suravee


[-- Attachment #2: after.txt --]
[-- Type: text/plain, Size: 29503 bytes --]

 __  __            _  _    _____                    _        _     _      
 \ \/ /___ _ __   | || |  |___ /    _   _ _ __  ___| |_ __ _| |__ | | ___ 
  \  // _ \ '_ \  | || |_   |_ \ __| | | | '_ \/ __| __/ _` | '_ \| |/ _ \
  /  \  __/ | | | |__   _| ___) |__| |_| | | | \__ \ || (_| | |_) | |  __/
 /_/\_\___|_| |_|    |_|(_)____/    \__,_|_| |_|___/\__\__,_|_.__/|_|\___|
                                                                          
(XEN) Xen version 4.3-unstable (root@) (gcc (Ubuntu/Linaro 4.7.2-2ubuntu1) 4.7.2) debug=y Wed Apr 24 14:52:17 CDT 2013
(XEN) Latest ChangeSet: unavailable
(XEN) Bootloader: GRUB 2.00-7ubuntu11
(XEN) Command line: placeholder dom0_max_vcpus=2 dom0_vcpus_pin dom0_mem=4G,max:4G iommu=debug apic_verbosity=debug
(XEN) Video information:
(XEN)  VGA is text mode 80x25, font 8x16
(XEN)  VBE/DDC methods: V2; EDID transfer time: 1 seconds
(XEN) Disc information:
(XEN)  Found 1 MBR signatures
(XEN)  Found 1 EDD information structures
(XEN) Xen-e820 RAM map:
(XEN)  0000000000000000 - 000000000009fc00 (usable)
(XEN)  000000000009fc00 - 00000000000a0000 (reserved)
(XEN)  00000000000e0000 - 0000000000100000 (reserved)
(XEN)  0000000000100000 - 00000000cea84000 (usable)
(XEN)  00000000cea84000 - 00000000ced5e000 (reserved)
(XEN)  00000000ced5e000 - 00000000cf0ae000 (ACPI NVS)
(XEN)  00000000cf0ae000 - 00000000cf5e4000 (reserved)
(XEN)  00000000cf5e4000 - 00000000cf5e5000 (usable)
(XEN)  00000000cf5e5000 - 00000000cf7eb000 (ACPI NVS)
(XEN)  00000000cf7eb000 - 00000000cfaa3000 (usable)
(XEN)  00000000cfaa3000 - 00000000cfdf6000 (reserved)
(XEN)  00000000cfdf6000 - 00000000cfe00000 (usable)
(XEN)  00000000fec00000 - 00000000fec01000 (reserved)
(XEN)  00000000fec10000 - 00000000fec11000 (reserved)
(XEN)  00000000fed00000 - 00000000fed01000 (reserved)
(XEN)  00000000fed40000 - 00000000fed45000 (reserved)
(XEN)  00000000fed80000 - 00000000fed90000 (reserved)
(XEN)  00000000ff000000 - 0000000100000000 (reserved)
(XEN)  0000000100001000 - 000000042f000000 (usable)
(XEN) ACPI: RSDP 000F0490, 0024 (r2 ALASKA)
(XEN) ACPI: XSDT CF09B078, 006C (r1 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI: FACP CF0A2A10, 010C (r5 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI Warning (tbfadt-0464): Optional field "Pm2ControlBlock" has zero address or length: 0000000000000000/1 [20070126]
(XEN) ACPI: DSDT CF09B178, 7897 (r2 ALASKA    A M I        0 INTL 20051117)
(XEN) ACPI: FACS CF0AC080, 0040
(XEN) ACPI: APIC CF0A2B20, 0072 (r3 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI: FPDT CF0A2B98, 0044 (r1 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI: MCFG CF0A2BE0, 003C (r1 ALASKA    A M I  1072009 MSFT    10013)
(XEN) ACPI: HPET CF0A2C20, 0038 (r1 ALASKA    A M I  1072009 AMI         5)
(XEN) ACPI: SSDT CF0A2C58, 0D40 (r1 AMD    POWERNOW        1 AMD         1)
(XEN) ACPI: SSDT CF0A3998, 04B7 (r2    AMD     ALIB        1 MSFT  4000000)
(XEN) ACPI: IVRS CF0A3E50, 0070 (r2  AMD   AMDIOMMU        1 AMD         0)
(XEN) ACPI: CRAT CF0A3EC0, 0398 (r1 AMD    AGESA           1 AMD         1)
(XEN) System RAM: 16348MB (16741268kB)
(XEN) No NUMA configuration found
(XEN) Faking a node at 0000000000000000-000000042f000000
(XEN) Domain heap initialised
(XEN) found SMP MP-table at 000fd7e0
(XEN) DMI 2.7 present.
(XEN) APIC boot state is 'xapic'
(XEN) Using APIC driver default
(XEN) ACPI: PM-Timer IO Port: 0x808
(XEN) ACPI: v5 SLEEP INFO: control[0:0], status[0:0]
(XEN) ACPI: SLEEP INFO: pm1x_cnt[804,0], pm1x_evt[800,0]
(XEN) ACPI: 32/64X FACS address mismatch in FADT - cf0ac080/0000000000000000, using 32
(XEN) ACPI:             wakeup_vec[cf0ac08c], vec_size[20]
(XEN) ACPI: Local APIC address 0xfee00000
(XEN) ACPI: LAPIC (acpi_id[0x01] lapic_id[0x10] enabled)
(XEN) Processor #16 5:0 APIC version 16
(XEN) ACPI: LAPIC (acpi_id[0x02] lapic_id[0x11] enabled)
(XEN) Processor #17 5:0 APIC version 16
(XEN) ACPI: LAPIC (acpi_id[0x03] lapic_id[0x12] enabled)
(XEN) Processor #18 5:0 APIC version 16
(XEN) ACPI: LAPIC (acpi_id[0x04] lapic_id[0x13] enabled)
(XEN) Processor #19 5:0 APIC version 16
(XEN) ACPI: LAPIC_NMI (acpi_id[0xff] high edge lint[0x1])
(XEN) ACPI: IOAPIC (id[0x05] address[0xfec00000] gsi_base[0])
(XEN) IOAPIC[0]: apic_id 5, version 33, address 0xfec00000, GSI 0-23
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
(XEN) ACPI: IRQ0 used by override.
(XEN) ACPI: IRQ2 used by override.
(XEN) ACPI: IRQ9 used by override.
(XEN) Enabling APIC mode:  Flat.  Using 1 I/O APICs
(XEN) ACPI: HPET id: 0x10228210 base: 0xfed00000
(XEN) ERST table was not found
(XEN) Using ACPI (MADT) for SMP configuration information
(XEN) SMP: Allowing 4 CPUs (0 hotplug CPUs)
(XEN) mapped APIC to ffff82c3ffdfb000 (fee00000)
(XEN) mapped IOAPIC to ffff82c3ffdfa000 (fec00000)
(XEN) IRQ limits: 24 GSI, 760 MSI/MSI-X
(XEN) Using scheduler: SMP Credit Scheduler (credit)
(XEN) Detected 3293.945 MHz processor.
(XEN) Initing memory sharing.
(XEN) xstate_init: using cntxt_size: 0x3c0 and states: 0x4000000000000007
(XEN) AMD Fam15h machine check reporting enabled
(XEN) PCI: MCFG configuration 0: base e0000000 segment 0000 buses 00 - ff
(XEN) PCI: Not using MCFG for segment 0000 bus 00-ff
(XEN) AMD-Vi: Found MSI capability block at 0x54
(XEN) AMD-Vi: ACPI Table:
(XEN) AMD-Vi:  Signature IVRS
(XEN) AMD-Vi:  Length 0x70
(XEN) AMD-Vi:  Revision 0x2
(XEN) AMD-Vi:  CheckSum 0xb2
(XEN) AMD-Vi:  OEM_Id AMD  
(XEN) AMD-Vi:  OEM_Table_Id AMDIOMMU
(XEN) AMD-Vi:  OEM_Revision 0x1
(XEN) AMD-Vi:  Creator_Id AMD 
(XEN) AMD-Vi:  Creator_Revision 0
(XEN) AMD-Vi: IVRS Block: type 0x10 flags 0xfe len 0x40 id 0x2
(XEN) AMD-Vi: IVHD Device Entry: type 0x3 id 0x8 flags 0
(XEN) AMD-Vi:  Dev_Id Range: 0x8 -> 0xfffe
(XEN) AMD-Vi: IVHD Device Entry: type 0x43 id 0x200 flags 0
(XEN) AMD-Vi:  Dev_Id Range: 0x200 -> 0x2ff alias 0xa4
(XEN) AMD-Vi: IVHD Device Entry: type 0 id 0 flags 0
(XEN) AMD-Vi: IVHD Device Entry: type 0x48 id 0 flags 0
(XEN) AMD-Vi: IVHD Special: 0000:00:14.0 variety 0x2 handle 0
(XEN) AMD-Vi: IVHD Device Entry: type 0x48 id 0 flags 0xd7
(XEN) AMD-Vi: IVHD Special: 0000:00:14.0 variety 0x1 handle 0x5
(XEN) AMD-Vi: IOMMU Extended Features:
(XEN)  - Prefetch Pages Command
(XEN)  - Peripheral Page Service Request
(XEN)  - Guest Translation
(XEN)  - Invalidate All Command
(XEN) AMD-Vi: PPR Log Enabled.
(XEN) AMD-Vi: Guest Translation Enabled.
(XEN) AMD-Vi: IOMMU 0 Enabled.
(XEN) I/O virtualisation enabled
(XEN)  - Dom0 mode: Relaxed
(XEN) Interrupt remapping enabled
(XEN) Getting VERSION: 80050010
(XEN) Getting VERSION: 80050010
(XEN) Getting ID: 10000000
(XEN) Getting LVT0: 700
(XEN) Getting LVT1: 400
(XEN) enabled ExtINT on CPU#0
(XEN) ENABLING IO-APIC IRQs
(XEN)  -> Using new ACK method
(XEN) init IO_APIC IRQs
(XEN)  IO-APIC (apicid-pin) 5-0, 5-16, 5-17, 5-18, 5-19, 5-20, 5-21, 5-22, 5-23 not connected.
(XEN) ..TIMER: vector=0xF0 apic1=0 pin1=2 apic2=-1 pin2=-1
(XEN) number of MP IRQ sources: 15.
(XEN) number of IO-APIC #5 registers: 24.
(XEN) testing the IO APIC.......................
(XEN) IO APIC #5......
(XEN) .... register #00: 05000000
(XEN) .......    : physical APIC id: 05
(XEN) .......    : Delivery Type: 0
(XEN) .......    : LTS          : 0
(XEN) .... register #01: 00178021
(XEN) .......     : max redirection entries: 0017
(XEN) .......     : PRQ implemented: 1
(XEN) .......     : IO APIC version: 0021
(XEN) .... register #02: 05000000
(XEN) .......     : arbitration: 05
(XEN) .... register #03: 05018021
(XEN) .......     : Boot DT    : 1
(XEN) .... IRQ redirection table:
(XEN)  NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:   
(XEN)  00 000 00  1    0    0   0   0    0    0    00
(XEN)  01 001 01  0    0    0   0   0    1    1    30
(XEN)  02 001 01  0    0    0   0   0    1    1    F0
(XEN)  03 001 01  0    0    0   0   0    1    1    38
(XEN)  04 001 01  0    0    0   0   0    1    1    40
(XEN)  05 001 01  0    0    0   0   0    1    1    48
(XEN)  06 001 01  0    0    0   0   0    1    1    50
(XEN)  07 001 01  0    0    0   0   0    1    1    58
(XEN)  08 001 01  0    0    0   0   0    1    1    60
(XEN)  09 001 01  1    1    0   1   0    1    1    68
(XEN)  0a 001 01  0    0    0   0   0    1    1    70
(XEN)  0b 001 01  0    0    0   0   0    1    1    78
(XEN)  0c 001 01  0    0    0   0   0    1    1    88
(XEN)  0d 001 01  0    0    0   0   0    1    1    90
(XEN)  0e 001 01  0    0    0   0   0    1    1    98
(XEN)  0f 001 01  0    0    0   0   0    1    1    A0
(XEN)  10 000 00  1    0    0   0   0    0    0    00
(XEN)  11 000 00  1    0    0   0   0    0    0    00
(XEN)  12 000 00  1    0    0   0   0    0    0    00
(XEN)  13 000 00  1    0    0   0   0    0    0    00
(XEN)  14 000 00  1    0    0   0   0    0    0    00
(XEN)  15 000 00  1    0    0   0   0    0    0    00
(XEN)  16 000 00  1    0    0   0   0    0    0    00
(XEN)  17 000 00  1    0    0   0   0    0    0    00
(XEN) Using vector-based indexing
(XEN) IRQ to pin mappings:
(XEN) IRQ240 -> 0:2
(XEN) IRQ48 -> 0:1
(XEN) IRQ56 -> 0:3
(XEN) IRQ64 -> 0:4
(XEN) IRQ72 -> 0:5
(XEN) IRQ80 -> 0:6
(XEN) IRQ88 -> 0:7
(XEN) IRQ96 -> 0:8
(XEN) IRQ104 -> 0:9
(XEN) IRQ112 -> 0:10
(XEN) IRQ120 -> 0:11
(XEN) IRQ136 -> 0:12
(XEN) IRQ144 -> 0:13
(XEN) IRQ152 -> 0:14
(XEN) IRQ160 -> 0:15
(XEN) .................................... done.
(XEN) Using local APIC timer interrupts.
(XEN) calibrating APIC timer ...
(XEN) ..... CPU clock speed is 3293.9124 MHz.
(XEN) ..... host bus clock speed is 99.8154 MHz.
(XEN) ..... bus_scale = 0x6637
(XEN) Using standard rendezvous
(XEN) Platform timer is 14.318MHz HPET
(XEN) Allocated console ring of 32 KiB.
(XEN) HVM: ASIDs enabled.
(XEN) SVM: Supported advanced features:
(XEN)  - Nested Page Tables (NPT)
(XEN)  - Last Branch Record (LBR) Virtualisation
(XEN)  - Next-RIP Saved on #VMEXIT
(XEN)  - VMCB Clean Bits
(XEN)  - DecodeAssists
(XEN)  - Pause-Intercept Filter
(XEN)  - TSC Rate MSR
(XEN) HVM: SVM enabled
(XEN) HVM: Hardware Assisted Paging (HAP) detected
(XEN) HVM: HAP page sizes: 4kB, 2MB, 1GB
(XEN) masked ExtINT on CPU#1
(XEN) microcode: CPU1 collect_cpu_info: patch_id=0x600100e
(XEN) masked ExtINT on CPU#2
(XEN) microcode: CPU2 collect_cpu_info: patch_id=0x600100e
(XEN) masked ExtINT on CPU#3
(XEN) microcode: CPU3 collect_cpu_info: patch_id=0x600100e
(XEN) Brought up 4 CPUs
(XEN) ACPI sleep modes: S3
(XEN) MCA: Use hw thresholding to adjust polling frequency
(XEN) mcheck_poll: Machine check polling timer started.
(XEN) *** LOADING DOMAIN 0 ***
(XEN) elf_parse_binary: phdr: paddr=0x1000000 memsz=0xae7000
(XEN) elf_parse_binary: phdr: paddr=0x1c00000 memsz=0xdd0e8
(XEN) elf_parse_binary: phdr: paddr=0x1cde000 memsz=0x14680
(XEN) elf_parse_binary: phdr: paddr=0x1cf3000 memsz=0x65c000
(XEN) elf_parse_binary: memory: 0x1000000 -> 0x234f000
(XEN) elf_xen_parse_note: GUEST_OS = "linux"
(XEN) elf_xen_parse_note: GUEST_VERSION = "2.6"
(XEN) elf_xen_parse_note: XEN_VERSION = "xen-3.0"
(XEN) elf_xen_parse_note: VIRT_BASE = 0xffffffff80000000
(XEN) elf_xen_parse_note: ENTRY = 0xffffffff81cf3210
(XEN) elf_xen_parse_note: HYPERCALL_PAGE = 0xffffffff81001000
(XEN) elf_xen_parse_note: FEATURES = "!writable_page_tables|pae_pgdir_above_4gb"
(XEN) elf_xen_parse_note: PAE_MODE = "yes"
(XEN) elf_xen_parse_note: LOADER = "generic"
(XEN) elf_xen_parse_note: unknown xen elf note (0xd)
(XEN) elf_xen_parse_note: SUSPEND_CANCEL = 0x1
(XEN) elf_xen_parse_note: HV_START_LOW = 0xffff800000000000
(XEN) elf_xen_parse_note: PADDR_OFFSET = 0x0
(XEN) elf_xen_addr_calc_check: addresses:
(XEN)     virt_base        = 0xffffffff80000000
(XEN)     elf_paddr_offset = 0x0
(XEN)     virt_offset      = 0xffffffff80000000
(XEN)     virt_kstart      = 0xffffffff81000000
(XEN)     virt_kend        = 0xffffffff8234f000
(XEN)     virt_entry       = 0xffffffff81cf3210
(XEN)     p2m_base         = 0xffffffffffffffff
(XEN)  Xen  kernel: 64-bit, lsb, compat32
(XEN)  Dom0 kernel: 64-bit, PAE, lsb, paddr 0x1000000 -> 0x234f000
(XEN) PHYSICAL MEMORY ARRANGEMENT:
(XEN)  Dom0 alloc.:   0000000238000000->000000023c000000 (1022084 pages to be allocated)
(XEN)  Init. ramdisk: 000000042c884000->000000042efffc00
(XEN) VIRTUAL MEMORY ARRANGEMENT:
(XEN)  Loaded kernel: ffffffff81000000->ffffffff8234f000
(XEN)  Init. ramdisk: ffffffff8234f000->ffffffff84acac00
(XEN)  Phys-Mach map: ffffffff84acb000->ffffffff852cb000
(XEN)  Start info:    ffffffff852cb000->ffffffff852cb4b4
(XEN)  Page tables:   ffffffff852cc000->ffffffff852f9000
(XEN)  Boot stack:    ffffffff852f9000->ffffffff852fa000
(XEN)  TOTAL:         ffffffff80000000->ffffffff85400000
(XEN)  ENTRY ADDRESS: ffffffff81cf3210
(XEN) Dom0 has maximum 2 VCPUs
(XEN) elf_load_binary: phdr 0 at 0xffffffff81000000 -> 0xffffffff81ae7000
(XEN) elf_load_binary: phdr 1 at 0xffffffff81c00000 -> 0xffffffff81cdd0e8
(XEN) elf_load_binary: phdr 2 at 0xffffffff81cde000 -> 0xffffffff81cf2680
(XEN) elf_load_binary: phdr 3 at 0xffffffff81cf3000 -> 0xffffffff81dcd000
(XEN) AMD-Vi: No iommu for device 0000:00:00.0
(XEN) setup 0000:00:00.0 for d0 failed (-19)
(XEN) AMD-Vi: No iommu for device 0000:00:00.2
(XEN) setup 0000:00:00.2 for d0 failed (-19)
(XEN) AMD-Vi: Setup I/O page table: device id = 0x10, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x80, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x81, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x88, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x90, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x92, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x98, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x9a, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa0, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa2, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa3, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa4, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa5, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa7, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa8, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa9, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc0, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc1, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc2, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc3, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc4, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc5, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x100, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x101, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x300, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x400, root table = 0x23e818000, domain = 0, paging mode = 3
(XEN) Scrubbing Free RAM: ....................................................................................................................done.
(XEN) Initial low memory virq threshold set at 0x4000 pages.
(XEN) Std. Loglevel: All
(XEN) Guest Loglevel: All
(XEN) Xen is relinquishing VGA console.
(XEN) *** Serial input -> DOM0 (type 'CTRL-a' three times to switch input to Xen)
(XEN) Freed 272kB init memory.
(XEN) IOAPIC[0]: Set PCI routing entry (5-9 -> 0x68 -> IRQ 9 Mode:1 Active:1)
(XEN) traps.c:2495:d0 Domain attempted WRMSR 00000000c0010201 from 0x0000000000000000 to 0x000000000000abcd.
(XEN) PCI add device 0000:00:00.0
(XEN) PCI add device 0000:00:00.2
(XEN) PCI add device 0000:00:02.0
(XEN) PCI add device 0000:00:10.0
(XEN) PCI add device 0000:00:10.1
(XEN) PCI add device 0000:00:11.0
(XEN) PCI add device 0000:00:12.0
(XEN) PCI add device 0000:00:12.2
(XEN) PCI add device 0000:00:13.0
(XEN) PCI add device 0000:00:13.2
(XEN) PCI add device 0000:00:14.0
(XEN) PCI add device 0000:00:14.2
(XEN) PCI add device 0000:00:14.3
(XEN) PCI add device 0000:00:14.4
(XEN) PCI add device 0000:00:14.5
(XEN) PCI add device 0000:00:14.7
(XEN) PCI add device 0000:00:15.0
(XEN) PCI add device 0000:00:15.1
(XEN) PCI add device 0000:00:18.0
(XEN) PCI add device 0000:00:18.1
(XEN) PCI add device 0000:00:18.2
(XEN) PCI add device 0000:00:18.3
(XEN) PCI add device 0000:00:18.4
(XEN) PCI add device 0000:00:18.5
(XEN) PCI add device 0000:01:00.0
(XEN) PCI add device 0000:01:00.1
(XEN) PCI add device 0000:03:00.0
(XEN) PCI add device 0000:04:00.0
(XEN) IOAPIC[0]: Set PCI routing entry (5-8 -> 0x60 -> IRQ 8 Mode:0 Active:0)
(XEN) IOAPIC[0]: Set PCI routing entry (5-5 -> 0x48 -> IRQ 5 Mode:0 Active:0)
(XEN) IOAPIC[0]: Set PCI routing entry (5-13 -> 0x90 -> IRQ 13 Mode:0 Active:0)
(XEN) IOAPIC[0]: Set PCI routing entry (5-18 -> 0xa8 -> IRQ 18 Mode:1 Active:1)
(XEN) IOAPIC[0]: Set PCI routing entry (5-16 -> 0xb0 -> IRQ 16 Mode:1 Active:1)
(XEN) IOAPIC[0]: Set PCI routing entry (5-17 -> 0xb8 -> IRQ 17 Mode:1 Active:1)
(XEN) IOAPIC[0]: Set PCI routing entry (5-19 -> 0xd8 -> IRQ 19 Mode:1 Active:1)
(XEN) MSI information:
(XEN)  MSI     24 vec=28  fixed  edge deassert phys    cpu dest=00000001 mask=0/0/?
(XEN)  MSI     25 vec=c0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     26 vec=c8 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     27 vec=d0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     28 vec=21 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI-X   29 vec=29 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   30 vec=31 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   31 vec=39 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   32 vec=41 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   33 vec=49 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   34 vec=51 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI     35 vec=59 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     36 vec=61 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     37 vec=69 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     38 vec=71 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN) number of MP IRQ sources: 15.
(XEN) number of IO-APIC #5 registers: 24.
(XEN) testing the IO APIC.......................
(XEN) IO APIC #5......
(XEN) .... register #00: 05000000
(XEN) .......    : physical APIC id: 05
(XEN) .......    : Delivery Type: 0
(XEN) .......    : LTS          : 0
(XEN) .... register #01: 00178021
(XEN) .......     : max redirection entries: 0017
(XEN) .......     : PRQ implemented: 1
(XEN) .......     : IO APIC version: 0021
(XEN) .... register #02: 05000000
(XEN) .......     : arbitration: 05
(XEN) .... register #03: 05018021
(XEN) .......     : Boot DT    : 1
(XEN) .... IRQ redirection table:
(XEN)  NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:   
(XEN)  00 000 00  1    0    0   0   0    0    0    00
(XEN)  01 001 01  0    0    0   0   0    1    1    30
(XEN)  02 001 01  0    0    0   0   0    1    1    F0
(XEN)  03 001 01  0    0    0   0   0    1    1    38
(XEN)  04 001 01  0    0    0   0   0    1    1    40
(XEN)  05 00F 0F  1    0    0   0   0    1    1    48
(XEN)  06 001 01  0    0    0   0   0    1    1    50
(XEN)  07 001 01  0    0    0   0   0    1    1    58
(XEN)  08 001 01  0    0    0   0   0    1    1    60
(XEN)  09 001 01  0    1    0   1   0    1    1    68
(XEN)  0a 001 01  0    0    0   0   0    1    1    70
(XEN)  0b 001 01  0    0    0   0   0    1    1    78
(XEN)  0c 001 01  0    0    0   0   0    1    1    88
(XEN)  0d 00F 0F  1    0    0   0   0    1    1    90
(XEN)  0e 001 01  0    0    0   0   0    1    1    98
(XEN)  0f 001 01  0    0    0   0   0    1    1    A0
(XEN)  10 001 01  0    1    0   1   0    1    1    B0
(XEN)  11 001 01  0    1    0   1   0    1    1    B8
(XEN)  12 001 01  0    1    0   1   0    1    1    A8
(XEN)  13 00F 0F  1    1    0   1   0    1    1    D8
(XEN)  14 000 00  1    0    0   0   0    0    0    00
(XEN)  15 000 00  1    0    0   0   0    0    0    00
(XEN)  16 000 00  1    0    0   0   0    0    0    00
(XEN)  17 000 00  1    0    0   0   0    0    0    00
(XEN) Using vector-based indexing
(XEN) IRQ to pin mappings:
(XEN) IRQ240 -> 0:2
(XEN) IRQ48 -> 0:1
(XEN) IRQ56 -> 0:3
(XEN) IRQ64 -> 0:4
(XEN) IRQ72 -> 0:5
(XEN) IRQ80 -> 0:6
(XEN) IRQ88 -> 0:7
(XEN) IRQ96 -> 0:8
(XEN) IRQ104 -> 0:9
(XEN) IRQ112 -> 0:10
(XEN) IRQ120 -> 0:11
(XEN) IRQ136 -> 0:12
(XEN) IRQ144 -> 0:13
(XEN) IRQ152 -> 0:14
(XEN) IRQ160 -> 0:15
(XEN) IRQ176 -> 0:16
(XEN) IRQ184 -> 0:17
(XEN) IRQ168 -> 0:18
(XEN) IRQ216 -> 0:19
(XEN) .................................... done.
(XEN) Guest interrupt information:
(XEN)    IRQ:   0 affinity:1 vec:f0 type=IO-APIC-edge    status=00000000 timer_interrupt+0/0x18a
(XEN)    IRQ:   1 affinity:1 vec:30 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   3 affinity:1 vec:38 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   4 affinity:1 vec:40 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   5 affinity:f vec:48 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   6 affinity:1 vec:50 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   7 affinity:1 vec:58 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   8 affinity:1 vec:60 type=IO-APIC-edge    status=00000010 in-flight=0 domain-list=0:  8(----),
(XEN)    IRQ:   9 affinity:1 vec:68 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0:  9(----),
(XEN)    IRQ:  10 affinity:1 vec:70 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  11 affinity:1 vec:78 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  12 affinity:1 vec:88 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  13 affinity:f vec:90 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  14 affinity:1 vec:98 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  15 affinity:1 vec:a0 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  16 affinity:1 vec:b0 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0: 16(----),
(XEN)    IRQ:  17 affinity:1 vec:b8 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0: 17(----),
(XEN)    IRQ:  18 affinity:1 vec:a8 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0: 18(----),
(XEN)    IRQ:  19 affinity:f vec:d8 type=IO-APIC-level   status=00000002 mapped, unbound
(XEN)    IRQ:  24 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000 iommu_interrupt_handler+0/0x57
(XEN)    IRQ:  25 affinity:1 vec:c0 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:279(----),
(XEN)    IRQ:  26 affinity:1 vec:c8 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:278(----),
(XEN)    IRQ:  27 affinity:1 vec:d0 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:277(----),
(XEN)    IRQ:  28 affinity:1 vec:21 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:276(----),
(XEN)    IRQ:  29 affinity:1 vec:29 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:275(----),
(XEN)    IRQ:  30 affinity:1 vec:31 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:274(----),
(XEN)    IRQ:  31 affinity:1 vec:39 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:273(----),
(XEN)    IRQ:  32 affinity:1 vec:41 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:272(----),
(XEN)    IRQ:  33 affinity:1 vec:49 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:271(----),
(XEN)    IRQ:  34 affinity:1 vec:51 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:270(----),
(XEN)    IRQ:  35 affinity:1 vec:59 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:269(----),
(XEN)    IRQ:  36 affinity:1 vec:61 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:268(----),
(XEN)    IRQ:  37 affinity:1 vec:69 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:267(----),
(XEN)    IRQ:  38 affinity:1 vec:71 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:266(----),
(XEN) IO-APIC interrupt information:
(XEN)     IRQ  0 Vec240:
(XEN)       Apic 0x00, Pin  2: vec=f0 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  1 Vec 48:
(XEN)       Apic 0x00, Pin  1: vec=30 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  3 Vec 56:
(XEN)       Apic 0x00, Pin  3: vec=38 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  4 Vec 64:
(XEN)       Apic 0x00, Pin  4: vec=40 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  5 Vec 72:
(XEN)       Apic 0x00, Pin  5: vec=48 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=1 dest_id:15
(XEN)     IRQ  6 Vec 80:
(XEN)       Apic 0x00, Pin  6: vec=50 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  7 Vec 88:
(XEN)       Apic 0x00, Pin  7: vec=58 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  8 Vec 96:
(XEN)       Apic 0x00, Pin  8: vec=60 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  9 Vec104:
(XEN)       Apic 0x00, Pin  9: vec=68 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 10 Vec112:
(XEN)       Apic 0x00, Pin 10: vec=70 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 11 Vec120:
(XEN)       Apic 0x00, Pin 11: vec=78 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 12 Vec136:
(XEN)       Apic 0x00, Pin 12: vec=88 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 13 Vec144:
(XEN)       Apic 0x00, Pin 13: vec=90 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=1 dest_id:15
(XEN)     IRQ 14 Vec152:
(XEN)       Apic 0x00, Pin 14: vec=98 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 15 Vec160:
(XEN)       Apic 0x00, Pin 15: vec=a0 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 16 Vec176:
(XEN)       Apic 0x00, Pin 16: vec=b0 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 17 Vec184:
(XEN)       Apic 0x00, Pin 17: vec=b8 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 18 Vec168:
(XEN)       Apic 0x00, Pin 18: vec=a8 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 19 Vec216:
(XEN)       Apic 0x00, Pin 19: vec=d8 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=1 dest_id:15
(XEN) --- Dumping Per-dev IOMMU Interrupt Remapping Table ---
(XEN)         BDF: 10, IRTE[   0] : 0xc00145
(XEN)         BDF: 80, IRTE[   0] : 0x290145
(XEN)         BDF: 80, IRTE[   1] : 0x310145
(XEN)         BDF: 80, IRTE[   2] : 0x390145
(XEN)         BDF: 81, IRTE[   0] : 0x410145
(XEN)         BDF: 81, IRTE[   1] : 0x490145
(XEN)         BDF: 81, IRTE[   2] : 0x510145
(XEN)         BDF: 88, IRTE[   0] : 0x210145
(XEN)         BDF: a0, IRTE[   1] : 0x300145
(XEN)         BDF: a0, IRTE[   2] : 0xf00145
(XEN)         BDF: a0, IRTE[   3] : 0x380145
(XEN)         BDF: a0, IRTE[   4] : 0x400145
(XEN)         BDF: a0, IRTE[   5] : 0x480f44
(XEN)         BDF: a0, IRTE[   6] : 0x500145
(XEN)         BDF: a0, IRTE[   7] : 0x580145
(XEN)         BDF: a0, IRTE[   8] : 0x600145
(XEN)         BDF: a0, IRTE[   9] : 0x680145
(XEN)         BDF: a0, IRTE[  10] : 0x700145
(XEN)         BDF: a0, IRTE[  11] : 0x780145
(XEN)         BDF: a0, IRTE[  12] : 0x880145
(XEN)         BDF: a0, IRTE[  13] : 0x900f44
(XEN)         BDF: a0, IRTE[  14] : 0x980145
(XEN)         BDF: a0, IRTE[  15] : 0xa00145
(XEN)         BDF: a0, IRTE[  16] : 0xb00145
(XEN)         BDF: a0, IRTE[  17] : 0xb80145
(XEN)         BDF: a0, IRTE[  18] : 0xa80145
(XEN)         BDF: a0, IRTE[  19] : 0xd80f44
(XEN)         BDF: a8, IRTE[   0] : 0xc80145
(XEN)         BDF: a9, IRTE[   0] : 0xd00145
(XEN)         BDF:100, IRTE[   0] : 0x610145
(XEN)         BDF:101, IRTE[   0] : 0x690145
(XEN)         BDF:300, IRTE[   0] : 0x590145
(XEN)         BDF:400, IRTE[   0] : 0x710145
(XEN) --- Dumping Shared IOMMU Interrupt Remapping Table ---

[-- Attachment #3: before.txt --]
[-- Type: text/plain, Size: 29504 bytes --]

 __  __            _  _    _____                    _        _     _      
 \ \/ /___ _ __   | || |  |___ /    _   _ _ __  ___| |_ __ _| |__ | | ___ 
  \  // _ \ '_ \  | || |_   |_ \ __| | | | '_ \/ __| __/ _` | '_ \| |/ _ \
  /  \  __/ | | | |__   _| ___) |__| |_| | | | \__ \ || (_| | |_) | |  __/
 /_/\_\___|_| |_|    |_|(_)____/    \__,_|_| |_|___/\__\__,_|_.__/|_|\___|
                                                                          
(XEN) Xen version 4.3-unstable (root@) (gcc (Ubuntu/Linaro 4.7.2-2ubuntu1) 4.7.2) debug=y Wed Apr 24 14:53:13 CDT 2013
(XEN) Latest ChangeSet: unavailable
(XEN) Bootloader: GRUB 2.00-7ubuntu11
(XEN) Command line: placeholder dom0_max_vcpus=2 dom0_vcpus_pin dom0_mem=4G,max:4G iommu=debug apic_verbosity=debug
(XEN) Video information:
(XEN)  VGA is text mode 80x25, font 8x16
(XEN)  VBE/DDC methods: V2; EDID transfer time: 1 seconds
(XEN) Disc information:
(XEN)  Found 1 MBR signatures
(XEN)  Found 1 EDD information structures
(XEN) Xen-e820 RAM map:
(XEN)  0000000000000000 - 000000000009fc00 (usable)
(XEN)  000000000009fc00 - 00000000000a0000 (reserved)
(XEN)  00000000000e0000 - 0000000000100000 (reserved)
(XEN)  0000000000100000 - 00000000cea84000 (usable)
(XEN)  00000000cea84000 - 00000000ced5e000 (reserved)
(XEN)  00000000ced5e000 - 00000000cf0ae000 (ACPI NVS)
(XEN)  00000000cf0ae000 - 00000000cf5e4000 (reserved)
(XEN)  00000000cf5e4000 - 00000000cf5e5000 (usable)
(XEN)  00000000cf5e5000 - 00000000cf7eb000 (ACPI NVS)
(XEN)  00000000cf7eb000 - 00000000cfaa3000 (usable)
(XEN)  00000000cfaa3000 - 00000000cfdf6000 (reserved)
(XEN)  00000000cfdf6000 - 00000000cfe00000 (usable)
(XEN)  00000000fec00000 - 00000000fec01000 (reserved)
(XEN)  00000000fec10000 - 00000000fec11000 (reserved)
(XEN)  00000000fed00000 - 00000000fed01000 (reserved)
(XEN)  00000000fed40000 - 00000000fed45000 (reserved)
(XEN)  00000000fed80000 - 00000000fed90000 (reserved)
(XEN)  00000000ff000000 - 0000000100000000 (reserved)
(XEN)  0000000100001000 - 000000042f000000 (usable)
(XEN) ACPI: RSDP 000F0490, 0024 (r2 ALASKA)
(XEN) ACPI: XSDT CF09B078, 006C (r1 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI: FACP CF0A2A10, 010C (r5 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI Warning (tbfadt-0464): Optional field "Pm2ControlBlock" has zero address or length: 0000000000000000/1 [20070126]
(XEN) ACPI: DSDT CF09B178, 7897 (r2 ALASKA    A M I        0 INTL 20051117)
(XEN) ACPI: FACS CF0AC080, 0040
(XEN) ACPI: APIC CF0A2B20, 0072 (r3 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI: FPDT CF0A2B98, 0044 (r1 ALASKA    A M I  1072009 AMI     10013)
(XEN) ACPI: MCFG CF0A2BE0, 003C (r1 ALASKA    A M I  1072009 MSFT    10013)
(XEN) ACPI: HPET CF0A2C20, 0038 (r1 ALASKA    A M I  1072009 AMI         5)
(XEN) ACPI: SSDT CF0A2C58, 0D40 (r1 AMD    POWERNOW        1 AMD         1)
(XEN) ACPI: SSDT CF0A3998, 04B7 (r2    AMD     ALIB        1 MSFT  4000000)
(XEN) ACPI: IVRS CF0A3E50, 0070 (r2  AMD   AMDIOMMU        1 AMD         0)
(XEN) ACPI: CRAT CF0A3EC0, 0398 (r1 AMD    AGESA           1 AMD         1)
(XEN) System RAM: 16348MB (16741268kB)
(XEN) No NUMA configuration found
(XEN) Faking a node at 0000000000000000-000000042f000000
(XEN) Domain heap initialised
(XEN) found SMP MP-table at 000fd7e0
(XEN) DMI 2.7 present.
(XEN) APIC boot state is 'xapic'
(XEN) Using APIC driver default
(XEN) ACPI: PM-Timer IO Port: 0x808
(XEN) ACPI: v5 SLEEP INFO: control[0:0], status[0:0]
(XEN) ACPI: SLEEP INFO: pm1x_cnt[804,0], pm1x_evt[800,0]
(XEN) ACPI: 32/64X FACS address mismatch in FADT - cf0ac080/0000000000000000, using 32
(XEN) ACPI:             wakeup_vec[cf0ac08c], vec_size[20]
(XEN) ACPI: Local APIC address 0xfee00000
(XEN) ACPI: LAPIC (acpi_id[0x01] lapic_id[0x10] enabled)
(XEN) Processor #16 5:0 APIC version 16
(XEN) ACPI: LAPIC (acpi_id[0x02] lapic_id[0x11] enabled)
(XEN) Processor #17 5:0 APIC version 16
(XEN) ACPI: LAPIC (acpi_id[0x03] lapic_id[0x12] enabled)
(XEN) Processor #18 5:0 APIC version 16
(XEN) ACPI: LAPIC (acpi_id[0x04] lapic_id[0x13] enabled)
(XEN) Processor #19 5:0 APIC version 16
(XEN) ACPI: LAPIC_NMI (acpi_id[0xff] high edge lint[0x1])
(XEN) ACPI: IOAPIC (id[0x05] address[0xfec00000] gsi_base[0])
(XEN) IOAPIC[0]: apic_id 5, version 33, address 0xfec00000, GSI 0-23
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
(XEN) ACPI: IRQ0 used by override.
(XEN) ACPI: IRQ2 used by override.
(XEN) ACPI: IRQ9 used by override.
(XEN) Enabling APIC mode:  Flat.  Using 1 I/O APICs
(XEN) ACPI: HPET id: 0x10228210 base: 0xfed00000
(XEN) ERST table was not found
(XEN) Using ACPI (MADT) for SMP configuration information
(XEN) SMP: Allowing 4 CPUs (0 hotplug CPUs)
(XEN) mapped APIC to ffff82c3ffdfb000 (fee00000)
(XEN) mapped IOAPIC to ffff82c3ffdfa000 (fec00000)
(XEN) IRQ limits: 24 GSI, 760 MSI/MSI-X
(XEN) Using scheduler: SMP Credit Scheduler (credit)
(XEN) Detected 3293.910 MHz processor.
(XEN) Initing memory sharing.
(XEN) xstate_init: using cntxt_size: 0x3c0 and states: 0x4000000000000007
(XEN) AMD Fam15h machine check reporting enabled
(XEN) PCI: MCFG configuration 0: base e0000000 segment 0000 buses 00 - ff
(XEN) PCI: Not using MCFG for segment 0000 bus 00-ff
(XEN) AMD-Vi: Found MSI capability block at 0x54
(XEN) AMD-Vi: ACPI Table:
(XEN) AMD-Vi:  Signature IVRS
(XEN) AMD-Vi:  Length 0x70
(XEN) AMD-Vi:  Revision 0x2
(XEN) AMD-Vi:  CheckSum 0xb2
(XEN) AMD-Vi:  OEM_Id AMD  
(XEN) AMD-Vi:  OEM_Table_Id AMDIOMMU
(XEN) AMD-Vi:  OEM_Revision 0x1
(XEN) AMD-Vi:  Creator_Id AMD 
(XEN) AMD-Vi:  Creator_Revision 0
(XEN) AMD-Vi: IVRS Block: type 0x10 flags 0xfe len 0x40 id 0x2
(XEN) AMD-Vi: IVHD Device Entry: type 0x3 id 0x8 flags 0
(XEN) AMD-Vi:  Dev_Id Range: 0x8 -> 0xfffe
(XEN) AMD-Vi: IVHD Device Entry: type 0x43 id 0x200 flags 0
(XEN) AMD-Vi:  Dev_Id Range: 0x200 -> 0x2ff alias 0xa4
(XEN) AMD-Vi: IVHD Device Entry: type 0 id 0 flags 0
(XEN) AMD-Vi: IVHD Device Entry: type 0x48 id 0 flags 0
(XEN) AMD-Vi: IVHD Special: 0000:00:14.0 variety 0x2 handle 0
(XEN) AMD-Vi: IVHD Device Entry: type 0x48 id 0 flags 0xd7
(XEN) AMD-Vi: IVHD Special: 0000:00:14.0 variety 0x1 handle 0x5
(XEN) AMD-Vi: IOMMU Extended Features:
(XEN)  - Prefetch Pages Command
(XEN)  - Peripheral Page Service Request
(XEN)  - Guest Translation
(XEN)  - Invalidate All Command
(XEN) AMD-Vi: PPR Log Enabled.
(XEN) AMD-Vi: Guest Translation Enabled.
(XEN) AMD-Vi: IOMMU 0 Enabled.
(XEN) AMD-Vi: Enabling per-device vector maps
(XEN) I/O virtualisation enabled
(XEN)  - Dom0 mode: Relaxed
(XEN) Interrupt remapping enabled
(XEN) Getting VERSION: 80050010
(XEN) Getting VERSION: 80050010
(XEN) Getting ID: 10000000
(XEN) Getting LVT0: 700
(XEN) Getting LVT1: 400
(XEN) enabled ExtINT on CPU#0
(XEN) ENABLING IO-APIC IRQs
(XEN)  -> Using new ACK method
(XEN) init IO_APIC IRQs
(XEN)  IO-APIC (apicid-pin) 5-0, 5-16, 5-17, 5-18, 5-19, 5-20, 5-21, 5-22, 5-23 not connected.
(XEN) ..TIMER: vector=0xF0 apic1=0 pin1=2 apic2=-1 pin2=-1
(XEN) number of MP IRQ sources: 15.
(XEN) number of IO-APIC #5 registers: 24.
(XEN) testing the IO APIC.......................
(XEN) IO APIC #5......
(XEN) .... register #00: 05000000
(XEN) .......    : physical APIC id: 05
(XEN) .......    : Delivery Type: 0
(XEN) .......    : LTS          : 0
(XEN) .... register #01: 00178021
(XEN) .......     : max redirection entries: 0017
(XEN) .......     : PRQ implemented: 1
(XEN) .......     : IO APIC version: 0021
(XEN) .... register #02: 05000000
(XEN) .......     : arbitration: 05
(XEN) .... register #03: 05018021
(XEN) .......     : Boot DT    : 1
(XEN) .... IRQ redirection table:
(XEN)  NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:   
(XEN)  00 000 00  1    0    0   0   0    0    0    00
(XEN)  01 001 01  0    0    0   0   0    1    1    30
(XEN)  02 001 01  0    0    0   0   0    1    1    F0
(XEN)  03 001 01  0    0    0   0   0    1    1    38
(XEN)  04 001 01  0    0    0   0   0    1    1    40
(XEN)  05 001 01  0    0    0   0   0    1    1    48
(XEN)  06 001 01  0    0    0   0   0    1    1    50
(XEN)  07 001 01  0    0    0   0   0    1    1    58
(XEN)  08 001 01  0    0    0   0   0    1    1    60
(XEN)  09 001 01  1    1    0   1   0    1    1    68
(XEN)  0a 001 01  0    0    0   0   0    1    1    70
(XEN)  0b 001 01  0    0    0   0   0    1    1    78
(XEN)  0c 001 01  0    0    0   0   0    1    1    88
(XEN)  0d 001 01  0    0    0   0   0    1    1    90
(XEN)  0e 001 01  0    0    0   0   0    1    1    98
(XEN)  0f 001 01  0    0    0   0   0    1    1    A0
(XEN)  10 000 00  1    0    0   0   0    0    0    00
(XEN)  11 000 00  1    0    0   0   0    0    0    00
(XEN)  12 000 00  1    0    0   0   0    0    0    00
(XEN)  13 000 00  1    0    0   0   0    0    0    00
(XEN)  14 000 00  1    0    0   0   0    0    0    00
(XEN)  15 000 00  1    0    0   0   0    0    0    00
(XEN)  16 000 00  1    0    0   0   0    0    0    00
(XEN)  17 000 00  1    0    0   0   0    0    0    00
(XEN) Using vector-based indexing
(XEN) IRQ to pin mappings:
(XEN) IRQ240 -> 0:2
(XEN) IRQ48 -> 0:1
(XEN) IRQ56 -> 0:3
(XEN) IRQ64 -> 0:4
(XEN) IRQ72 -> 0:5
(XEN) IRQ80 -> 0:6
(XEN) IRQ88 -> 0:7
(XEN) IRQ96 -> 0:8
(XEN) IRQ104 -> 0:9
(XEN) IRQ112 -> 0:10
(XEN) IRQ120 -> 0:11
(XEN) IRQ136 -> 0:12
(XEN) IRQ144 -> 0:13
(XEN) IRQ152 -> 0:14
(XEN) IRQ160 -> 0:15
(XEN) .................................... done.
(XEN) Using local APIC timer interrupts.
(XEN) calibrating APIC timer ...
(XEN) ..... CPU clock speed is 3293.8493 MHz.
(XEN) ..... host bus clock speed is 99.8136 MHz.
(XEN) ..... bus_scale = 0x6637
(XEN) Using standard rendezvous
(XEN) Platform timer is 14.318MHz HPET
(XEN) Allocated console ring of 32 KiB.
(XEN) HVM: ASIDs enabled.
(XEN) SVM: Supported advanced features:
(XEN)  - Nested Page Tables (NPT)
(XEN)  - Last Branch Record (LBR) Virtualisation
(XEN)  - Next-RIP Saved on #VMEXIT
(XEN)  - VMCB Clean Bits
(XEN)  - DecodeAssists
(XEN)  - Pause-Intercept Filter
(XEN)  - TSC Rate MSR
(XEN) HVM: SVM enabled
(XEN) HVM: Hardware Assisted Paging (HAP) detected
(XEN) HVM: HAP page sizes: 4kB, 2MB, 1GB
(XEN) masked ExtINT on CPU#1
(XEN) microcode: CPU1 collect_cpu_info: patch_id=0x600100e
(XEN) masked ExtINT on CPU#2
(XEN) microcode: CPU2 collect_cpu_info: patch_id=0x600100e
(XEN) masked ExtINT on CPU#3
(XEN) microcode: CPU3 collect_cpu_info: patch_id=0x600100e
(XEN) Brought up 4 CPUs
(XEN) ACPI sleep modes: S3
(XEN) MCA: Use hw thresholding to adjust polling frequency
(XEN) mcheck_poll: Machine check polling timer started.
(XEN) *** LOADING DOMAIN 0 ***
(XEN) elf_parse_binary: phdr: paddr=0x1000000 memsz=0xae7000
(XEN) elf_parse_binary: phdr: paddr=0x1c00000 memsz=0xdd0e8
(XEN) elf_parse_binary: phdr: paddr=0x1cde000 memsz=0x14680
(XEN) elf_parse_binary: phdr: paddr=0x1cf3000 memsz=0x65c000
(XEN) elf_parse_binary: memory: 0x1000000 -> 0x234f000
(XEN) elf_xen_parse_note: GUEST_OS = "linux"
(XEN) elf_xen_parse_note: GUEST_VERSION = "2.6"
(XEN) elf_xen_parse_note: XEN_VERSION = "xen-3.0"
(XEN) elf_xen_parse_note: VIRT_BASE = 0xffffffff80000000
(XEN) elf_xen_parse_note: ENTRY = 0xffffffff81cf3210
(XEN) elf_xen_parse_note: HYPERCALL_PAGE = 0xffffffff81001000
(XEN) elf_xen_parse_note: FEATURES = "!writable_page_tables|pae_pgdir_above_4gb"
(XEN) elf_xen_parse_note: PAE_MODE = "yes"
(XEN) elf_xen_parse_note: LOADER = "generic"
(XEN) elf_xen_parse_note: unknown xen elf note (0xd)
(XEN) elf_xen_parse_note: SUSPEND_CANCEL = 0x1
(XEN) elf_xen_parse_note: HV_START_LOW = 0xffff800000000000
(XEN) elf_xen_parse_note: PADDR_OFFSET = 0x0
(XEN) elf_xen_addr_calc_check: addresses:
(XEN)     virt_base        = 0xffffffff80000000
(XEN)     elf_paddr_offset = 0x0
(XEN)     virt_offset      = 0xffffffff80000000
(XEN)     virt_kstart      = 0xffffffff81000000
(XEN)     virt_kend        = 0xffffffff8234f000
(XEN)     virt_entry       = 0xffffffff81cf3210
(XEN)     p2m_base         = 0xffffffffffffffff
(XEN)  Xen  kernel: 64-bit, lsb, compat32
(XEN)  Dom0 kernel: 64-bit, PAE, lsb, paddr 0x1000000 -> 0x234f000
(XEN) PHYSICAL MEMORY ARRANGEMENT:
(XEN)  Dom0 alloc.:   0000000238000000->000000023c000000 (1022084 pages to be allocated)
(XEN)  Init. ramdisk: 000000042c884000->000000042efffc00
(XEN) VIRTUAL MEMORY ARRANGEMENT:
(XEN)  Loaded kernel: ffffffff81000000->ffffffff8234f000
(XEN)  Init. ramdisk: ffffffff8234f000->ffffffff84acac00
(XEN)  Phys-Mach map: ffffffff84acb000->ffffffff852cb000
(XEN)  Start info:    ffffffff852cb000->ffffffff852cb4b4
(XEN)  Page tables:   ffffffff852cc000->ffffffff852f9000
(XEN)  Boot stack:    ffffffff852f9000->ffffffff852fa000
(XEN)  TOTAL:         ffffffff80000000->ffffffff85400000
(XEN)  ENTRY ADDRESS: ffffffff81cf3210
(XEN) Dom0 has maximum 2 VCPUs
(XEN) elf_load_binary: phdr 0 at 0xffffffff81000000 -> 0xffffffff81ae7000
(XEN) elf_load_binary: phdr 1 at 0xffffffff81c00000 -> 0xffffffff81cdd0e8
(XEN) elf_load_binary: phdr 2 at 0xffffffff81cde000 -> 0xffffffff81cf2680
(XEN) elf_load_binary: phdr 3 at 0xffffffff81cf3000 -> 0xffffffff81dcd000
(XEN) AMD-Vi: No iommu for device 0000:00:00.0
(XEN) setup 0000:00:00.0 for d0 failed (-19)
(XEN) AMD-Vi: No iommu for device 0000:00:00.2
(XEN) setup 0000:00:00.2 for d0 failed (-19)
(XEN) AMD-Vi: Setup I/O page table: device id = 0x10, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x80, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x81, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x88, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x90, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x92, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x98, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x9a, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa0, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa2, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa3, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa4, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa5, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa7, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa8, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xa9, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc0, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc1, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc2, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc3, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc4, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0xc5, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x100, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x101, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x300, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) AMD-Vi: Setup I/O page table: device id = 0x400, root table = 0x23f7a7000, domain = 0, paging mode = 3
(XEN) Scrubbing Free RAM: ....................................................................................................................done.
(XEN) Initial low memory virq threshold set at 0x4000 pages.
(XEN) Std. Loglevel: All
(XEN) Guest Loglevel: All
(XEN) Xen is relinquishing VGA console.
(XEN) *** Serial input -> DOM0 (type 'CTRL-a' three times to switch input to Xen)
(XEN) Freed 276kB init memory.
(XEN) IOAPIC[0]: Set PCI routing entry (5-9 -> 0x68 -> IRQ 9 Mode:1 Active:1)
(XEN) traps.c:2495:d0 Domain attempted WRMSR 00000000c0010201 from 0x0000000000000000 to 0x000000000000abcd.
(XEN) PCI add device 0000:00:00.0
(XEN) PCI add device 0000:00:00.2
(XEN) PCI add device 0000:00:02.0
(XEN) PCI add device 0000:00:10.0
(XEN) PCI add device 0000:00:10.1
(XEN) PCI add device 0000:00:11.0
(XEN) PCI add device 0000:00:12.0
(XEN) PCI add device 0000:00:12.2
(XEN) PCI add device 0000:00:13.0
(XEN) PCI add device 0000:00:13.2
(XEN) PCI add device 0000:00:14.0
(XEN) PCI add device 0000:00:14.2
(XEN) PCI add device 0000:00:14.3
(XEN) PCI add device 0000:00:14.4
(XEN) PCI add device 0000:00:14.5
(XEN) PCI add device 0000:00:14.7
(XEN) PCI add device 0000:00:15.0
(XEN) PCI add device 0000:00:15.1
(XEN) PCI add device 0000:00:18.0
(XEN) PCI add device 0000:00:18.1
(XEN) PCI add device 0000:00:18.2
(XEN) PCI add device 0000:00:18.3
(XEN) PCI add device 0000:00:18.4
(XEN) PCI add device 0000:00:18.5
(XEN) PCI add device 0000:01:00.0
(XEN) PCI add device 0000:01:00.1
(XEN) PCI add device 0000:03:00.0
(XEN) PCI add device 0000:04:00.0
(XEN) IOAPIC[0]: Set PCI routing entry (5-8 -> 0x60 -> IRQ 8 Mode:0 Active:0)
(XEN) IOAPIC[0]: Set PCI routing entry (5-5 -> 0x48 -> IRQ 5 Mode:0 Active:0)
(XEN) IOAPIC[0]: Set PCI routing entry (5-13 -> 0x90 -> IRQ 13 Mode:0 Active:0)
(XEN) IOAPIC[0]: Set PCI routing entry (5-18 -> 0xa8 -> IRQ 18 Mode:1 Active:1)
(XEN) IOAPIC[0]: Set PCI routing entry (5-16 -> 0xb0 -> IRQ 16 Mode:1 Active:1)
(XEN) IOAPIC[0]: Set PCI routing entry (5-17 -> 0xb8 -> IRQ 17 Mode:1 Active:1)
(XEN) IOAPIC[0]: Set PCI routing entry (5-19 -> 0xd8 -> IRQ 19 Mode:1 Active:1)
(XEN) MSI information:
(XEN)  MSI     24 vec=28  fixed  edge deassert phys    cpu dest=00000001 mask=0/0/?
(XEN)  MSI     25 vec=c0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     26 vec=c8 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     27 vec=d0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     28 vec=21 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI-X   29 vec=29 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   30 vec=31 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   31 vec=39 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   32 vec=41 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   33 vec=49 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   34 vec=51 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI     35 vec=59 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     36 vec=61 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     37 vec=69 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN)  MSI     38 vec=71 lowest  edge   assert  log lowest dest=00000001 mask=0/1/?
(XEN) number of MP IRQ sources: 15.
(XEN) number of IO-APIC #5 registers: 24.
(XEN) testing the IO APIC.......................
(XEN) IO APIC #5......
(XEN) .... register #00: 05000000
(XEN) .......    : physical APIC id: 05
(XEN) .......    : Delivery Type: 0
(XEN) .......    : LTS          : 0
(XEN) .... register #01: 00178021
(XEN) .......     : max redirection entries: 0017
(XEN) .......     : PRQ implemented: 1
(XEN) .......     : IO APIC version: 0021
(XEN) .... register #02: 05000000
(XEN) .......     : arbitration: 05
(XEN) .... register #03: 05018021
(XEN) .......     : Boot DT    : 1
(XEN) .... IRQ redirection table:
(XEN)  NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:   
(XEN)  00 000 00  1    0    0   0   0    0    0    00
(XEN)  01 001 01  0    0    0   0   0    1    1    30
(XEN)  02 001 01  0    0    0   0   0    1    1    F0
(XEN)  03 001 01  0    0    0   0   0    1    1    38
(XEN)  04 001 01  0    0    0   0   0    1    1    40
(XEN)  05 00F 0F  1    0    0   0   0    1    1    48
(XEN)  06 001 01  0    0    0   0   0    1    1    50
(XEN)  07 001 01  0    0    0   0   0    1    1    58
(XEN)  08 001 01  0    0    0   0   0    1    1    60
(XEN)  09 001 01  0    1    0   1   0    1    1    68
(XEN)  0a 001 01  0    0    0   0   0    1    1    70
(XEN)  0b 001 01  0    0    0   0   0    1    1    78
(XEN)  0c 001 01  0    0    0   0   0    1    1    88
(XEN)  0d 00F 0F  1    0    0   0   0    1    1    90
(XEN)  0e 001 01  0    0    0   0   0    1    1    98
(XEN)  0f 001 01  0    0    0   0   0    1    1    A0
(XEN)  10 001 01  0    1    0   1   0    1    1    B0
(XEN)  11 001 01  0    1    0   1   0    1    1    B8
(XEN)  12 001 01  0    1    0   1   0    1    1    A8
(XEN)  13 00F 0F  1    1    0   1   0    1    1    D8
(XEN)  14 000 00  1    0    0   0   0    0    0    00
(XEN)  15 000 00  1    0    0   0   0    0    0    00
(XEN)  16 000 00  1    0    0   0   0    0    0    00
(XEN)  17 000 00  1    0    0   0   0    0    0    00
(XEN) Using vector-based indexing
(XEN) IRQ to pin mappings:
(XEN) IRQ240 -> 0:2
(XEN) IRQ48 -> 0:1
(XEN) IRQ56 -> 0:3
(XEN) IRQ64 -> 0:4
(XEN) IRQ72 -> 0:5
(XEN) IRQ80 -> 0:6
(XEN) IRQ88 -> 0:7
(XEN) IRQ96 -> 0:8
(XEN) IRQ104 -> 0:9
(XEN) IRQ112 -> 0:10
(XEN) IRQ120 -> 0:11
(XEN) IRQ136 -> 0:12
(XEN) IRQ144 -> 0:13
(XEN) IRQ152 -> 0:14
(XEN) IRQ160 -> 0:15
(XEN) IRQ176 -> 0:16
(XEN) IRQ184 -> 0:17
(XEN) IRQ168 -> 0:18
(XEN) IRQ216 -> 0:19
(XEN) .................................... done.
(XEN) Guest interrupt information:
(XEN)    IRQ:   0 affinity:1 vec:f0 type=IO-APIC-edge    status=00000000 timer_interrupt+0/0x18a
(XEN)    IRQ:   1 affinity:1 vec:30 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   3 affinity:1 vec:38 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   4 affinity:1 vec:40 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   5 affinity:f vec:48 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   6 affinity:1 vec:50 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   7 affinity:1 vec:58 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   8 affinity:1 vec:60 type=IO-APIC-edge    status=00000010 in-flight=0 domain-list=0:  8(----),
(XEN)    IRQ:   9 affinity:1 vec:68 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0:  9(----),
(XEN)    IRQ:  10 affinity:1 vec:70 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  11 affinity:1 vec:78 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  12 affinity:1 vec:88 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  13 affinity:f vec:90 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  14 affinity:1 vec:98 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  15 affinity:1 vec:a0 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:  16 affinity:1 vec:b0 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0: 16(----),
(XEN)    IRQ:  17 affinity:1 vec:b8 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0: 17(----),
(XEN)    IRQ:  18 affinity:1 vec:a8 type=IO-APIC-level   status=00000010 in-flight=0 domain-list=0: 18(----),
(XEN)    IRQ:  19 affinity:f vec:d8 type=IO-APIC-level   status=00000002 mapped, unbound
(XEN)    IRQ:  24 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000 iommu_interrupt_handler+0/0x57
(XEN)    IRQ:  25 affinity:1 vec:c0 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:279(----),
(XEN)    IRQ:  26 affinity:1 vec:c8 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:278(----),
(XEN)    IRQ:  27 affinity:1 vec:d0 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:277(----),
(XEN)    IRQ:  28 affinity:1 vec:21 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:276(----),
(XEN)    IRQ:  29 affinity:1 vec:29 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:275(----),
(XEN)    IRQ:  30 affinity:1 vec:31 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:274(----),
(XEN)    IRQ:  31 affinity:1 vec:39 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:273(----),
(XEN)    IRQ:  32 affinity:1 vec:41 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:272(----),
(XEN)    IRQ:  33 affinity:1 vec:49 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:271(----),
(XEN)    IRQ:  34 affinity:1 vec:51 type=PCI-MSI/-X      status=00000010 in-flight=0 domain-list=0:270(----),
(XEN)    IRQ:  35 affinity:1 vec:59 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:269(----),
(XEN)    IRQ:  36 affinity:1 vec:61 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:268(----),
(XEN)    IRQ:  37 affinity:1 vec:69 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:267(----),
(XEN)    IRQ:  38 affinity:1 vec:71 type=PCI-MSI         status=00000010 in-flight=0 domain-list=0:266(----),
(XEN) IO-APIC interrupt information:
(XEN)     IRQ  0 Vec240:
(XEN)       Apic 0x00, Pin  2: vec=f0 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  1 Vec 48:
(XEN)       Apic 0x00, Pin  1: vec=30 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  3 Vec 56:
(XEN)       Apic 0x00, Pin  3: vec=38 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  4 Vec 64:
(XEN)       Apic 0x00, Pin  4: vec=40 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  5 Vec 72:
(XEN)       Apic 0x00, Pin  5: vec=48 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=1 dest_id:15
(XEN)     IRQ  6 Vec 80:
(XEN)       Apic 0x00, Pin  6: vec=50 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  7 Vec 88:
(XEN)       Apic 0x00, Pin  7: vec=58 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  8 Vec 96:
(XEN)       Apic 0x00, Pin  8: vec=60 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ  9 Vec104:
(XEN)       Apic 0x00, Pin  9: vec=68 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 10 Vec112:
(XEN)       Apic 0x00, Pin 10: vec=70 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 11 Vec120:
(XEN)       Apic 0x00, Pin 11: vec=78 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 12 Vec136:
(XEN)       Apic 0x00, Pin 12: vec=88 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 13 Vec144:
(XEN)       Apic 0x00, Pin 13: vec=90 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=1 dest_id:15
(XEN)     IRQ 14 Vec152:
(XEN)       Apic 0x00, Pin 14: vec=98 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 15 Vec160:
(XEN)       Apic 0x00, Pin 15: vec=a0 delivery=LoPri dest=L status=0 polarity=0 irr=0 trig=E mask=0 dest_id:1
(XEN)     IRQ 16 Vec176:
(XEN)       Apic 0x00, Pin 16: vec=b0 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 17 Vec184:
(XEN)       Apic 0x00, Pin 17: vec=b8 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 18 Vec168:
(XEN)       Apic 0x00, Pin 18: vec=a8 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=0 dest_id:1
(XEN)     IRQ 19 Vec216:
(XEN)       Apic 0x00, Pin 19: vec=d8 delivery=LoPri dest=L status=0 polarity=1 irr=0 trig=L mask=1 dest_id:15
(XEN) --- Dumping Per-dev IOMMU Interrupt Remapping Table ---
(XEN)         BDF: 10, IRTE[ 448] : 0xc00145
(XEN)         BDF: 80, IRTE[ 297] : 0x290145
(XEN)         BDF: 80, IRTE[ 305] : 0x310145
(XEN)         BDF: 80, IRTE[ 313] : 0x390145
(XEN)         BDF: 81, IRTE[ 321] : 0x410145
(XEN)         BDF: 81, IRTE[ 329] : 0x490145
(XEN)         BDF: 81, IRTE[ 337] : 0x510145
(XEN)         BDF: 88, IRTE[ 289] : 0x210145
(XEN)         BDF: a0, IRTE[ 304] : 0x300145
(XEN)         BDF: a0, IRTE[ 312] : 0x380145
(XEN)         BDF: a0, IRTE[ 320] : 0x400145
(XEN)         BDF: a0, IRTE[ 328] : 0x480f45
(XEN)         BDF: a0, IRTE[ 336] : 0x500145
(XEN)         BDF: a0, IRTE[ 344] : 0x580145
(XEN)         BDF: a0, IRTE[ 352] : 0x600145
(XEN)         BDF: a0, IRTE[ 360] : 0x680145
(XEN)         BDF: a0, IRTE[ 368] : 0x700145
(XEN)         BDF: a0, IRTE[ 376] : 0x780145
(XEN)         BDF: a0, IRTE[ 392] : 0x880145
(XEN)         BDF: a0, IRTE[ 400] : 0x900f45
(XEN)         BDF: a0, IRTE[ 408] : 0x980145
(XEN)         BDF: a0, IRTE[ 416] : 0xa00145
(XEN)         BDF: a0, IRTE[ 424] : 0xa80145
(XEN)         BDF: a0, IRTE[ 432] : 0xb00145
(XEN)         BDF: a0, IRTE[ 440] : 0xb80145
(XEN)         BDF: a0, IRTE[ 496] : 0xf00145
(XEN)         BDF: a8, IRTE[ 456] : 0xc80145
(XEN)         BDF: a9, IRTE[ 464] : 0xd00145
(XEN)         BDF:100, IRTE[ 353] : 0x610145
(XEN)         BDF:101, IRTE[ 369] : 0x710145
(XEN)         BDF:300, IRTE[ 345] : 0x590145
(XEN)         BDF:400, IRTE[ 361] : 0x690145
(XEN) --- Dumping Shared IOMMU Interrupt Remapping Table ---

[-- Attachment #4: 0001-Fix-IOAPIC-interrupt-routing-issue-introduced-in-the.patch --]
[-- Type: text/x-patch, Size: 4036 bytes --]

>From 1c63734faeda247441f37ea3efd8629501eff79d Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Wed, 24 Apr 2013 15:06:02 -0500
Subject: [PATCH 1/2] Fix IOAPIC interrupt routing issue introduced in the new
 IRTE indexing scheme

With the new IRTE indexing scheme introduced in the patch

http://lists.xen.org/archives/html/xen-devel/2013-04/msg02442.html

An IOAPIC routing entry (RTE) which has the "mask" bit set does not
get set up in the IOMMU interrupt remapping table. This causes the
IOMMU to block interrupts from these devices. This patch fixes the
issue by allocating the IOMMU remapping entry regardless, and using
the mask bit to "enable" or "disable" the IRTE.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 xen/drivers/passthrough/amd/iommu_intr.c |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
index a46064e..ed9ae79 100644
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -75,9 +75,9 @@ static void free_intremap_entry(int seg, int bdf, int offset)
 }
 
 static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
-    u8 dest_mode, u8 dest)
+    u8 dest_mode, u8 dest, u8 mask)
 {
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
+    set_field_in_reg_u32((mask == 0)? IOMMU_CONTROL_ENABLED: IOMMU_CONTROL_DISABLED, 0,
                             INT_REMAP_ENTRY_REMAPEN_MASK,
                             INT_REMAP_ENTRY_REMAPEN_SHIFT, entry);
     set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, *entry,
@@ -156,7 +156,9 @@ static int update_intremap_entry_from_ioapic(
          * Low half of incoming RTE is already in remapped format,
          * so need to recover vector and delivery mode from IRTE.
          */
-        ASSERT(get_rte_index(rte) == offset);
+        if ((vector != 0) && (delivery_mode != 0))
+	    ASSERT(get_rte_index(rte) == offset);
+
         vector = get_field_from_reg_u32(*entry,
                                         INT_REMAP_ENTRY_VECTOR_MASK,
                                         INT_REMAP_ENTRY_VECTOR_SHIFT);
@@ -164,7 +166,7 @@ static int update_intremap_entry_from_ioapic(
                                                INT_REMAP_ENTRY_INTTYPE_MASK,
                                                INT_REMAP_ENTRY_INTTYPE_SHIFT);
     }
-    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest, rte->mask);
 
     spin_unlock_irqrestore(lock, flags);
 
@@ -198,8 +200,6 @@ int __init amd_iommu_setup_ioapic_remapping(void)
         for ( pin = 0; pin < nr_ioapic_entries[apic]; pin++ )
         {
             rte = __ioapic_read_entry(apic, pin, 1);
-            if ( rte.mask == 1 )
-                continue;
 
             /* get device id of ioapic devices */
             bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
@@ -225,7 +225,7 @@ int __init amd_iommu_setup_ioapic_remapping(void)
             BUG_ON(offset >= INTREMAP_ENTRIES);
             entry = get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
-                                  delivery_mode, dest_mode, dest);
+                                  delivery_mode, dest_mode, dest, rte.mask);
             spin_unlock_irqrestore(lock, flags);
 
             set_rte_index(&rte, offset);
@@ -240,6 +240,7 @@ int __init amd_iommu_setup_ioapic_remapping(void)
             }
         }
     }
+
     return 0;
 }
 
@@ -397,7 +398,7 @@ static int update_intremap_entry_from_msi_msg(
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
-    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest, 0);
     spin_unlock_irqrestore(lock, flags);
 
     *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
-- 
1.7.10.4


[-- Attachment #5: 0002-Add-debug-key-for-dumping-IOMMU-IRTE.patch --]
[-- Type: text/x-patch, Size: 2896 bytes --]

>From e5ad3120b67dda98a77d0ae05773ceeb2be1c92f Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Wed, 24 Apr 2013 16:40:09 -0500
Subject: [PATCH 2/2] Add debug-key for dumping IOMMU IRTE.

Adding debug-key "j" to allow IOMMU IRTE dumping

Example: xl debug-key j

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 xen/drivers/passthrough/amd/iommu_intr.c |   57 ++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
index ed9ae79..ab16a5c 100644
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -23,6 +23,7 @@
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
 #include <asm/io_apic.h>
+#include <xen/keyhandler.h>
 
 #define INTREMAP_TABLE_ORDER    1
 #define INTREMAP_LENGTH 0xB
@@ -34,6 +35,14 @@ void *shared_intremap_table;
 unsigned long *shared_intremap_inuse;
 static DEFINE_SPINLOCK(shared_intremap_lock);
 
+static void _dump_amd_iommu_intremap_tables(unsigned char key);
+
+static struct keyhandler dump_iommu_intremap_table = {
+    .diagnostic = 0,
+    .u.fn = _dump_amd_iommu_intremap_tables,
+    .desc = "dump AMD iommu intremap tables"
+};
+
 static spinlock_t* get_intremap_lock(int seg, int req_id)
 {
     return (amd_iommu_perdev_intremap ?
@@ -241,6 +250,8 @@ int __init amd_iommu_setup_ioapic_remapping(void)
         }
     }
 
+    register_keyhandler('j', &dump_iommu_intremap_table);
+
     return 0;
 }
 
@@ -554,3 +565,49 @@ int __init amd_setup_hpet_msi(struct msi_desc *msi_desc)
 
     return 0;
 }
+
+static void _dump_intremap_table (u32 bdf, u32 *table)
+{
+    u32 count;
+
+    if ( table == NULL ) {
+	return; 
+    }
+
+    for (count = 0; count < INTREMAP_ENTRIES; count++) {
+	if (*(table+count) == 0)
+		continue;
+        printk("        BDF:%3x, IRTE[%4u] : 0x%x\n", bdf, count, *(table+count));
+    }
+}
+
+static int __dump_iommu_intremap_mapping(u16 seg, struct ivrs_mappings *ivrs_mapping)
+{
+    unsigned long flags;
+
+    if ( ivrs_mapping == NULL )
+	return 0;
+
+    spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
+
+    _dump_intremap_table(ivrs_mapping->dte_requestor_id, ivrs_mapping->intremap_table);
+
+    spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
+
+    return 0;
+}
+
+static void _dump_amd_iommu_intremap_tables(unsigned char key)
+{
+    unsigned long flags;
+
+    printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
+
+    iterate_ivrs_entries(__dump_iommu_intremap_mapping);
+
+    printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
+
+    spin_lock_irqsave(&shared_intremap_lock, flags);
+    _dump_intremap_table(-1, shared_intremap_table);
+    spin_unlock_irqrestore(&shared_intremap_lock, flags);
+}
-- 
1.7.10.4


[-- Attachment #6: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-24 21:52         ` suravee suthikulpanit
@ 2013-04-26 10:39           ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-26 10:39 UTC (permalink / raw)
  To: suravee suthikulpanit
  Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

>>> On 24.04.13 at 23:52, suravee suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> I have finally root cause the issue.  Here are the two patches that should 
> help fixing the issue.
> The first patch fixes the issue with the mouse/keyboard not working while 

That's not looking right: update_intremap_entry_from_ioapic() is
very well doing the necessary allocation if none happened at
boot, or at least the code to do so is there. If that one has a bug,
we should aim at fixing it instead of working around the issue. Did
you check that this

    offset = *index;
    if ( offset >= INTREMAP_ENTRIES )
    {
        offset = alloc_intremap_entry(iommu->seg, req_id, 1);
        if ( offset >= INTREMAP_ENTRIES )

in update_intremap_entry_from_ioapic() doesn't take effect for
them?

Also conceptually I don't see why we would want to waste IRTEs
for IO-APIC pins that likely will never get used.

> the second patch
> add a debug-key to help dumping the IRTE which I used to debug the issue.

That one is certainly going to be helpful going forward, but you
should clearly use 'V' as the key just like VT-d does instead of
occupying yet another one.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-19 10:57 ` [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
  2013-04-23 13:21   ` Suravee Suthikulanit
@ 2013-04-26 17:13   ` Suravee Suthikulanit
  2013-04-29  7:31     ` Jan Beulich
  1 sibling, 1 reply; 20+ messages in thread
From: Suravee Suthikulanit @ 2013-04-26 17:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

On 4/19/2013 5:57 AM, Jan Beulich wrote:
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
>
> @@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
>                   ioapic_sbdf[special->handle].bdf = bdf;
>                   ioapic_sbdf[special->handle].seg = seg;
>   
> -                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
> -                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
> +                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
> +                    u16, nr_ioapic_entries[apic]);
>                   if ( nr_ioapic_entries[apic] &&
> -                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
>                   {
>                       printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>                       return 0;
>                   }
> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                       nr_ioapic_entries[apic]);
>               }

Jan,

Ok.. here is why the (offset >= INTREMAP_ENTRIES) in update_intremap_entry_from_ioapic failed.

+                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                       nr_ioapic_entries[apic]);

should have been

+                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                       (nr_ioapic_entries[apic] * sizeof(u16)));

Since nr_ioapic_entries[apic] = 24, only pin_2_idx[0 to 11] is set to 0xffff.  This causes the pin_2_idx[12-23] to fail the check.

Suravee.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/6] x86: enable multi-vector MSI
  2013-04-23  6:26     ` Jan Beulich
@ 2013-04-26 21:16       ` Suravee Suthikulanit
  0 siblings, 0 replies; 20+ messages in thread
From: Suravee Suthikulanit @ 2013-04-26 21:16 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

On 4/23/2013 1:26 AM, Jan Beulich wrote:
>>>> On 23.04.13 at 02:55, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
> wrote:
>> On 4/19/2013 5:59 AM, Jan Beulich wrote:
>>> --- a/xen/arch/x86/msi.c
>>> +++ b/xen/arch/x86/msi.c
>>> @@ -238,6 +238,11 @@ static int write_msi_msg(struct msi_desc
>>>            u8 bus = dev->bus;
>>>            u8 slot = PCI_SLOT(dev->devfn);
>>>            u8 func = PCI_FUNC(dev->devfn);
>>> +        int nr = entry->msi_attrib.entry_nr;
>>> +
>>> +        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
>>> +        if ( nr )
>>> +            return 0;
>> This logic seems incorrect.  Do you meant to write --nr?
> No, this indeed has to be -nr (i.e. the "master" entry, which is the
> first one in the array).
>
>> This causes assertion here.  Also, investigation showing the
>> value of nr is 0 here.
> nr being 0 here is perfectly fine, meaning this is the first ("master")
> entry of a multi-vector device (it can't be a single-vector one, as in
> that case entry[0].msi.nvec == 1, i.e. the & yields zero regardless
> of msg->data).
>
> And the assertion should hold, due to
>
>      *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
>
> in update_intremap_entry_from_msi_msg(), and
> alloc_intremap_entry() returning only aligned blocks.
>
> So the question isn't just what value nr there has, but also what
> the other involved values are.
>
> Jan
Ok, thanks for the explanation.  Do you think you could add a comment in the 
code?  It was not quite clear at the beginning why we need this 
assertion.

The problem occurs when the function 
"xen/driver/passthrough/amd/iommu_init.c: enable_iommu()" trying to 
initialize IOMMU and calling the "set_msi_affinity", which in turn 
calling "write_msi_msg".  At this point, "nvec" is still zero.  So, the 
following code should fix it.

     unsigned int nvec = entry[-nr].msi.nvec;
     if ( nvec > 0 )
             ASSERT((msg->data & (nvec - 1)) == nr);

Suravee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-26 17:13   ` Suravee Suthikulanit
@ 2013-04-29  7:31     ` Jan Beulich
  2013-04-29  7:33       ` Suthikulpanit, Suravee
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2013-04-29  7:31 UTC (permalink / raw)
  To: Suravee Suthikulanit
  Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

>>> On 26.04.13 at 19:13, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
wrote:
> On 4/19/2013 5:57 AM, Jan Beulich wrote:
>> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
>> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
>>
>> @@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
>>                   ioapic_sbdf[special->handle].bdf = bdf;
>>                   ioapic_sbdf[special->handle].seg = seg;
>>   
>> -                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
>> -                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
>> +                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
>> +                    u16, nr_ioapic_entries[apic]);
>>                   if ( nr_ioapic_entries[apic] &&
>> -                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>> +                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
>>                   {
>>                       printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>>                       return 0;
>>                   }
>> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
>> +                       nr_ioapic_entries[apic]);
>>               }
> 
> Jan,
> 
> Ok.. here is why the (offset >= INTREMAP_ENTRIES) in 
> update_intremap_entry_from_ioapic failed.
> 
> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                       nr_ioapic_entries[apic]);
> 
> should have been
> 
> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                       (nr_ioapic_entries[apic] * sizeof(u16)));
> 
> Since nr_ioapic_entries[apic] = 24, only pin_2_idx[0 to 11] is set to 
> 0xffff.  This causes the pin_2_idx[12-23] to fail the check.

Ah - so a one line fix. Will integrate that into the current patch.
Are you saying that with that change, the patch finally works?

Thanks, Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-29  7:31     ` Jan Beulich
@ 2013-04-29  7:33       ` Suthikulpanit, Suravee
  2013-04-29  7:42         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Suthikulpanit, Suravee @ 2013-04-29  7:33 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Konrad Rzeszutek Wilk, Shin, Jacob, xiantao.zhang, xen-devel

Yes.  I have tested this and check the interrupt remapping tables.  They look fine and working now.

Suravee

PS: Are you going to check in the patch to add the debug key also?  (You could change it to "V" as you pointed out.)


-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Monday, April 29, 2013 2:31 AM
To: Suthikulpanit, Suravee
Cc: Shin, Jacob; xiantao.zhang@intel.com; xen-devel; Konrad Rzeszutek Wilk
Subject: Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
Importance: High

>>> On 26.04.13 at 19:13, Suravee Suthikulanit 
>>> <suravee.suthikulpanit@amd.com>
wrote:
> On 4/19/2013 5:57 AM, Jan Beulich wrote:
>> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
>> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
>>
>> @@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
>>                   ioapic_sbdf[special->handle].bdf = bdf;
>>                   ioapic_sbdf[special->handle].seg = seg;
>>   
>> -                ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
>> -                    unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
>> +                ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
>> +                    u16, nr_ioapic_entries[apic]);
>>                   if ( nr_ioapic_entries[apic] &&
>> -                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>> +                     !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
>>                   {
>>                       printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>>                       return 0;
>>                   }
>> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
>> +                       nr_ioapic_entries[apic]);
>>               }
> 
> Jan,
> 
> Ok.. here is why the (offset >= INTREMAP_ENTRIES) in 
> update_intremap_entry_from_ioapic failed.
> 
> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                       nr_ioapic_entries[apic]);
> 
> should have been
> 
> +                memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
> +                       (nr_ioapic_entries[apic] * sizeof(u16)));
> 
> Since nr_ioapic_entries[apic] = 24, only pin_2_idx[0 to 11] is set to 
> 0xffff.  This causes the pin_2_idx[12-23] to fail the check.

Ah - so a one line fix. Will integrate that into the current patch.
Are you saying that with that change, the patch finally works?

Thanks, Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping
  2013-04-29  7:33       ` Suthikulpanit, Suravee
@ 2013-04-29  7:42         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-04-29  7:42 UTC (permalink / raw)
  To: Suravee Suthikulpanit, George Dunlap
  Cc: Konrad Rzeszutek Wilk, Jacob Shin, xiantao.zhang, xen-devel

>>> On 29.04.13 at 09:33, "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com> wrote:
> PS: Are you going to check in the patch to add the debug key also?  (You 
> could change it to "V" as you pointed out.)

That's the plan - albeit we'll need a word from George regarding
doing so still for 4.3 (for the multi-vector-MSI series we mostly
settled on not doing this for 4.3, considering how late in the game
we are now, and seeing that patch 5 still has at least one issue).

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2013-04-29  7:42 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-04-19 10:50 [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich
2013-04-19 10:57 ` [PATCH 1/6] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
2013-04-23 13:21   ` Suravee Suthikulanit
2013-04-23 15:06     ` Suravee Suthikulanit
2013-04-24 13:34       ` Jan Beulich
2013-04-24 21:52         ` suravee suthikulpanit
2013-04-26 10:39           ` Jan Beulich
2013-04-26 17:13   ` Suravee Suthikulanit
2013-04-29  7:31     ` Jan Beulich
2013-04-29  7:33       ` Suthikulpanit, Suravee
2013-04-29  7:42         ` Jan Beulich
2013-04-19 10:57 ` [PATCH 2/6] AMD IOMMU: untie remap and vector maps Jan Beulich
2013-04-19 10:58 ` [PATCH 3/6] VT-d: enable for multi-vector MSI Jan Beulich
2013-04-19 10:59 ` [PATCH 4/6] AMD IOMMU: enable for multi-vector MSI Jan Beulich
2013-04-19 10:59 ` [PATCH 5/6] x86: enable " Jan Beulich
2013-04-23  0:55   ` Suravee Suthikulanit
2013-04-23  6:26     ` Jan Beulich
2013-04-26 21:16       ` Suravee Suthikulanit
2013-04-19 11:00 ` [PATCH 6/6] pciif: add multi-vector-MSI command Jan Beulich
2013-04-19 14:48 ` [PATCH 0/6] x86/IOMMU: multi-vector MSI Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.