* [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up
@ 2015-06-22 14:38 Jan Beulich
  2015-06-22 14:46 ` [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic Jan Beulich
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:38 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Roger Pau Monne

Only patches 1, 2, and 6 are really RFC (with some debugging code still
left in); the others (hence v4) have been submitted before.

1: PCI: add config space write abstract intercept logic
2: MSI-X: track host and guest mask-all requests separately
3: MSI-X: be more careful during teardown
4: MSI-X: access MSI-X table only after having enabled MSI-X
5: MSI-X: reduce fiddling with control register during restore
6: MSI: properly track guest masking requests

A fundamental question is whether to go without the so far missing
(and more involved) MMCFG intercepts. This largely depends on whether
there are any halfway recent Dom0 OSes using MMCFG accesses for the
base 256 bytes of PCI config space.
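
Should we end up needing them, the expectation is that such MMCFG
intercepts would funnel into the same hook patch 1 introduces for port
CF8/CFC accesses. Purely as an illustration (the handler name and
surrounding plumbing below are hypothetical and not part of this
series):

    /* Hypothetical MMCFG write path -- illustration only. */
    static int dom0_mmcfg_cfg_write(unsigned int seg, unsigned int bdf,
                                    unsigned int reg, unsigned int size,
                                    uint32_t data)
    {
        /* Let the MSI/MSI-X logic veto or adjust the write first. */
        if ( pci_conf_write_intercept(seg, bdf, reg, size, &data) < 0 )
            return -EPERM;
        /* ... carry out the (possibly adjusted) config space write ... */
        return 0;
    }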

Signed-off-by: Jan Beulich <jbeulich@suse.com>


* [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
@ 2015-06-22 14:46 ` Jan Beulich
  2015-06-22 19:31   ` Konrad Rzeszutek Wilk
  2015-06-24 17:09   ` Andrew Cooper
  2015-06-22 14:47 ` [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately Jan Beulich
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:46 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser


This is to be used by MSI code, and later to also be hooked up to
MMCFG accesses by Dom0.
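
The contract, as used by pci_cfg_ok() in the traps.c hunk below: a
negative return value causes the access to be refused, while a
non-negative one lets the write proceed with the (possibly adjusted)
value left in *data. A condensed caller-side sketch, reusing the names
from that hunk:

    /* Sketch of the caller side (cf. pci_cfg_ok() / guest_io_write()). */
    if ( pci_conf_write_intercept(0, machine_bdf, start, size, &data) < 0 )
        return 0;                     /* treat like a permission failure */
    pci_conf_write(currd->arch.pci_cf8, port & 3, size, data);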

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1111,6 +1111,12 @@ void pci_cleanup_msi(struct pci_dev *pde
     msi_free_irqs(pdev);
 }
 
+int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
+                                 unsigned int size, uint32_t *data)
+{
+    return 0;
+}
+
 int pci_restore_msi_state(struct pci_dev *pdev)
 {
     unsigned long flags;
--- a/xen/arch/x86/pci.c
+++ b/xen/arch/x86/pci.c
@@ -67,3 +67,28 @@ void pci_conf_write(uint32_t cf8, uint8_
 
     spin_unlock_irqrestore(&pci_config_lock, flags);
 }
+
+int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
+                             unsigned int reg, unsigned int size,
+                             uint32_t *data)
+{
+    struct pci_dev *pdev;
+    int rc = 0;
+
+    /*
+     * Avoid expensive operations when no hook is going to do anything
+     * for the access anyway.
+     */
+    if ( reg < 64 || reg >= 256 )
+        return 0;
+
+    spin_lock(&pcidevs_lock);
+
+    pdev = pci_get_pdev(seg, PCI_BUS(bdf), PCI_DEVFN2(bdf));
+    if ( pdev )
+        rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+
+    spin_unlock(&pcidevs_lock);
+
+    return rc;
+}
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1771,8 +1771,8 @@ static bool_t admin_io_okay(unsigned int
     return ioports_access_permitted(d, port, port + bytes - 1);
 }
 
-static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
-                         unsigned int start, unsigned int size)
+static bool_t pci_cfg_ok(struct domain *currd, unsigned int start,
+                         unsigned int size, uint32_t *write)
 {
     uint32_t machine_bdf;
 
@@ -1804,8 +1804,12 @@ static bool_t pci_cfg_ok(struct domain *
             start |= CF8_ADDR_HI(currd->arch.pci_cf8);
     }
 
-    return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
-                                      start, start + size - 1, write);
+    if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
+                                   start, start + size - 1, !!write) != 0 )
+         return 0;
+
+    return !write ||
+           pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;
 }
 
 uint32_t guest_io_read(unsigned int port, unsigned int bytes,
@@ -1857,7 +1861,7 @@ uint32_t guest_io_read(unsigned int port
             size = min(bytes, 4 - (port & 3));
             if ( size == 3 )
                 size = 2;
-            if ( pci_cfg_ok(currd, 0, port & 3, size) )
+            if ( pci_cfg_ok(currd, port & 3, size, NULL) )
                 sub_data = pci_conf_read(currd->arch.pci_cf8, port & 3, size);
         }
 
@@ -1928,7 +1932,7 @@ void guest_io_write(unsigned int port, u
             size = min(bytes, 4 - (port & 3));
             if ( size == 3 )
                 size = 2;
-            if ( pci_cfg_ok(currd, 1, port & 3, size) )
+            if ( pci_cfg_ok(currd, port & 3, size, &data) )
                 pci_conf_write(currd->arch.pci_cf8, port & 3, size, data);
         }
 
--- a/xen/include/asm-x86/pci.h
+++ b/xen/include/asm-x86/pci.h
@@ -15,4 +15,10 @@ struct arch_pci_dev {
     vmask_t used_vectors;
 };
 
+int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
+                             unsigned int reg, unsigned int size,
+                             uint32_t *data);
+int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
+                                 unsigned int size, uint32_t *data);
+
 #endif /* __X86_PCI_H__ */






* [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
  2015-06-22 14:46 ` [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic Jan Beulich
@ 2015-06-22 14:47 ` Jan Beulich
  2015-06-24 17:15   ` Andrew Cooper
  2015-06-22 14:49 ` [PATCH v4 3/6] x86/MSI-X: be more careful during teardown Jan Beulich
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser


Host uses of the bits will be added subsequently, and must not be
overridden by guests (including Dom0, namely when acting on behalf of
a guest).
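
The invariant being established (field names as added to struct
arch_msix below): the mask-all bit actually written to the device is
the OR of the host's and the guest's requests, and a guest write merely
updates guest_maskall before being passed on. In essence (a condensed
sketch of the msi.c hunk below):

    /* Combine host and guest mask-all requests before forwarding. */
    msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
    if ( msix->host_maskall || msix->guest_maskall )
        *data |= PCI_MSIX_FLAGS_MASKALL;
    else
        *data &= ~PCI_MSIX_FLAGS_MASKALL;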

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -846,6 +846,12 @@ static int msix_capability_init(struct p
 
     if ( !msix->used_entries )
     {
+        msix->host_maskall = 0;
+        if ( !msix->guest_maskall )
+            control &= ~PCI_MSIX_FLAGS_MASKALL;
+        else
+            control |= PCI_MSIX_FLAGS_MASKALL;
+
         if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                 msix->table.last) )
             WARN();
@@ -1114,6 +1120,36 @@ void pci_cleanup_msi(struct pci_dev *pde
 int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
                                  unsigned int size, uint32_t *data)
 {
+    u16 seg = pdev->seg;
+    u8 bus = pdev->bus;
+    u8 slot = PCI_SLOT(pdev->devfn);
+    u8 func = PCI_FUNC(pdev->devfn);
+    struct msi_desc *entry;
+    unsigned int pos;
+
+    if ( pdev->msix )
+    {
+        entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
+        pos = entry ? entry->msi_attrib.pos
+                    : pci_find_cap_offset(seg, bus, slot, func,
+                                          PCI_CAP_ID_MSIX);
+        ASSERT(pos);
+
+        if ( reg < pos || reg >= msix_pba_offset_reg(pos) + 4 )
+            return 0;
+printk("%04x:%02x:%02x.%u: MSI-X %03x:%u->%04x (%d,%d)\n",//temp
+       seg, bus, slot, func, reg, size, *data, pdev->msix->host_maskall, pdev->msix->guest_maskall);//temp
+
+        if ( reg != msix_control_reg(pos) || size != 2 )
+            return -EACCES;
+
+        pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
+        if ( pdev->msix->host_maskall )
+            *data |= PCI_MSIX_FLAGS_MASKALL;
+
+        return 1;
+    }
+
     return 0;
 }
 
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -233,6 +233,7 @@ struct arch_msix {
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
     int table_idx[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
+    bool_t host_maskall, guest_maskall;
     domid_t warned;
 };
 






* [PATCH v4 3/6] x86/MSI-X: be more careful during teardown
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
  2015-06-22 14:46 ` [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic Jan Beulich
  2015-06-22 14:47 ` [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately Jan Beulich
@ 2015-06-22 14:49 ` Jan Beulich
  2015-06-22 14:50 ` [PATCH v4 4/6] x86/MSI-X: access MSI-X table only after having enabled MSI-X Jan Beulich
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser


When a device gets detached from a guest, pciback will clear its
command register, thus disabling both memory and I/O decoding. The
disabled memory decoding, however, has an effect on the MSI-X table
accesses the hypervisor does: These won't have the intended effect
anymore. Even worse, for PCIe devices (but not SR-IOV virtual
functions) such accesses may (will?) be treated as Unsupported
Requests, causing respective errors to be surfaced, potentially in the
form of NMIs that may be fatal to the hypervisor or Dom0 in different
ways. Hence rather than carrying out these accesses, we should avoid
them where we can, and use alternative (e.g. PCI config space based)
mechanisms to achieve at least the same effect.

At this time it continues to be unclear whether this is fixing an
actual bug or is rather just working around bogus (but apparently
common) system behavior.
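
The pattern applied throughout msi.c below is: check that memory
decoding is enabled before any MSI-X table MMIO, and when it isn't,
either fail the operation or fall back to the capability's mask-all bit
in config space. Condensed sketch (using the helper and fields this and
the previous patch introduce; control-register handling abridged):

    /* Fallback in msi_set_mask_bit() when the table can't be reached. */
    if ( likely(memory_decoded(pdev)) )
        writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
    else if ( flag )
    {
        /* Can't reach the table -- mask the whole function instead. */
        pdev->msix->host_maskall = 1;
        pci_conf_write16(seg, bus, slot, func,
                         msix_control_reg(entry->msi_attrib.pos),
                         control | PCI_MSIX_FLAGS_MASKALL);
    }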

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
Backporting note (largely to myself):
   Depends on (not yet backported to 4.4 and earlier) commit 061eebe0e
   "x86/MSI: drop workaround for insecure Dom0 kernels" (due to re-use
   of struct arch_msix's warned field).

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -217,9 +217,9 @@ void destroy_irq(unsigned int irq)
     }
 
     spin_lock_irqsave(&desc->lock, flags);
-    desc->status  |= IRQ_DISABLED;
     desc->status  &= ~IRQ_GUEST;
     desc->handler->shutdown(desc);
+    desc->status |= IRQ_DISABLED;
     action = desc->action;
     desc->action  = NULL;
     desc->msi_desc = NULL;
@@ -995,8 +995,8 @@ void __init release_irq(unsigned int irq
     spin_lock_irqsave(&desc->lock,flags);
     action = desc->action;
     desc->action  = NULL;
-    desc->status |= IRQ_DISABLED;
     desc->handler->shutdown(desc);
+    desc->status |= IRQ_DISABLED;
     spin_unlock_irqrestore(&desc->lock,flags);
 
     /* Wait to make sure it's not being used on another CPU */
@@ -1732,8 +1732,8 @@ static irq_guest_action_t *__pirq_guest_
     BUG_ON(action->in_flight != 0);
 
     /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
-    desc->status |= IRQ_DISABLED;
     desc->handler->disable(desc);
+    desc->status |= IRQ_DISABLED;
 
     /*
      * Mark any remaining pending EOIs as ready to flush.
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -123,6 +123,27 @@ static void msix_put_fixmap(struct arch_
     spin_unlock(&msix->table_lock);
 }
 
+static bool_t memory_decoded(const struct pci_dev *dev)
+{
+    u8 bus, slot, func;
+
+    if ( !dev->info.is_virtfn )
+    {
+        bus = dev->bus;
+        slot = PCI_SLOT(dev->devfn);
+        func = PCI_FUNC(dev->devfn);
+    }
+    else
+    {
+        bus = dev->info.physfn.bus;
+        slot = PCI_SLOT(dev->info.physfn.devfn);
+        func = PCI_FUNC(dev->info.physfn.devfn);
+    }
+
+    return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) &
+              PCI_COMMAND_MEMORY);
+}
+
 /*
  * MSI message composition
  */
@@ -166,7 +187,7 @@ void msi_compose_msg(unsigned vector, co
     }
 }
 
-static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+static bool_t read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
     switch ( entry->msi_attrib.type )
     {
@@ -201,6 +222,8 @@ static void read_msi_msg(struct msi_desc
     {
         void __iomem *base = entry->mask_base;
 
+        if ( unlikely(!memory_decoded(entry->dev)) )
+            return 0;
         msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
         msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
@@ -212,6 +235,8 @@ static void read_msi_msg(struct msi_desc
 
     if ( iommu_intremap )
         iommu_read_msi_from_ire(entry, msg);
+
+    return 1;
 }
 
 static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
@@ -262,6 +287,8 @@ static int write_msi_msg(struct msi_desc
     {
         void __iomem *base = entry->mask_base;
 
+        if ( unlikely(!memory_decoded(entry->dev)) )
+            return -ENXIO;
         writel(msg->address_lo,
                base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         writel(msg->address_hi,
@@ -289,7 +316,8 @@ void set_msi_affinity(struct irq_desc *d
     ASSERT(spin_is_locked(&desc->lock));
 
     memset(&msg, 0, sizeof(msg));
-    read_msi_msg(msi_desc, &msg);
+    if ( !read_msi_msg(msi_desc, &msg) )
+        return;
 
     msg.data &= ~MSI_DATA_VECTOR_MASK;
     msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
@@ -349,23 +377,27 @@ int msi_maskable_irq(const struct msi_de
            || entry->msi_attrib.maskbit;
 }
 
-static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
+static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
 {
     struct msi_desc *entry = desc->msi_desc;
+    struct pci_dev *pdev;
+    u16 seg;
+    u8 bus, slot, func;
     bool_t flag = host || guest;
 
     ASSERT(spin_is_locked(&desc->lock));
     BUG_ON(!entry || !entry->dev);
+    pdev = entry->dev;
+    seg = pdev->seg;
+    bus = pdev->bus;
+    slot = PCI_SLOT(pdev->devfn);
+    func = PCI_FUNC(pdev->devfn);
     switch ( entry->msi_attrib.type )
     {
     case PCI_CAP_ID_MSI:
         if ( entry->msi_attrib.maskbit )
         {
             u32 mask_bits;
-            u16 seg = entry->dev->seg;
-            u8 bus = entry->dev->bus;
-            u8 slot = PCI_SLOT(entry->dev->devfn);
-            u8 func = PCI_FUNC(entry->dev->devfn);
 
             mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
             mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
@@ -374,25 +406,54 @@ static void msi_set_mask_bit(struct irq_
         }
         break;
     case PCI_CAP_ID_MSIX:
-    {
-        int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
-        writel(flag, entry->mask_base + offset);
-        readl(entry->mask_base + offset);
-        break;
-    }
+        if ( likely(memory_decoded(pdev)) )
+        {
+            writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+            readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+            break;
+        }
+        if ( flag )
+        {
+            u16 control;
+            domid_t domid = pdev->domain->domain_id;
+
+            pdev->msix->host_maskall = 1;
+            control = pci_conf_read16(seg, bus, slot, func,
+                                      msix_control_reg(entry->msi_attrib.pos));
+            if ( control & PCI_MSIX_FLAGS_MASKALL )
+                break;
+            pci_conf_write16(seg, bus, slot, func,
+                             msix_control_reg(entry->msi_attrib.pos),
+                             control | PCI_MSIX_FLAGS_MASKALL);
+            if ( pdev->msix->warned != domid )
+            {
+                pdev->msix->warned = domid;
+                printk(XENLOG_G_WARNING
+                       "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
+                       desc->irq, domid, pdev->seg, pdev->bus,
+                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+            }
+            break;
+        }
+        /* fall through */
     default:
-        BUG();
-        break;
+        return 0;
     }
     entry->msi_attrib.host_masked = host;
     entry->msi_attrib.guest_masked = guest;
+
+    return 1;
 }
 
 static int msi_get_mask_bit(const struct msi_desc *entry)
 {
-    switch (entry->msi_attrib.type) {
+    if ( !entry->dev )
+        return -1;
+
+    switch ( entry->msi_attrib.type )
+    {
     case PCI_CAP_ID_MSI:
-        if (!entry->dev || !entry->msi_attrib.maskbit)
+        if ( !entry->msi_attrib.maskbit )
             break;
         return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
                                 PCI_SLOT(entry->dev->devfn),
@@ -400,6 +461,8 @@ static int msi_get_mask_bit(const struct
                                 entry->msi.mpos) >>
                 entry->msi_attrib.entry_nr) & 1;
     case PCI_CAP_ID_MSIX:
+        if ( unlikely(!memory_decoded(entry->dev)) )
+            break;
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
     return -1;
@@ -407,12 +470,16 @@ static int msi_get_mask_bit(const struct
 
 void mask_msi_irq(struct irq_desc *desc)
 {
-    msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
+    if ( unlikely(!msi_set_mask_bit(desc, 1,
+                                    desc->msi_desc->msi_attrib.guest_masked)) )
+        BUG_ON(!(desc->status & IRQ_DISABLED));
 }
 
 void unmask_msi_irq(struct irq_desc *desc)
 {
-    msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
+    if ( unlikely(!msi_set_mask_bit(desc, 0,
+                                    desc->msi_desc->msi_attrib.guest_masked)) )
+        WARN();
 }
 
 void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
@@ -425,13 +492,15 @@ static unsigned int startup_msi_irq(stru
     bool_t guest_masked = (desc->status & IRQ_GUEST) &&
                           is_hvm_domain(desc->msi_desc->dev->domain);
 
-    msi_set_mask_bit(desc, 0, guest_masked);
+    if ( unlikely(!msi_set_mask_bit(desc, 0, guest_masked)) )
+        WARN();
     return 0;
 }
 
 static void shutdown_msi_irq(struct irq_desc *desc)
 {
-    msi_set_mask_bit(desc, 1, 1);
+    if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
+        BUG_ON(!(desc->status & IRQ_DISABLED));
 }
 
 void ack_nonmaskable_msi_irq(struct irq_desc *desc)
@@ -743,6 +812,9 @@ static int msix_capability_init(struct p
     control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
     msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
 
+    if ( unlikely(!memory_decoded(dev)) )
+        return -ENXIO;
+
     if ( desc )
     {
         entry = alloc_msi_entry(1);
@@ -882,7 +954,8 @@ static int msix_capability_init(struct p
     ++msix->used_entries;
 
     /* Restore MSI-X enabled bits */
-    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
+    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                     control & ~PCI_MSIX_FLAGS_MASKALL);
 
     return 0;
 }
@@ -1027,8 +1100,16 @@ static void __pci_disable_msix(struct ms
 
     BUG_ON(list_empty(&dev->msi_list));
 
-    writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-
+    if ( likely(memory_decoded(dev)) )
+        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+    else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
+    {
+        printk(XENLOG_WARNING
+               "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
+               entry->irq, dev->seg, dev->bus,
+               PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+        control |= PCI_MSIX_FLAGS_MASKALL;
+    }
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     _pci_cleanup_msix(dev->msix);
@@ -1204,15 +1285,24 @@ int pci_restore_msi_state(struct pci_dev
             nr = entry->msi.nvec;
         }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+        {
             msix_set_enable(pdev, 0);
+            if ( unlikely(!memory_decoded(pdev)) )
+            {
+                spin_unlock_irqrestore(&desc->lock, flags);
+                return -ENXIO;
+            }
+        }
 
         msg = entry->msg;
         write_msi_msg(entry, &msg);
 
         for ( i = 0; ; )
         {
-            msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
-                             entry[i].msi_attrib.guest_masked);
+            if ( unlikely(!msi_set_mask_bit(desc,
+                                            entry[i].msi_attrib.host_masked,
+                                            entry[i].msi_attrib.guest_masked)) )
+                BUG();
 
             if ( !--nr )
                 break;





* [PATCH v4 4/6] x86/MSI-X: access MSI-X table only after having enabled MSI-X
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
                   ` (2 preceding siblings ...)
  2015-06-22 14:49 ` [PATCH v4 3/6] x86/MSI-X: be more careful during teardown Jan Beulich
@ 2015-06-22 14:50 ` Jan Beulich
  2015-06-22 14:51 ` [PATCH v4 RFC 5/6] x86/MSI-X: reduce fiddling with control register during restore Jan Beulich
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:50 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser


As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
better way") and its broken predecessor, make sure we don't access the
MSI-X table without having enabled MSI-X first, using the mask-all flag
instead to prevent interrupts from occurring.
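
In short, every place that needs to touch the MSI-X table now brackets
the access roughly like this (condensed from the msi.c hunks below;
error handling omitted):

    /* Enable MSI-X with all vectors masked before any table access. */
    control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                     control | (PCI_MSIX_FLAGS_ENABLE |
                                PCI_MSIX_FLAGS_MASKALL));

    /* ... read/write MSI-X table entries via MMIO here ... */

    /* Restore the original control word once done. */
    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);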

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -144,6 +144,17 @@ static bool_t memory_decoded(const struc
               PCI_COMMAND_MEMORY);
 }
 
+static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
+{
+    u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn),
+                                  PCI_FUNC(dev->devfn), msix_control_reg(pos));
+
+    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+        return 0;
+
+    return memory_decoded(dev);
+}
+
 /*
  * MSI message composition
  */
@@ -222,7 +233,8 @@ static bool_t read_msi_msg(struct msi_de
     {
         void __iomem *base = entry->mask_base;
 
-        if ( unlikely(!memory_decoded(entry->dev)) )
+        if ( unlikely(!msix_memory_decoded(entry->dev,
+                                           entry->msi_attrib.pos)) )
             return 0;
         msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -287,7 +299,8 @@ static int write_msi_msg(struct msi_desc
     {
         void __iomem *base = entry->mask_base;
 
-        if ( unlikely(!memory_decoded(entry->dev)) )
+        if ( unlikely(!msix_memory_decoded(entry->dev,
+                                           entry->msi_attrib.pos)) )
             return -ENXIO;
         writel(msg->address_lo,
                base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -381,9 +394,9 @@ static bool_t msi_set_mask_bit(struct ir
 {
     struct msi_desc *entry = desc->msi_desc;
     struct pci_dev *pdev;
-    u16 seg;
+    u16 seg, control;
     u8 bus, slot, func;
-    bool_t flag = host || guest;
+    bool_t flag = host || guest, maskall;
 
     ASSERT(spin_is_locked(&desc->lock));
     BUG_ON(!entry || !entry->dev);
@@ -406,36 +419,45 @@ static bool_t msi_set_mask_bit(struct ir
         }
         break;
     case PCI_CAP_ID_MSIX:
+        maskall = pdev->msix->host_maskall;
+        control = pci_conf_read16(seg, bus, slot, func,
+                                  msix_control_reg(entry->msi_attrib.pos));
+        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+        {
+            pdev->msix->host_maskall = 1;
+            pci_conf_write16(seg, bus, slot, func,
+                             msix_control_reg(entry->msi_attrib.pos),
+                             control | (PCI_MSIX_FLAGS_ENABLE |
+                                        PCI_MSIX_FLAGS_MASKALL));
+        }
         if ( likely(memory_decoded(pdev)) )
         {
             writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
             readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-            break;
+            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
+                break;
+            flag = 1;
         }
-        if ( flag )
+        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
         {
-            u16 control;
             domid_t domid = pdev->domain->domain_id;
 
-            pdev->msix->host_maskall = 1;
-            control = pci_conf_read16(seg, bus, slot, func,
-                                      msix_control_reg(entry->msi_attrib.pos));
-            if ( control & PCI_MSIX_FLAGS_MASKALL )
-                break;
-            pci_conf_write16(seg, bus, slot, func,
-                             msix_control_reg(entry->msi_attrib.pos),
-                             control | PCI_MSIX_FLAGS_MASKALL);
+            maskall = 1;
             if ( pdev->msix->warned != domid )
             {
                 pdev->msix->warned = domid;
                 printk(XENLOG_G_WARNING
-                       "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
+                       "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
                        desc->irq, domid, pdev->seg, pdev->bus,
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
             }
-            break;
         }
-        /* fall through */
+        pdev->msix->host_maskall = maskall;
+        if ( maskall || pdev->msix->guest_maskall )
+            control |= PCI_MSIX_FLAGS_MASKALL;
+        pci_conf_write16(seg, bus, slot, func,
+                         msix_control_reg(entry->msi_attrib.pos), control);
+        return flag;
     default:
         return 0;
     }
@@ -461,7 +483,8 @@ static int msi_get_mask_bit(const struct
                                 entry->msi.mpos) >>
                 entry->msi_attrib.entry_nr) & 1;
     case PCI_CAP_ID_MSIX:
-        if ( unlikely(!memory_decoded(entry->dev)) )
+        if ( unlikely(!msix_memory_decoded(entry->dev,
+                                           entry->msi_attrib.pos)) )
             break;
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
@@ -567,9 +590,31 @@ static struct msi_desc *alloc_msi_entry(
 
 int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
 {
-    return __setup_msi_irq(desc, msidesc,
-                           msi_maskable_irq(msidesc) ? &pci_msi_maskable
-                                                     : &pci_msi_nonmaskable);
+    const struct pci_dev *pdev = msidesc->dev;
+    unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
+    u16 control = ~0;
+    int rc;
+
+    if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
+    {
+        control = pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                                  PCI_FUNC(pdev->devfn), cpos);
+        if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                             PCI_FUNC(pdev->devfn), cpos,
+                             control | (PCI_MSIX_FLAGS_ENABLE |
+                                        PCI_MSIX_FLAGS_MASKALL));
+    }
+
+    rc = __setup_msi_irq(desc, msidesc,
+                         msi_maskable_irq(msidesc) ? &pci_msi_maskable
+                                                   : &pci_msi_nonmaskable);
+
+    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+        pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                         PCI_FUNC(pdev->devfn), cpos, control);
+
+    return rc;
 }
 
 int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
@@ -806,20 +851,38 @@ static int msix_capability_init(struct p
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
+    bool_t maskall = msix->host_maskall;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
 
     control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
-    msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+    /*
+     * Ensure MSI-X interrupts are masked during setup. Some devices require
+     * MSI-X to be enabled before we can touch the MSI-X registers. We need
+     * to mask all the vectors to prevent interrupts coming in before they're
+     * fully set up.
+     */
+    msix->host_maskall = 1;
+    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                     control | (PCI_MSIX_FLAGS_ENABLE |
+                                PCI_MSIX_FLAGS_MASKALL));
 
     if ( unlikely(!memory_decoded(dev)) )
+    {
+        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                         control & ~PCI_MSIX_FLAGS_ENABLE);
         return -ENXIO;
+    }
 
     if ( desc )
     {
         entry = alloc_msi_entry(1);
         if ( !entry )
+        {
+            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                             control & ~PCI_MSIX_FLAGS_ENABLE);
             return -ENOMEM;
+        }
         ASSERT(msi);
     }
 
@@ -850,6 +913,8 @@ static int msix_capability_init(struct p
     {
         if ( !msi || !msi->table_base )
         {
+            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                             control & ~PCI_MSIX_FLAGS_ENABLE);
             xfree(entry);
             return -ENXIO;
         }
@@ -892,6 +957,8 @@ static int msix_capability_init(struct p
 
         if ( idx < 0 )
         {
+            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                             control & ~PCI_MSIX_FLAGS_ENABLE);
             xfree(entry);
             return idx;
         }
@@ -918,7 +985,7 @@ static int msix_capability_init(struct p
 
     if ( !msix->used_entries )
     {
-        msix->host_maskall = 0;
+        maskall = 0;
         if ( !msix->guest_maskall )
             control &= ~PCI_MSIX_FLAGS_MASKALL;
         else
@@ -954,8 +1021,8 @@ static int msix_capability_init(struct p
     ++msix->used_entries;
 
     /* Restore MSI-X enabled bits */
-    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
-                     control & ~PCI_MSIX_FLAGS_MASKALL);
+    msix->host_maskall = maskall;
+    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     return 0;
 }
@@ -1095,8 +1162,15 @@ static void __pci_disable_msix(struct ms
                                            PCI_CAP_ID_MSIX);
     u16 control = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(entry->msi_attrib.pos));
+    bool_t maskall = dev->msix->host_maskall;
 
-    msix_set_enable(dev, 0);
+    if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+    {
+        dev->msix->host_maskall = 1;
+        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                         control | (PCI_MSIX_FLAGS_ENABLE |
+                                    PCI_MSIX_FLAGS_MASKALL));
+    }
 
     BUG_ON(list_empty(&dev->msi_list));
 
@@ -1108,8 +1182,11 @@ static void __pci_disable_msix(struct ms
                "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
                entry->irq, dev->seg, dev->bus,
                PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-        control |= PCI_MSIX_FLAGS_MASKALL;
+        maskall = 1;
     }
+    dev->msix->host_maskall = maskall;
+    if ( maskall || dev->msix->guest_maskall )
+        control |= PCI_MSIX_FLAGS_MASKALL;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     _pci_cleanup_msix(dev->msix);
@@ -1260,6 +1337,8 @@ int pci_restore_msi_state(struct pci_dev
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
         unsigned int i = 0, nr = 1;
+        u16 control = 0;
+        u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
 
         irq = entry->irq;
         desc = &irq_desc[irq];
@@ -1286,10 +1365,18 @@ int pci_restore_msi_state(struct pci_dev
         }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
         {
-            msix_set_enable(pdev, 0);
+            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+                                      msix_control_reg(entry->msi_attrib.pos));
+            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                             msix_control_reg(entry->msi_attrib.pos),
+                             control | (PCI_MSIX_FLAGS_ENABLE |
+                                        PCI_MSIX_FLAGS_MASKALL));
             if ( unlikely(!memory_decoded(pdev)) )
             {
                 spin_unlock_irqrestore(&desc->lock, flags);
+                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                                 msix_control_reg(entry->msi_attrib.pos),
+                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                 return -ENXIO;
             }
         }
@@ -1319,11 +1406,9 @@ int pci_restore_msi_state(struct pci_dev
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
         {
             unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
-            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
-                                          PCI_SLOT(pdev->devfn),
-                                          PCI_FUNC(pdev->devfn), cpos);
 
-            control &= ~PCI_MSI_FLAGS_QSIZE;
+            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) &
+                      ~PCI_MSI_FLAGS_QSIZE;
             multi_msi_enable(control, entry->msi.nvec);
             pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                              PCI_FUNC(pdev->devfn), cpos, control);
@@ -1331,7 +1416,9 @@ int pci_restore_msi_state(struct pci_dev
             msi_set_enable(pdev, 1);
         }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
-            msix_set_enable(pdev, 1);
+            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                             msix_control_reg(entry->msi_attrib.pos),
+                             control | PCI_MSIX_FLAGS_ENABLE);
     }
 
     return 0;



+                         msi_maskable_irq(msidesc) ? &pci_msi_maskable
+                                                   : &pci_msi_nonmaskable);
+
+    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+        pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                         PCI_FUNC(pdev->devfn), cpos, control);
+
+    return rc;
 }
 
 int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
@@ -806,20 +851,38 @@ static int msix_capability_init(struct p
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
+    bool_t maskall = msix->host_maskall;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
 
     control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
-    msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+    /*
+     * Ensure MSI-X interrupts are masked during setup. Some devices require
+     * MSI-X to be enabled before we can touch the MSI-X registers. We need
+     * to mask all the vectors to prevent interrupts coming in before they're
+     * fully set up.
+     */
+    msix->host_maskall = 1;
+    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                     control | (PCI_MSIX_FLAGS_ENABLE |
+                                PCI_MSIX_FLAGS_MASKALL));
 
     if ( unlikely(!memory_decoded(dev)) )
+    {
+        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                         control & ~PCI_MSIX_FLAGS_ENABLE);
         return -ENXIO;
+    }
 
     if ( desc )
     {
         entry = alloc_msi_entry(1);
         if ( !entry )
+        {
+            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                             control & ~PCI_MSIX_FLAGS_ENABLE);
             return -ENOMEM;
+        }
         ASSERT(msi);
     }
 
@@ -850,6 +913,8 @@ static int msix_capability_init(struct p
     {
         if ( !msi || !msi->table_base )
         {
+            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                             control & ~PCI_MSIX_FLAGS_ENABLE);
             xfree(entry);
             return -ENXIO;
         }
@@ -892,6 +957,8 @@ static int msix_capability_init(struct p
 
         if ( idx < 0 )
         {
+            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                             control & ~PCI_MSIX_FLAGS_ENABLE);
             xfree(entry);
             return idx;
         }
@@ -918,7 +985,7 @@ static int msix_capability_init(struct p
 
     if ( !msix->used_entries )
     {
-        msix->host_maskall = 0;
+        maskall = 0;
         if ( !msix->guest_maskall )
             control &= ~PCI_MSIX_FLAGS_MASKALL;
         else
@@ -954,8 +1021,8 @@ static int msix_capability_init(struct p
     ++msix->used_entries;
 
     /* Restore MSI-X enabled bits */
-    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
-                     control & ~PCI_MSIX_FLAGS_MASKALL);
+    msix->host_maskall = maskall;
+    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     return 0;
 }
@@ -1095,8 +1162,15 @@ static void __pci_disable_msix(struct ms
                                            PCI_CAP_ID_MSIX);
     u16 control = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(entry->msi_attrib.pos));
+    bool_t maskall = dev->msix->host_maskall;
 
-    msix_set_enable(dev, 0);
+    if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+    {
+        dev->msix->host_maskall = 1;
+        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+                         control | (PCI_MSIX_FLAGS_ENABLE |
+                                    PCI_MSIX_FLAGS_MASKALL));
+    }
 
     BUG_ON(list_empty(&dev->msi_list));
 
@@ -1108,8 +1182,11 @@ static void __pci_disable_msix(struct ms
                "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
                entry->irq, dev->seg, dev->bus,
                PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-        control |= PCI_MSIX_FLAGS_MASKALL;
+        maskall = 1;
     }
+    dev->msix->host_maskall = maskall;
+    if ( maskall || dev->msix->guest_maskall )
+        control |= PCI_MSIX_FLAGS_MASKALL;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     _pci_cleanup_msix(dev->msix);
@@ -1260,6 +1337,8 @@ int pci_restore_msi_state(struct pci_dev
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
         unsigned int i = 0, nr = 1;
+        u16 control = 0;
+        u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
 
         irq = entry->irq;
         desc = &irq_desc[irq];
@@ -1286,10 +1365,18 @@ int pci_restore_msi_state(struct pci_dev
         }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
         {
-            msix_set_enable(pdev, 0);
+            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+                                      msix_control_reg(entry->msi_attrib.pos));
+            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                             msix_control_reg(entry->msi_attrib.pos),
+                             control | (PCI_MSIX_FLAGS_ENABLE |
+                                        PCI_MSIX_FLAGS_MASKALL));
             if ( unlikely(!memory_decoded(pdev)) )
             {
                 spin_unlock_irqrestore(&desc->lock, flags);
+                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                                 msix_control_reg(entry->msi_attrib.pos),
+                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                 return -ENXIO;
             }
         }
@@ -1319,11 +1406,9 @@ int pci_restore_msi_state(struct pci_dev
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
         {
             unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
-            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
-                                          PCI_SLOT(pdev->devfn),
-                                          PCI_FUNC(pdev->devfn), cpos);
 
-            control &= ~PCI_MSI_FLAGS_QSIZE;
+            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) &
+                      ~PCI_MSI_FLAGS_QSIZE;
             multi_msi_enable(control, entry->msi.nvec);
             pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                              PCI_FUNC(pdev->devfn), cpos, control);
@@ -1331,7 +1416,9 @@ int pci_restore_msi_state(struct pci_dev
             msi_set_enable(pdev, 1);
         }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
-            msix_set_enable(pdev, 1);
+            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                             msix_control_reg(entry->msi_attrib.pos),
+                             control | PCI_MSIX_FLAGS_ENABLE);
     }
 
     return 0;


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v4 RFC 5/6] x86/MSI-X: reduce fiddling with control register during restore
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
                   ` (3 preceding siblings ...)
  2015-06-22 14:50 ` [PATCH v4 4/6] x86/MSI-X: access MSI-X table only after having enabled MSI-X Jan Beulich
@ 2015-06-22 14:51 ` Jan Beulich
  2015-06-22 14:51 ` [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests Jan Beulich
  2015-07-13 11:42 ` [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:51 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 4108 bytes --]

Rather than disabling and enabling MSI-X once per vector, do it just
once per device.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1319,6 +1319,9 @@ int pci_restore_msi_state(struct pci_dev
     struct msi_desc *entry, *tmp;
     struct irq_desc *desc;
     struct msi_msg msg;
+    u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    unsigned int type = 0, pos = 0;
+    u16 control = 0;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -1337,8 +1340,6 @@ int pci_restore_msi_state(struct pci_dev
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
         unsigned int i = 0, nr = 1;
-        u16 control = 0;
-        u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
 
         irq = entry->irq;
         desc = &irq_desc[irq];
@@ -1355,31 +1356,38 @@ int pci_restore_msi_state(struct pci_dev
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                     PCI_FUNC(pdev->devfn), i);
             spin_unlock_irqrestore(&desc->lock, flags);
+            if ( type == PCI_CAP_ID_MSIX )
+                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                                 msix_control_reg(pos),
+                                 control & ~PCI_MSIX_FLAGS_ENABLE);
             return -EINVAL;
         }
 
+        ASSERT(!type || type == entry->msi_attrib.type);
+        pos = entry->msi_attrib.pos;
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
         {
             msi_set_enable(pdev, 0);
             nr = entry->msi.nvec;
         }
-        else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+        else if ( !type && entry->msi_attrib.type == PCI_CAP_ID_MSIX )
         {
             control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
-                                      msix_control_reg(entry->msi_attrib.pos));
+                                      msix_control_reg(pos));
             pci_conf_write16(pdev->seg, pdev->bus, slot, func,
-                             msix_control_reg(entry->msi_attrib.pos),
+                             msix_control_reg(pos),
                              control | (PCI_MSIX_FLAGS_ENABLE |
                                         PCI_MSIX_FLAGS_MASKALL));
             if ( unlikely(!memory_decoded(pdev)) )
             {
                 spin_unlock_irqrestore(&desc->lock, flags);
                 pci_conf_write16(pdev->seg, pdev->bus, slot, func,
-                                 msix_control_reg(entry->msi_attrib.pos),
+                                 msix_control_reg(pos),
                                  control & ~PCI_MSIX_FLAGS_ENABLE);
                 return -ENXIO;
             }
         }
+        type = entry->msi_attrib.type;
 
         msg = entry->msg;
         write_msi_msg(entry, &msg);
@@ -1403,9 +1411,9 @@ int pci_restore_msi_state(struct pci_dev
 
         spin_unlock_irqrestore(&desc->lock, flags);
 
-        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        if ( type == PCI_CAP_ID_MSI )
         {
-            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+            unsigned int cpos = msi_control_reg(pos);
 
             control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) &
                       ~PCI_MSI_FLAGS_QSIZE;
@@ -1415,12 +1423,13 @@ int pci_restore_msi_state(struct pci_dev
 
             msi_set_enable(pdev, 1);
         }
-        else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
-            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
-                             msix_control_reg(entry->msi_attrib.pos),
-                             control | PCI_MSIX_FLAGS_ENABLE);
     }
 
+    if ( type == PCI_CAP_ID_MSIX )
+        pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                         msix_control_reg(pos),
+                         control | PCI_MSIX_FLAGS_ENABLE);
+
     return 0;
 }
 




^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
                   ` (4 preceding siblings ...)
  2015-06-22 14:51 ` [PATCH v4 RFC 5/6] x86/MSI-X: reduce fiddling with control register during restore Jan Beulich
@ 2015-06-22 14:51 ` Jan Beulich
  2015-06-24 17:24   ` Andrew Cooper
  2015-07-13 11:42 ` [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
  6 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2015-06-22 14:51 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 1565 bytes --]

... by monitoring writes to the mask register.

This allows reverting the main effect of the XSA-129 patches in qemu.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1308,6 +1308,39 @@ printk("%04x:%02x:%02x.%u: MSI-X %03x:%u
         return 1;
     }
 
+    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
+    if ( entry && entry->msi_attrib.maskbit )
+    {
+        uint16_t cntl;
+        uint32_t unused;
+
+        pos = entry->msi_attrib.pos;
+        if ( reg < pos || reg >= entry->msi.mpos + 8 )
+            return 0;
+printk("%04x:%02x:%02x.%u: MSI %03x:%u->%04x\n", seg, bus, slot, func, reg, size, *data);//temp
+
+        if ( reg == msi_control_reg(pos) )
+            return size == 2 ? 1 : -EACCES;
+        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
+            return -EACCES;
+
+        cntl = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
+        unused = ~(uint32_t)0 >> (32 - multi_msi_capable(cntl));
+        for ( pos = 0; pos < entry->msi.nvec; ++pos, ++entry )
+        {
+            entry->msi_attrib.guest_masked =
+                *data >> entry->msi_attrib.entry_nr;
+            if ( entry->msi_attrib.host_masked )
+                *data |= 1 << pos;
+            unused &= ~(1 << pos);
+        }
+printk("%04x:%02x:%02x.%u: MSI -> %08x (%08x)\n", seg, bus, slot, func, *data, unused);//temp
+
+        *data |= unused;
+
+        return 1;
+    }
+
     return 0;
 }
 




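Just to illustrate the effect of the loop above on a guest's 32-bit write of
the mask word, a simplified standalone model (assuming vector i uses mask
bit i, which the real code doesn't rely on):

    #include <stdint.h>

    struct vec {
        unsigned int guest_masked:1;
        unsigned int host_masked:1;
    };

    /* 'data' is what the guest wants to write to the MSI mask register;
     * the return value is what should actually reach the device. */
    static uint32_t mask_write_intercept(struct vec *v, unsigned int nvec,
                                         unsigned int multi_cap, uint32_t data)
    {
        uint32_t unused = ~(uint32_t)0 >> (32 - multi_cap);
        unsigned int i;

        for ( i = 0; i < nvec; ++i )
        {
            v[i].guest_masked = data >> i;   /* low bit = guest's request */
            if ( v[i].host_masked )
                data |= 1u << i;             /* don't undo host masking */
            unused &= ~(1u << i);
        }

        return data | unused;                /* keep unallocated vectors masked */
    }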

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic
  2015-06-22 14:46 ` [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic Jan Beulich
@ 2015-06-22 19:31   ` Konrad Rzeszutek Wilk
  2015-06-23  7:21     ` Jan Beulich
  2015-06-24 17:09   ` Andrew Cooper
  1 sibling, 1 reply; 20+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-06-22 19:31 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Andrew Cooper

> -static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
> -                         unsigned int start, unsigned int size)
> +static bool_t pci_cfg_ok(struct domain *currd, unsigned int start,
> +                         unsigned int size, uint32_t *write)
>  {
>      uint32_t machine_bdf;
>  
> @@ -1804,8 +1804,12 @@ static bool_t pci_cfg_ok(struct domain *
>              start |= CF8_ADDR_HI(currd->arch.pci_cf8);
>      }
>  
> -    return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
> -                                      start, start + size - 1, write);
> +    if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
> +                                   start, start + size - 1, !!write) != 0 )
> +         return 0;
> +
> +    return !write ||
> +           pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;

Won't the 'write' parameter cause a compiler error, as it expects a pointer?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic
  2015-06-22 19:31   ` Konrad Rzeszutek Wilk
@ 2015-06-23  7:21     ` Jan Beulich
  2015-06-23 11:06       ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2015-06-23  7:21 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Andrew Cooper, Keir Fraser, xen-devel

>>> On 22.06.15 at 21:31, <konrad.wilk@oracle.com> wrote:
>> @@ -1804,8 +1804,12 @@ static bool_t pci_cfg_ok(struct domain *
>>              start |= CF8_ADDR_HI(currd->arch.pci_cf8);
>>      }
>>  
>> -    return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
>> -                                      start, start + size - 1, write);
>> +    if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
>> +                                   start, start + size - 1, !!write) != 0 )
>> +         return 0;
>> +
>> +    return !write ||
>> +           pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;
> 
> Won't the 'write' parameter cause a compiler error, as it expects a
> pointer?

No, certainly not - !!write means the same as write != NULL, but is
(imo) easier to read.
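A tiny standalone illustration of the equivalence (plain C, not code from
the patch):

    #include <stdint.h>
    #include <stddef.h>
    #include <assert.h>

    int main(void)
    {
        uint32_t val = 0;
        uint32_t *write = &val;   /* a write access carries a pointer */
        uint32_t *rd = NULL;      /* a read access passes NULL */

        assert(!!write == (write != NULL));   /* 1 - it is a write */
        assert(!!rd == (rd != NULL));         /* 0 - it is a read */
        assert(!rd);                          /* !p is the "no pointer" test */

        return 0;
    }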

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic
  2015-06-23  7:21     ` Jan Beulich
@ 2015-06-23 11:06       ` Konrad Rzeszutek Wilk
  2015-06-23 12:55         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-06-23 11:06 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Keir Fraser, xen-devel

On June 23, 2015 3:21:17 AM EDT, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 22.06.15 at 21:31, <konrad.wilk@oracle.com> wrote:
>>> @@ -1804,8 +1804,12 @@ static bool_t pci_cfg_ok(struct domain *
>>>              start |= CF8_ADDR_HI(currd->arch.pci_cf8);
>>>      }
>>>  
>>> -    return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
>>> -                                      start, start + size - 1,
>write);
>>> +    if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
>>> +                                   start, start + size - 1,
>!!write) != 0 )
>>> +         return 0;
>>> +
>>> +    return !write ||
>>> +           pci_conf_write_intercept(0, machine_bdf, start, size,
>write) >= 0;
>> 
>> Won't the 'write' parameter cause a compiler error, as it expects a
>> pointer?
>
>No, certainly not - !!write means the same as write != NULL, but is
>(imo) easier to read.

I meant the

 pci_conf_write_intercept(...,write).

The prototype for the last parameter is uint32_t *?

>
>Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic
  2015-06-23 11:06       ` Konrad Rzeszutek Wilk
@ 2015-06-23 12:55         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-23 12:55 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Andrew Cooper, Keir Fraser, xen-devel

>>> On 23.06.15 at 13:06, <konrad.wilk@oracle.com> wrote:
> On June 23, 2015 3:21:17 AM EDT, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 22.06.15 at 21:31, <konrad.wilk@oracle.com> wrote:
>>>> @@ -1804,8 +1804,12 @@ static bool_t pci_cfg_ok(struct domain *
>>>>              start |= CF8_ADDR_HI(currd->arch.pci_cf8);
>>>>      }
>>>>  
>>>> -    return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
>>>> -                                      start, start + size - 1,
>>write);
>>>> +    if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
>>>> +                                   start, start + size - 1,
>>!!write) != 0 )
>>>> +         return 0;
>>>> +
>>>> +    return !write ||
>>>> +           pci_conf_write_intercept(0, machine_bdf, start, size,
>>write) >= 0;
>>> 
>>> Won't the 'write' parameter cause a compiler error, as it expects a
>>> pointer?
>>
>>No, certainly not - !!write means the same as write != NULL, but is
>>(imo) easier to read.
> 
> I meant the
> 
>  pci_conf_write_intercept(...,write).
> 
> The prototype for the last parameter is uint32_t *?

But the write parameter is being changed to this very type in
this patch.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic
  2015-06-22 14:46 ` [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic Jan Beulich
  2015-06-22 19:31   ` Konrad Rzeszutek Wilk
@ 2015-06-24 17:09   ` Andrew Cooper
  1 sibling, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2015-06-24 17:09 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 22/06/15 15:46, Jan Beulich wrote:
> This is to be used by MSI code, and later to also be hooked up to
> MMCFG accesses by Dom0.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately
  2015-06-22 14:47 ` [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately Jan Beulich
@ 2015-06-24 17:15   ` Andrew Cooper
  2015-06-25  8:01     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2015-06-24 17:15 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 22/06/15 15:47, Jan Beulich wrote:
> Host uses of the bits will be added subsequently, and must not be
> overridden by guests (including Dom0, namely when acting on behalf of
> a guest).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -846,6 +846,12 @@ static int msix_capability_init(struct p
>  
>      if ( !msix->used_entries )
>      {
> +        msix->host_maskall = 0;
> +        if ( !msix->guest_maskall )
> +            control &= ~PCI_MSIX_FLAGS_MASKALL;
> +        else
> +            control |= PCI_MSIX_FLAGS_MASKALL;

Is the guest (or hardware) in a position to influence guest_maskall at
this point?  I am not sure that it is.

~Andrew

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests
  2015-06-22 14:51 ` [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests Jan Beulich
@ 2015-06-24 17:24   ` Andrew Cooper
  2015-06-25  8:04     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2015-06-24 17:24 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser


[-- Attachment #1.1: Type: text/plain, Size: 1879 bytes --]

On 22/06/15 15:51, Jan Beulich wrote:
> ... by monitoring writes to the mask register.
>
> This allows reverting the main effect of the XSA-129 patches in qemu.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -1308,6 +1308,39 @@ printk("%04x:%02x:%02x.%u: MSI-X %03x:%u
>          return 1;
>      }
>  
> +    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
> +    if ( entry && entry->msi_attrib.maskbit )
> +    {
> +        uint16_t cntl;
> +        uint32_t unused;
> +
> +        pos = entry->msi_attrib.pos;
> +        if ( reg < pos || reg >= entry->msi.mpos + 8 )
> +            return 0;
> +printk("%04x:%02x:%02x.%u: MSI %03x:%u->%04x\n", seg, bus, slot, func, reg, size, *data);//temp
> +
> +        if ( reg == msi_control_reg(pos) )
> +            return size == 2 ? 1 : -EACCES;
> +        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
> +            return -EACCES;

Can we avoid using EACCES to avoid confusing it with a mismatched tools
version?

~Andrew

> +
> +        cntl = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
> +        unused = ~(uint32_t)0 >> (32 - multi_msi_capable(cntl));
> +        for ( pos = 0; pos < entry->msi.nvec; ++pos, ++entry )
> +        {
> +            entry->msi_attrib.guest_masked =
> +                *data >> entry->msi_attrib.entry_nr;
> +            if ( entry->msi_attrib.host_masked )
> +                *data |= 1 << pos;
> +            unused &= ~(1 << pos);
> +        }
> +printk("%04x:%02x:%02x.%u: MSI -> %08x (%08x)\n", seg, bus, slot, func, *data, unused);//temp
> +
> +        *data |= unused;
> +
> +        return 1;
> +    }
> +
>      return 0;
>  }
>  



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately
  2015-06-24 17:15   ` Andrew Cooper
@ 2015-06-25  8:01     ` Jan Beulich
  2015-06-25 14:25       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2015-06-25  8:01 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 24.06.15 at 19:15, <andrew.cooper3@citrix.com> wrote:
> On 22/06/15 15:47, Jan Beulich wrote:
>> Host uses of the bits will be added subsequently, and must not be
>> overridden by guests (including Dom0, namely when acting on behalf of
>> a guest).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/arch/x86/msi.c
>> +++ b/xen/arch/x86/msi.c
>> @@ -846,6 +846,12 @@ static int msix_capability_init(struct p
>>  
>>      if ( !msix->used_entries )
>>      {
>> +        msix->host_maskall = 0;
>> +        if ( !msix->guest_maskall )
>> +            control &= ~PCI_MSIX_FLAGS_MASKALL;
>> +        else
>> +            control |= PCI_MSIX_FLAGS_MASKALL;
> 
> Is the guest (or hardware) in a position to influence guest_maskall at
> this point?  I am not sure that it is.

Of course - via the cfg write intercept (i.e. the hunk immediately
following this one).
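
That hunk isn't quoted in this sub-thread; the idea it implements is roughly
the following (simplified standalone model, illustrative names only):

    #include <stdint.h>
    #include <stdbool.h>

    #define MASKALL 0x4000u   /* stand-in for PCI_MSIX_FLAGS_MASKALL */

    struct msix_state {
        bool host_maskall;    /* Xen's own reason to keep everything masked */
        bool guest_maskall;   /* what the guest last asked for */
    };

    /* Guest writes 'val' to the MSI-X control word; return what may
     * actually reach the device. */
    static uint16_t ctrl_write_intercept(struct msix_state *s, uint16_t val)
    {
        s->guest_maskall = val & MASKALL;   /* record the guest's request */
        if ( s->host_maskall )
            val |= MASKALL;                 /* host intent always wins */
        return val;
    }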

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests
  2015-06-24 17:24   ` Andrew Cooper
@ 2015-06-25  8:04     ` Jan Beulich
  2015-06-25 14:26       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2015-06-25  8:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 24.06.15 at 19:24, <andrew.cooper3@citrix.com> wrote:
> On 22/06/15 15:51, Jan Beulich wrote:
>> --- a/xen/arch/x86/msi.c
>> +++ b/xen/arch/x86/msi.c
>> @@ -1308,6 +1308,39 @@ printk("%04x:%02x:%02x.%u: MSI-X %03x:%u
>>          return 1;
>>      }
>>  
>> +    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
>> +    if ( entry && entry->msi_attrib.maskbit )
>> +    {
>> +        uint16_t cntl;
>> +        uint32_t unused;
>> +
>> +        pos = entry->msi_attrib.pos;
>> +        if ( reg < pos || reg >= entry->msi.mpos + 8 )
>> +            return 0;
>> +printk("%04x:%02x:%02x.%u: MSI %03x:%u->%04x\n", seg, bus, slot, func, reg, size, *data);//temp
>> +
>> +        if ( reg == msi_control_reg(pos) )
>> +            return size == 2 ? 1 : -EACCES;
>> +        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
>> +            return -EACCES;
> 
> Can we avoid using EACCES to avoid confusing it with a mismatched tools
> version?

What other suitable error code would you see here? I'm not sure
we want this error code to be reserved for exactly one purpose,
all the more so as here we're on a path that will never have this error
code returned to the guest (and even less so via a domctl/sysctl,
which would be the primary mismatched-tools-version candidates).

It's also odd that you ask for this here, when patch 2 has a use
of this error code too.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately
  2015-06-25  8:01     ` Jan Beulich
@ 2015-06-25 14:25       ` Andrew Cooper
  0 siblings, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2015-06-25 14:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser

On 25/06/15 09:01, Jan Beulich wrote:
>>>> On 24.06.15 at 19:15, <andrew.cooper3@citrix.com> wrote:
>> On 22/06/15 15:47, Jan Beulich wrote:
>>> Host uses of the bits will be added subsequently, and must not be
>>> overridden by guests (including Dom0, namely when acting on behalf of
>>> a guest).
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/arch/x86/msi.c
>>> +++ b/xen/arch/x86/msi.c
>>> @@ -846,6 +846,12 @@ static int msix_capability_init(struct p
>>>  
>>>      if ( !msix->used_entries )
>>>      {
>>> +        msix->host_maskall = 0;
>>> +        if ( !msix->guest_maskall )
>>> +            control &= ~PCI_MSIX_FLAGS_MASKALL;
>>> +        else
>>> +            control |= PCI_MSIX_FLAGS_MASKALL;
>> Is the guest (or hardware) in a position to influence guest_maskall at
>> this point?  I am not sure that it is.
> Of course - via the cfg write intercept (i.e. the hunk immediately
> following this one).

So it is.  I had got the call chronology confused.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>  (although
perhaps better to wait until we understand Sander's MSI masking problem).

~Andrew

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests
  2015-06-25  8:04     ` Jan Beulich
@ 2015-06-25 14:26       ` Andrew Cooper
  2015-06-25 14:49         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2015-06-25 14:26 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser

On 25/06/15 09:04, Jan Beulich wrote:
>>>> On 24.06.15 at 19:24, <andrew.cooper3@citrix.com> wrote:
>> On 22/06/15 15:51, Jan Beulich wrote:
>>> --- a/xen/arch/x86/msi.c
>>> +++ b/xen/arch/x86/msi.c
>>> @@ -1308,6 +1308,39 @@ printk("%04x:%02x:%02x.%u: MSI-X %03x:%u
>>>          return 1;
>>>      }
>>>  
>>> +    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
>>> +    if ( entry && entry->msi_attrib.maskbit )
>>> +    {
>>> +        uint16_t cntl;
>>> +        uint32_t unused;
>>> +
>>> +        pos = entry->msi_attrib.pos;
>>> +        if ( reg < pos || reg >= entry->msi.mpos + 8 )
>>> +            return 0;
>>> +printk("%04x:%02x:%02x.%u: MSI %03x:%u->%04x\n", seg, bus, slot, func, reg, size, *data);//temp
>>> +
>>> +        if ( reg == msi_control_reg(pos) )
>>> +            return size == 2 ? 1 : -EACCES;
>>> +        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
>>> +            return -EACCES;
>> Can we avoid using EACCES to avoid confusing it with a mismatched tools
>> version?
> What other suitable error code would you see here? I'm not sure
> we want this error code to be reserved for exactly one purpose,
> all the more so as here we're on a path that will never have this error
> code returned to the guest (and even less so via a domctl/sysctl,
> which would be the primary mismatched-tools-version candidates).

If there is no chance that we will end up back on a domctl/sysctl error
path then its use is fine.

~Andrew

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests
  2015-06-25 14:26       ` Andrew Cooper
@ 2015-06-25 14:49         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2015-06-25 14:49 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 25.06.15 at 16:26, <andrew.cooper3@citrix.com> wrote:
> On 25/06/15 09:04, Jan Beulich wrote:
>>>>> On 24.06.15 at 19:24, <andrew.cooper3@citrix.com> wrote:
>>> On 22/06/15 15:51, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/msi.c
>>>> +++ b/xen/arch/x86/msi.c
>>>> @@ -1308,6 +1308,39 @@ printk("%04x:%02x:%02x.%u: MSI-X %03x:%u
>>>>          return 1;
>>>>      }
>>>>  
>>>> +    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
>>>> +    if ( entry && entry->msi_attrib.maskbit )
>>>> +    {
>>>> +        uint16_t cntl;
>>>> +        uint32_t unused;
>>>> +
>>>> +        pos = entry->msi_attrib.pos;
>>>> +        if ( reg < pos || reg >= entry->msi.mpos + 8 )
>>>> +            return 0;
>>>> +printk("%04x:%02x:%02x.%u: MSI %03x:%u->%04x\n", seg, bus, slot, func, reg, 
> size, *data);//temp
>>>> +
>>>> +        if ( reg == msi_control_reg(pos) )
>>>> +            return size == 2 ? 1 : -EACCES;
>>>> +        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
>>>> +            return -EACCES;
>>> Can we avoid using EACCES to avoid confusing it with a mismatched tools
>>> version?
>> What other suitable error code would you see here? I'm not sure
>> we want this error code to be reserved for exactly one purpose,
>> all the more so as here we're on a path that will never have this error
>> code returned to the guest (and even less so via a domctl/sysctl,
>> which would be the primary mismatched-tools-version candidates).
> 
> If there is no chance that we will end up back on a domctl/sysctl error
> path then its use is fine.

I don't see how it could get on such a path - it's an intercept of
a guest (Dom0) operation. If we were to introduce hypercall based
PCI config space writes, then that would be a physdev op imo.
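
Purely to illustrate the shape such an op might take (hypothetical layout,
nothing like it exists or is proposed here):

    #include <stdint.h>

    /* Hypothetical PHYSDEVOP payload - illustration only. */
    struct physdev_pci_cfg_write {
        uint16_t seg;
        uint8_t  bus;
        uint8_t  devfn;
        uint16_t reg;     /* config space offset */
        uint8_t  size;    /* 1, 2 or 4 */
        uint32_t value;
    };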

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up
  2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
                   ` (5 preceding siblings ...)
  2015-06-22 14:51 ` [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests Jan Beulich
@ 2015-07-13 11:42 ` Jan Beulich
  6 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2015-07-13 11:42 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Wei Liu

[-- Attachment #1: Type: text/plain, Size: 13594 bytes --]

>>> On 22.06.15 at 16:38, <JBeulich@suse.com> wrote:
> Only patches 1, 2, and 6 are really RFC (with some debugging code still
> left in), the others (hence v4) have been submitted before. 
> 
> 1: PCI: add config space write abstract intercept logic
> 2: MSI-X: track host and guest mask-all requests separately
> 3: MSI-X: be more careful during teardown
> 4: MSI-X: access MSI-X table only after having enabled MSI-X
> 5: MSI-X: reduce fiddling with control register during restore
> 6: MSI: properly track guest masking requests
> 
> A fundamental question is whether to go without the so far missing
> (and more involved) MMCFG intercepts. This largely depends
> whether there are any half way recent Dom0 OSes using MMCFG
> accesses for the base 256 bytes of PCI config space.

Below/attached a 7th patch dealing with MMCFG accesses for
devices we know about _before_ Dom0 sets up its mapping
of MMCFG space. As noted at its top, this means it still won't
work for a number of cases (not mentioned there is the case of
Dom0 re-organizing bus numbering in order to e.g. fit in SR-IOV
virtual functions).

There are two approaches I can see to deal with these cases,
neither of which I like particularly:

1) Globally intercept all MMCFG writes (i.e. independent of
whether there's anything we actually need to see, like the MSI/
MSI-X mask bits that are of interest here). The part that makes
me dislike this model is the increased involvement of the x86
instruction emulator plus the increased chances of us needing to
deal with accesses other than just plain stores.

2) Track Dom0 mappings of MMCFG space: We'd need to set up
(if not covered by the frame table) struct page_info for all
involved pages, and either overload it to store two or three
pointers back to L1 page table slots where the respective page
is being mapped, or (if two or three mappings per page don't
suffice) attach a linked list of such pointers. I think it goes
without saying that the complexity of getting this overload
right (namely not conflicting with any existing uses) as well
as the need to not lose track of any mappings are the reasons
I'm not really keen on this one.
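
A purely illustrative sketch of the bookkeeping this would imply
(hypothetical names, not proposed code):

    #include <stdint.h>

    typedef uint64_t intpte_t;        /* stand-in for Xen's PTE type */
    struct mmcfg_track_extra;         /* overflow list, if ever needed */

    #define MMCFG_INLINE_REFS 3

    /* Per MMCFG page: where Dom0 currently maps it. */
    struct mmcfg_track {
        intpte_t *l1_slot[MMCFG_INLINE_REFS];  /* back-pointers to L1 PTE slots */
        struct mmcfg_track_extra *extra;       /* linked list beyond that */
    };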

Models I considered and found unsuitable (those I can recall right
now):

3) Hunt down Dom0 mappings at the time we learn we need to
intercept writes for a particular device. The overhead of having
to scan all Dom0 page tables seems prohibitive for such a solution.

4) Initially disallow writes to MMCFG pages globally, granting
write permission upon first write access when we can determine
that the device doesn't need any "snooping". This wouldn't cope
with hotplug (and likewise SR-IOV VFs), as we might end up
needing to revoke write access.

5) Mandating Dom0 to use hypervisor-managed mappings of
MMCFG space. For one this would require all Dom0 kernels to be
changed. And then it wouldn't be usable for 32-bit Dom0, as
there's no VA space left to place such a mapping, and even if we
recycled some of the compat M2P space the range wouldn't be
large enough to just cover a single segment's space, not to
speak of multiple segments. And introducing a windowing model
would make Dom0 changes even more complex (for little benefit,
considering that generally we'd expect most people to use 64-bit
kernels these days).

6) Mandating at least config space writes to be done via hypercall.
This again would require all Dom0 kernels to change.

Hence - if anyone has any better idea, please speak up soon. And
of course also if anyone has a particular preference between the
two presented viable options.

Jan

TODO: currently dependent upon IOMMU being enabled (due to bus scan only happening in that case)
TODO: SR-IOV (and alike?) devices not visible at time of MMCFG registration
TODO: remove //temp-s

--- unstable.orig/xen/arch/x86/mm.c
+++ unstable/xen/arch/x86/mm.c
@@ -5208,6 +5208,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
 
     /* We are looking only for read-only mappings of p.t. pages. */
     if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
+         rangeset_contains_singleton(mmio_ro_ranges, l1e_get_pfn(pte)) ||
          !get_page_from_pagenr(l1e_get_pfn(pte), d) )
         goto bail;
 
@@ -5255,6 +5256,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
 struct mmio_ro_emulate_ctxt {
     struct x86_emulate_ctxt ctxt;
     unsigned long cr2;
+    unsigned int seg, bdf;
 };
 
 static int mmio_ro_emulated_read(
@@ -5294,6 +5296,50 @@ static const struct x86_emulate_ops mmio
     .write      = mmio_ro_emulated_write,
 };
 
+static int mmio_intercept_write(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct mmio_ro_emulate_ctxt *mmio_ctxt =
+        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+
+    /*
+     * Only allow naturally-aligned stores no wider than 4 bytes to the
+     * original %cr2 address.
+     */
+    if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 ||
+         offset != mmio_ctxt->cr2 )
+    {
+        MEM_LOG("mmio_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)",
+                mmio_ctxt->cr2, offset, bytes);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    offset &= 0xfff;
+printk("%04x:%02x:%02x.%u write [%lx] %0*x\n",//temp
+       mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), PCI_SLOT(mmio_ctxt->bdf), PCI_FUNC(mmio_ctxt->bdf),//temp
+       offset, bytes * 2, *(uint32_t*)p_data);//temp
+    pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, offset, bytes,
+                             p_data);
+printk("%04x:%02x:%02x.%u write [%lx]=%0*x\n",//temp
+       mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), PCI_SLOT(mmio_ctxt->bdf), PCI_FUNC(mmio_ctxt->bdf),//temp
+       offset, bytes * 2, *(uint32_t*)p_data);//temp
+    pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf),
+                    PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes,
+                    *(uint32_t *)p_data);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct x86_emulate_ops mmio_intercept_ops = {
+    .read       = mmio_ro_emulated_read,
+    .insn_fetch = ptwr_emulated_read,
+    .write      = mmio_intercept_write,
+};
+
 /* Check if guest is trying to modify a r/o MMIO page. */
 int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
                           struct cpu_user_regs *regs)
@@ -5308,6 +5354,7 @@ int mmio_ro_do_page_fault(struct vcpu *v
         .ctxt.swint_emulate = x86_swint_emulate_none,
         .cr2 = addr
     };
+    const unsigned long *map;
     int rc;
 
     /* Attempt to read the PTE that maps the VA being accessed. */
@@ -5332,7 +5379,14 @@ int mmio_ro_do_page_fault(struct vcpu *v
     if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
         return 0;
 
-    rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
+    if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) &&
+         (map = pci_get_intercept_map(mmio_ro_ctxt.seg)) != NULL &&
+         test_bit(mmio_ro_ctxt.bdf, map) &&
+         ((map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL ||
+          !test_bit(mmio_ro_ctxt.bdf, map)) )
+        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops);
+    else
+        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
 
     return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
 }
--- unstable.orig/xen/arch/x86/pci.c
+++ unstable/xen/arch/x86/pci.c
@@ -75,6 +75,15 @@ int pci_conf_write_intercept(unsigned in
     struct pci_dev *pdev;
     int rc = 0;
 
+    if ( !data ) /* probe */
+    {
+         ASSERT(!~reg && !size);
+         return pci_find_cap_offset(seg, PCI_BUS(bdf), PCI_SLOT(bdf),
+                                    PCI_FUNC(bdf), PCI_CAP_ID_MSI) ||
+                pci_find_cap_offset(seg, PCI_BUS(bdf), PCI_SLOT(bdf),
+                                    PCI_FUNC(bdf), PCI_CAP_ID_MSIX);
+    }
+
     /*
      * Avoid expensive operations when no hook is going to do anything
      * for the access anyway.
--- unstable.orig/xen/arch/x86/x86_64/mmconfig_64.c
+++ unstable/xen/arch/x86/x86_64/mmconfig_64.c
@@ -154,10 +154,25 @@ void arch_pci_ro_device(int seg, int bdf
     }
 }
 
+static void set_ro(const struct acpi_mcfg_allocation *cfg,
+                   const unsigned long *map)
+{
+    unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
+    unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
+
+    if (!map)
+        return;
+
+    while ((bdf = find_next_bit(map, end + 1, bdf)) <= end) {
+        arch_pci_ro_device(cfg->pci_segment, bdf);
+        if (bdf++ == end)
+            break;
+    }
+}
+
 int pci_mmcfg_arch_enable(unsigned int idx)
 {
     const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
-    const unsigned long *ro_map = pci_get_ro_map(cfg->pci_segment);
 
     if (pci_mmcfg_virt[idx].virt)
         return 0;
@@ -169,16 +184,10 @@ int pci_mmcfg_arch_enable(unsigned int i
     }
     printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
-    if (ro_map) {
-        unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
-        unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
-
-        while ((bdf = find_next_bit(ro_map, end + 1, bdf)) <= end) {
-            arch_pci_ro_device(cfg->pci_segment, bdf);
-            if (bdf++ == end)
-                break;
-        }
-    }
+
+    set_ro(cfg, pci_get_ro_map(cfg->pci_segment));
+    set_ro(cfg, pci_get_intercept_map(cfg->pci_segment));
+
     return 0;
 }
 
@@ -197,6 +206,32 @@ void pci_mmcfg_arch_disable(unsigned int
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
 }
 
+bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                        unsigned int *bdf)
+{
+    unsigned int idx;
+
+    for (idx = 0; idx < pci_mmcfg_config_num; ++idx) {
+        const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg;
+
+        if (pci_mmcfg_virt[idx].virt &&
+            mfn >= PFN_DOWN(cfg->address + (cfg->start_bus_number << 20)) &&
+            mfn < PFN_DOWN(cfg->address + ((cfg->end_bus_number + 1) << 20))) {
+static unsigned long cnt, thr;//temp
+            *seg = cfg->pci_segment;
+            *bdf = mfn - PFN_DOWN(cfg->address);
+if(++cnt > thr) {//temp
+ thr |= cnt;
+ printk("%lx -> %04x:%02x:%02x.%u\n", mfn, *seg, PCI_BUS(*bdf), PCI_SLOT(*bdf), PCI_FUNC(*bdf));
+}
+            return 1;
+        }
+    }
+
+printk("%lx -> ???\n", mfn);//temp
+    return 0;
+}
+
 int __init pci_mmcfg_arch_init(void)
 {
     int i;
--- unstable.orig/xen/drivers/passthrough/pci.c
+++ unstable/xen/drivers/passthrough/pci.c
@@ -39,6 +39,7 @@ struct pci_seg {
     struct list_head alldevs_list;
     u16 nr;
     unsigned long *ro_map;
+    unsigned long *intercept_map;
     /* bus2bridge_lock protects bus2bridge array */
     spinlock_t bus2bridge_lock;
 #define MAX_BUSES 256
@@ -123,6 +124,13 @@ const unsigned long *pci_get_ro_map(u16 
     return pseg ? pseg->ro_map : NULL;
 }
 
+const unsigned long *pci_get_intercept_map(u16 seg)
+{
+    struct pci_seg *pseg = get_pseg(seg);
+
+    return pseg ? pseg->intercept_map : NULL;
+}
+
 static struct phantom_dev {
     u16 seg;
     u8 bus, slot, stride;
@@ -284,6 +292,20 @@ static struct pci_dev *alloc_pdev(struct
     pdev->domain = NULL;
     INIT_LIST_HEAD(&pdev->msi_list);
 
+    if ( !pseg->intercept_map &&
+         pci_conf_write_intercept(pseg->nr, PCI_BDF2(bus, devfn), ~0, 0,
+                                  NULL) > 0 )
+    {
+        size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);
+
+        pseg->intercept_map = vzalloc(sz);
+        if ( !pseg->intercept_map )
+        {
+            xfree(pdev);
+            return NULL;
+        }
+    }
+
     if ( pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                              PCI_CAP_ID_MSIX) )
     {
@@ -366,6 +388,13 @@ static struct pci_dev *alloc_pdev(struct
 
     check_pdev(pdev);
 
+    if ( pci_conf_write_intercept(pseg->nr, PCI_BDF2(bus, devfn), ~0, 0,
+                                  NULL) > 0 )
+    {
+        set_bit(PCI_BDF2(bus, devfn), pseg->intercept_map);
+        arch_pci_ro_device(pseg->nr, PCI_BDF2(bus, devfn));
+    }
+
     return pdev;
 }
 
--- unstable.orig/xen/include/asm-x86/pci.h
+++ unstable/xen/include/asm-x86/pci.h
@@ -20,5 +20,7 @@ int pci_conf_write_intercept(unsigned in
                              uint32_t *data);
 int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
                                  unsigned int size, uint32_t *data);
+bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                        unsigned int *bdf);
 
 #endif /* __X86_PCI_H__ */
--- unstable.orig/xen/include/xen/pci.h
+++ unstable/xen/include/xen/pci.h
@@ -106,6 +106,7 @@ void setup_hwdom_pci_devices(struct doma
 int pci_release_devices(struct domain *d);
 int pci_add_segment(u16 seg);
 const unsigned long *pci_get_ro_map(u16 seg);
+const unsigned long *pci_get_intercept_map(u16 seg);
 int pci_add_device(u16 seg, u8 bus, u8 devfn,
                    const struct pci_dev_info *, nodeid_t node);
 int pci_remove_device(u16 seg, u8 bus, u8 devfn);


[-- Attachment #2: x86-PCI-MMCFG-intercept.patch --]
[-- Type: text/plain, Size: 9945 bytes --]


TODO: currently dependent upon IOMMU being enabled (due to bus scan only happening in that case)
TODO: SR-IOV (and the like?) devices not visible at time of MMCFG registration
TODO: remove //temp-s

--- unstable.orig/xen/arch/x86/mm.c
+++ unstable/xen/arch/x86/mm.c
@@ -5208,6 +5208,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
 
     /* We are looking only for read-only mappings of p.t. pages. */
     if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
+         rangeset_contains_singleton(mmio_ro_ranges, l1e_get_pfn(pte)) ||
          !get_page_from_pagenr(l1e_get_pfn(pte), d) )
         goto bail;
 
@@ -5255,6 +5256,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
 struct mmio_ro_emulate_ctxt {
     struct x86_emulate_ctxt ctxt;
     unsigned long cr2;
+    unsigned int seg, bdf;
 };
 
 static int mmio_ro_emulated_read(
@@ -5294,6 +5296,50 @@ static const struct x86_emulate_ops mmio
     .write      = mmio_ro_emulated_write,
 };
 
+static int mmio_intercept_write(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct mmio_ro_emulate_ctxt *mmio_ctxt =
+        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+
+    /*
+     * Only allow naturally-aligned stores no wider than 4 bytes to the
+     * original %cr2 address.
+     */
+    if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 ||
+         offset != mmio_ctxt->cr2 )
+    {
+        MEM_LOG("mmio_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)",
+                mmio_ctxt->cr2, offset, bytes);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    offset &= 0xfff;
+printk("%04x:%02x:%02x.%u write [%lx] %0*x\n",//temp
+       mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), PCI_SLOT(mmio_ctxt->bdf), PCI_FUNC(mmio_ctxt->bdf),//temp
+       offset, bytes * 2, *(uint32_t*)p_data);//temp
+    pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, offset, bytes,
+                             p_data);
+printk("%04x:%02x:%02x.%u write [%lx]=%0*x\n",//temp
+       mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), PCI_SLOT(mmio_ctxt->bdf), PCI_FUNC(mmio_ctxt->bdf),//temp
+       offset, bytes * 2, *(uint32_t*)p_data);//temp
+    pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf),
+                    PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes,
+                    *(uint32_t *)p_data);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct x86_emulate_ops mmio_intercept_ops = {
+    .read       = mmio_ro_emulated_read,
+    .insn_fetch = ptwr_emulated_read,
+    .write      = mmio_intercept_write,
+};
+
 /* Check if guest is trying to modify a r/o MMIO page. */
 int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
                           struct cpu_user_regs *regs)
@@ -5308,6 +5354,7 @@ int mmio_ro_do_page_fault(struct vcpu *v
         .ctxt.swint_emulate = x86_swint_emulate_none,
         .cr2 = addr
     };
+    const unsigned long *map;
     int rc;
 
     /* Attempt to read the PTE that maps the VA being accessed. */
@@ -5332,7 +5379,14 @@ int mmio_ro_do_page_fault(struct vcpu *v
     if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
         return 0;
 
-    rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
+    if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) &&
+         (map = pci_get_intercept_map(mmio_ro_ctxt.seg)) != NULL &&
+         test_bit(mmio_ro_ctxt.bdf, map) &&
+         ((map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL ||
+          !test_bit(mmio_ro_ctxt.bdf, map)) )
+        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops);
+    else
+        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
 
     return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
 }
--- unstable.orig/xen/arch/x86/pci.c
+++ unstable/xen/arch/x86/pci.c
@@ -75,6 +75,15 @@ int pci_conf_write_intercept(unsigned in
     struct pci_dev *pdev;
     int rc = 0;
 
+    if ( !data ) /* probe */
+    {
+         ASSERT(!~reg && !size);
+         return pci_find_cap_offset(seg, PCI_BUS(bdf), PCI_SLOT(bdf),
+                                    PCI_FUNC(bdf), PCI_CAP_ID_MSI) ||
+                pci_find_cap_offset(seg, PCI_BUS(bdf), PCI_SLOT(bdf),
+                                    PCI_FUNC(bdf), PCI_CAP_ID_MSIX);
+    }
+
     /*
      * Avoid expensive operations when no hook is going to do anything
      * for the access anyway.
--- unstable.orig/xen/arch/x86/x86_64/mmconfig_64.c
+++ unstable/xen/arch/x86/x86_64/mmconfig_64.c
@@ -154,10 +154,25 @@ void arch_pci_ro_device(int seg, int bdf
     }
 }
 
+static void set_ro(const struct acpi_mcfg_allocation *cfg,
+                   const unsigned long *map)
+{
+    unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
+    unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
+
+    if (!map)
+        return;
+
+    while ((bdf = find_next_bit(map, end + 1, bdf)) <= end) {
+        arch_pci_ro_device(cfg->pci_segment, bdf);
+        if (bdf++ == end)
+            break;
+    }
+}
+
 int pci_mmcfg_arch_enable(unsigned int idx)
 {
     const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
-    const unsigned long *ro_map = pci_get_ro_map(cfg->pci_segment);
 
     if (pci_mmcfg_virt[idx].virt)
         return 0;
@@ -169,16 +184,10 @@ int pci_mmcfg_arch_enable(unsigned int i
     }
     printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
-    if (ro_map) {
-        unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
-        unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
-
-        while ((bdf = find_next_bit(ro_map, end + 1, bdf)) <= end) {
-            arch_pci_ro_device(cfg->pci_segment, bdf);
-            if (bdf++ == end)
-                break;
-        }
-    }
+
+    set_ro(cfg, pci_get_ro_map(cfg->pci_segment));
+    set_ro(cfg, pci_get_intercept_map(cfg->pci_segment));
+
     return 0;
 }
 
@@ -197,6 +206,32 @@ void pci_mmcfg_arch_disable(unsigned int
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
 }
 
+bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                        unsigned int *bdf)
+{
+    unsigned int idx;
+
+    for (idx = 0; idx < pci_mmcfg_config_num; ++idx) {
+        const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg;
+
+        if (pci_mmcfg_virt[idx].virt &&
+            mfn >= PFN_DOWN(cfg->address + (cfg->start_bus_number << 20)) &&
+            mfn < PFN_DOWN(cfg->address + ((cfg->end_bus_number + 1) << 20))) {
+static unsigned long cnt, thr;//temp
+            *seg = cfg->pci_segment;
+            *bdf = mfn - PFN_DOWN(cfg->address);
+if(++cnt > thr) {//temp
+ thr |= cnt;
+ printk("%lx -> %04x:%02x:%02x.%u\n", mfn, *seg, PCI_BUS(*bdf), PCI_SLOT(*bdf), PCI_FUNC(*bdf));
+}
+            return 1;
+        }
+    }
+
+printk("%lx -> ???\n", mfn);//temp
+    return 0;
+}
+
 int __init pci_mmcfg_arch_init(void)
 {
     int i;
--- unstable.orig/xen/drivers/passthrough/pci.c
+++ unstable/xen/drivers/passthrough/pci.c
@@ -39,6 +39,7 @@ struct pci_seg {
     struct list_head alldevs_list;
     u16 nr;
     unsigned long *ro_map;
+    unsigned long *intercept_map;
     /* bus2bridge_lock protects bus2bridge array */
     spinlock_t bus2bridge_lock;
 #define MAX_BUSES 256
@@ -123,6 +124,13 @@ const unsigned long *pci_get_ro_map(u16 
     return pseg ? pseg->ro_map : NULL;
 }
 
+const unsigned long *pci_get_intercept_map(u16 seg)
+{
+    struct pci_seg *pseg = get_pseg(seg);
+
+    return pseg ? pseg->intercept_map : NULL;
+}
+
 static struct phantom_dev {
     u16 seg;
     u8 bus, slot, stride;
@@ -284,6 +292,20 @@ static struct pci_dev *alloc_pdev(struct
     pdev->domain = NULL;
     INIT_LIST_HEAD(&pdev->msi_list);
 
+    if ( !pseg->intercept_map &&
+         pci_conf_write_intercept(pseg->nr, PCI_BDF2(bus, devfn), ~0, 0,
+                                  NULL) > 0 )
+    {
+        size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);
+
+        pseg->intercept_map = vzalloc(sz);
+        if ( !pseg->intercept_map )
+        {
+            xfree(pdev);
+            return NULL;
+        }
+    }
+
     if ( pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                              PCI_CAP_ID_MSIX) )
     {
@@ -366,6 +388,13 @@ static struct pci_dev *alloc_pdev(struct
 
     check_pdev(pdev);
 
+    if ( pci_conf_write_intercept(pseg->nr, PCI_BDF2(bus, devfn), ~0, 0,
+                                  NULL) > 0 )
+    {
+        set_bit(PCI_BDF2(bus, devfn), pseg->intercept_map);
+        arch_pci_ro_device(pseg->nr, PCI_BDF2(bus, devfn));
+    }
+
     return pdev;
 }
 
--- unstable.orig/xen/include/asm-x86/pci.h
+++ unstable/xen/include/asm-x86/pci.h
@@ -20,5 +20,7 @@ int pci_conf_write_intercept(unsigned in
                              uint32_t *data);
 int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
                                  unsigned int size, uint32_t *data);
+bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                        unsigned int *bdf);
 
 #endif /* __X86_PCI_H__ */
--- unstable.orig/xen/include/xen/pci.h
+++ unstable/xen/include/xen/pci.h
@@ -106,6 +106,7 @@ void setup_hwdom_pci_devices(struct doma
 int pci_release_devices(struct domain *d);
 int pci_add_segment(u16 seg);
 const unsigned long *pci_get_ro_map(u16 seg);
+const unsigned long *pci_get_intercept_map(u16 seg);
 int pci_add_device(u16 seg, u8 bus, u8 devfn,
                    const struct pci_dev_info *, nodeid_t node);
 int pci_remove_device(u16 seg, u8 bus, u8 devfn);

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2015-07-13 11:42 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-22 14:38 [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
2015-06-22 14:46 ` [PATCH v4 RFC 1/6] x86/PCI: add config space write abstract intercept logic Jan Beulich
2015-06-22 19:31   ` Konrad Rzeszutek Wilk
2015-06-23  7:21     ` Jan Beulich
2015-06-23 11:06       ` Konrad Rzeszutek Wilk
2015-06-23 12:55         ` Jan Beulich
2015-06-24 17:09   ` Andrew Cooper
2015-06-22 14:47 ` [PATCH v4 RFC 2/6] x86/MSI-X: track host and guest mask-all requests separately Jan Beulich
2015-06-24 17:15   ` Andrew Cooper
2015-06-25  8:01     ` Jan Beulich
2015-06-25 14:25       ` Andrew Cooper
2015-06-22 14:49 ` [PATCH v4 3/6] x86/MSI-X: be more careful during teardown Jan Beulich
2015-06-22 14:50 ` [PATCH v4 4/6] x86/MSI-X: access MSI-X table only after having enabled MSI-X Jan Beulich
2015-06-22 14:51 ` [PATCH v4 RFC 5/6] x86/MSI-X: reduce fiddling with control register during restore Jan Beulich
2015-06-22 14:51 ` [PATCH v4 RFC 6/6] x86/MSI: properly track guest masking requests Jan Beulich
2015-06-24 17:24   ` Andrew Cooper
2015-06-25  8:04     ` Jan Beulich
2015-06-25 14:26       ` Andrew Cooper
2015-06-25 14:49         ` Jan Beulich
2015-07-13 11:42 ` [PATCH v4 RFC 0/6] x86/MSI: XSA-120, 126, 128-131 follow-up Jan Beulich
