* [PATCH resend 0/3] x86/IOMMU: multi-vector MSI
@ 2013-07-16 10:00 Jan Beulich
2013-07-16 10:13 ` [PATCH resend 1/3] VT-d: enable for " Jan Beulich
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 10:00 UTC (permalink / raw)
To: xen-devel; +Cc: Keir Fraser, xiantao.zhang
These are the patches from an earlier submitted series that haven't
been acked by anyone so far.
1: VT-d: enable for multi-vector MSI
2: x86: enable multi-vector MSI
3: pciif: add multi-vector-MSI command
Signed-off-by: Jan Beulich <jbeulich@suse.com>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH resend 1/3] VT-d: enable for multi-vector MSI
2013-07-16 10:00 [PATCH resend 0/3] x86/IOMMU: multi-vector MSI Jan Beulich
@ 2013-07-16 10:13 ` Jan Beulich
2013-07-16 11:15 ` Andrew Cooper
2013-07-16 10:14 ` [PATCH resend 2/3] x86: enable " Jan Beulich
2013-07-16 10:15 ` [PATCH resend 3/3] pciif: add multi-vector-MSI command Jan Beulich
2 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 10:13 UTC (permalink / raw)
To: xen-devel, Jan Beulich; +Cc: Keir Fraser, xiantao.zhang
[-- Attachment #1: Type: text/plain, Size: 6549 bytes --]
The main change being to make alloc_remap_entry() capable of allocating
a block of entries.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
}
/*
- * Look for a free intr remap entry.
+ * Look for a free intr remap entry (or a contiguous set thereof).
* Need hold iremap_lock, and setup returned entry before releasing lock.
*/
-static int alloc_remap_entry(struct iommu *iommu)
+static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
{
struct iremap_entry *iremap_entries = NULL;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- int i;
+ unsigned int i, found;
ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
- for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
+ for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
{
struct iremap_entry *p;
if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
@@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
else
p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
- if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
+ if ( p->lo_val || p->hi_val ) /* not a free entry */
+ found = 0;
+ else if ( ++found == nr )
break;
}
@@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
unmap_vtd_domain_page(iremap_entries);
if ( i < IREMAP_ENTRY_NR )
- ir_ctrl->iremap_num++;
+ ir_ctrl->iremap_num += nr;
return i;
}
@@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
index = apic_pin_2_ir_idx[apic][ioapic_pin];
if ( index < 0 )
{
- index = alloc_remap_entry(iommu);
+ index = alloc_remap_entry(iommu, 1);
if ( index < IREMAP_ENTRY_NR )
apic_pin_2_ir_idx[apic][ioapic_pin] = index;
}
@@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
}
static int remap_entry_to_msi_msg(
- struct iommu *iommu, struct msi_msg *msg)
+ struct iommu *iommu, struct msi_msg *msg, unsigned int index)
{
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct msi_msg_remap_entry *remap_rte;
- int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
remap_rte = (struct msi_msg_remap_entry *) msg;
- index = (remap_rte->address_lo.index_15 << 15) |
+ index += (remap_rte->address_lo.index_15 << 15) |
remap_rte->address_lo.index_0_14;
- if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
+ if ( index >= IREMAP_ENTRY_NR )
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: index (%d) for remap table is invalid !\n",
@@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct iremap_entry new_ire;
struct msi_msg_remap_entry *remap_rte;
- int index;
+ unsigned int index, i, nr = 1;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- remap_rte = (struct msi_msg_remap_entry *) msg;
+ if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ nr = msi_desc->msi.nvec;
+
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
if ( msg == NULL )
{
- /* Free specified unused IRTE */
- free_remap_entry(iommu, msi_desc->remap_index);
+ /* Free specified unused IRTEs */
+ for ( i = 0; i < nr; ++i )
+ free_remap_entry(iommu, msi_desc->remap_index + i);
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return 0;
}
if ( msi_desc->remap_index < 0 )
{
- /*
- * TODO: Multiple-vector MSI requires allocating multiple continuous
- * entries and configuring addr/data of msi_msg in different way. So
- * alloca_remap_entry will be changed if enabling multiple-vector MSI
- * in future.
- */
- index = alloc_remap_entry(iommu);
- msi_desc->remap_index = index;
+ index = alloc_remap_entry(iommu, nr);
+ for ( i = 0; i < nr; ++i )
+ msi_desc[i].remap_index = index + i;
}
else
index = msi_desc->remap_index;
@@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
"%s: intremap index (%d) is larger than"
" the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
- msi_desc->remap_index = -1;
+ for ( i = 0; i < nr; ++i )
+ msi_desc[i].remap_index = -1;
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
@@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
new_ire.lo.p = 1; /* finally, set present bit */
/* now construct new MSI/MSI-X rte entry */
+ remap_rte = (struct msi_msg_remap_entry *)msg;
remap_rte->address_lo.dontcare = 0;
- remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
- remap_rte->address_lo.index_0_14 = index & 0x7fff;
+ i = index;
+ if ( !nr )
+ i -= msi_desc->msi_attrib.entry_nr;
+ remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
+ remap_rte->address_lo.index_0_14 = i & 0x7fff;
remap_rte->address_lo.SHV = 1;
remap_rte->address_lo.format = 1;
remap_rte->address_hi = 0;
- remap_rte->data = 0;
+ remap_rte->data = index - i;
memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
@@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
: hpet_to_drhd(msi_desc->hpet_id);
if ( drhd )
- remap_entry_to_msi_msg(drhd->iommu, msg);
+ remap_entry_to_msi_msg(drhd->iommu, msg,
+ msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
+ ? msi_desc->msi_attrib.entry_nr : 0);
}
int msi_msg_write_remap_rte(
@@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
return 0;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- msi_desc->remap_index = alloc_remap_entry(iommu);
+ msi_desc->remap_index = alloc_remap_entry(iommu, 1);
if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
{
dprintk(XENLOG_ERR VTDPREFIX,
[-- Attachment #2: VT-d-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 6582 bytes --]
VT-d: enable for multi-vector MSI
The main change being to make alloc_remap_entry() capable of allocating
a block of entries.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
}
/*
- * Look for a free intr remap entry.
+ * Look for a free intr remap entry (or a contiguous set thereof).
* Need hold iremap_lock, and setup returned entry before releasing lock.
*/
-static int alloc_remap_entry(struct iommu *iommu)
+static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
{
struct iremap_entry *iremap_entries = NULL;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- int i;
+ unsigned int i, found;
ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
- for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
+ for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
{
struct iremap_entry *p;
if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
@@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
else
p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
- if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
+ if ( p->lo_val || p->hi_val ) /* not a free entry */
+ found = 0;
+ else if ( ++found == nr )
break;
}
@@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
unmap_vtd_domain_page(iremap_entries);
if ( i < IREMAP_ENTRY_NR )
- ir_ctrl->iremap_num++;
+ ir_ctrl->iremap_num += nr;
return i;
}
@@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
index = apic_pin_2_ir_idx[apic][ioapic_pin];
if ( index < 0 )
{
- index = alloc_remap_entry(iommu);
+ index = alloc_remap_entry(iommu, 1);
if ( index < IREMAP_ENTRY_NR )
apic_pin_2_ir_idx[apic][ioapic_pin] = index;
}
@@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
}
static int remap_entry_to_msi_msg(
- struct iommu *iommu, struct msi_msg *msg)
+ struct iommu *iommu, struct msi_msg *msg, unsigned int index)
{
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct msi_msg_remap_entry *remap_rte;
- int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
remap_rte = (struct msi_msg_remap_entry *) msg;
- index = (remap_rte->address_lo.index_15 << 15) |
+ index += (remap_rte->address_lo.index_15 << 15) |
remap_rte->address_lo.index_0_14;
- if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
+ if ( index >= IREMAP_ENTRY_NR )
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: index (%d) for remap table is invalid !\n",
@@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct iremap_entry new_ire;
struct msi_msg_remap_entry *remap_rte;
- int index;
+ unsigned int index, i, nr = 1;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- remap_rte = (struct msi_msg_remap_entry *) msg;
+ if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ nr = msi_desc->msi.nvec;
+
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
if ( msg == NULL )
{
- /* Free specified unused IRTE */
- free_remap_entry(iommu, msi_desc->remap_index);
+ /* Free specified unused IRTEs */
+ for ( i = 0; i < nr; ++i )
+ free_remap_entry(iommu, msi_desc->remap_index + i);
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return 0;
}
if ( msi_desc->remap_index < 0 )
{
- /*
- * TODO: Multiple-vector MSI requires allocating multiple continuous
- * entries and configuring addr/data of msi_msg in different way. So
- * alloca_remap_entry will be changed if enabling multiple-vector MSI
- * in future.
- */
- index = alloc_remap_entry(iommu);
- msi_desc->remap_index = index;
+ index = alloc_remap_entry(iommu, nr);
+ for ( i = 0; i < nr; ++i )
+ msi_desc[i].remap_index = index + i;
}
else
index = msi_desc->remap_index;
@@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
"%s: intremap index (%d) is larger than"
" the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
- msi_desc->remap_index = -1;
+ for ( i = 0; i < nr; ++i )
+ msi_desc[i].remap_index = -1;
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
@@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
new_ire.lo.p = 1; /* finally, set present bit */
/* now construct new MSI/MSI-X rte entry */
+ remap_rte = (struct msi_msg_remap_entry *)msg;
remap_rte->address_lo.dontcare = 0;
- remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
- remap_rte->address_lo.index_0_14 = index & 0x7fff;
+ i = index;
+ if ( !nr )
+ i -= msi_desc->msi_attrib.entry_nr;
+ remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
+ remap_rte->address_lo.index_0_14 = i & 0x7fff;
remap_rte->address_lo.SHV = 1;
remap_rte->address_lo.format = 1;
remap_rte->address_hi = 0;
- remap_rte->data = 0;
+ remap_rte->data = index - i;
memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
@@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
: hpet_to_drhd(msi_desc->hpet_id);
if ( drhd )
- remap_entry_to_msi_msg(drhd->iommu, msg);
+ remap_entry_to_msi_msg(drhd->iommu, msg,
+ msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
+ ? msi_desc->msi_attrib.entry_nr : 0);
}
int msi_msg_write_remap_rte(
@@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
return 0;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- msi_desc->remap_index = alloc_remap_entry(iommu);
+ msi_desc->remap_index = alloc_remap_entry(iommu, 1);
if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
{
dprintk(XENLOG_ERR VTDPREFIX,
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH resend 2/3] x86: enable multi-vector MSI
2013-07-16 10:00 [PATCH resend 0/3] x86/IOMMU: multi-vector MSI Jan Beulich
2013-07-16 10:13 ` [PATCH resend 1/3] VT-d: enable for " Jan Beulich
@ 2013-07-16 10:14 ` Jan Beulich
2013-07-16 11:36 ` Andrew Cooper
` (2 more replies)
2013-07-16 10:15 ` [PATCH resend 3/3] pciif: add multi-vector-MSI command Jan Beulich
2 siblings, 3 replies; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 10:14 UTC (permalink / raw)
To: xen-devel; +Cc: Keir Fraser, xiantao.zhang
[-- Attachment #1: Type: text/plain, Size: 23243 bytes --]
This implies
- extending the public interface to have a way to request a block of
MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
return -ENOSPC;
}
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+ unsigned int i, found = 0;
+
+ ASSERT(spin_is_locked(&d->event_lock));
+
+ for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+ if ( is_free_pirq(d, pirq_info(d, i)) )
+ {
+ pirq_get_info(d, i);
+ if ( ++found == nr )
+ return i;
+ }
+ else
+ found = 0;
+
+ return -ENOSPC;
+}
+
int map_domain_pirq(
struct domain *d, int pirq, int irq, int type, void *data)
{
@@ -1918,11 +1937,12 @@ int map_domain_pirq(
desc = irq_to_desc(irq);
- if ( type == MAP_PIRQ_TYPE_MSI )
+ if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
{
struct msi_info *msi = (struct msi_info *)data;
struct msi_desc *msi_desc;
struct pci_dev *pdev;
+ unsigned int nr = 0;
ASSERT(spin_is_locked(&pcidevs_lock));
@@ -1933,7 +1953,14 @@ int map_domain_pirq(
pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
ret = pci_enable_msi(msi, &msi_desc);
if ( ret )
+ {
+ if ( ret > 0 )
+ {
+ msi->entry_nr = ret;
+ ret = -ENFILE;
+ }
goto done;
+ }
spin_lock_irqsave(&desc->lock, flags);
@@ -1947,25 +1974,73 @@ int map_domain_pirq(
goto done;
}
- ret = setup_msi_irq(desc, msi_desc);
- if ( ret )
+ while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
{
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+ !desc->arch.used_vectors )
+ {
+ desc->arch.used_vectors = &pdev->arch.used_vectors;
+ if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+ {
+ int vector = desc->arch.vector;
+
+ ASSERT(!test_bit(vector, desc->arch.used_vectors));
+ set_bit(vector, desc->arch.used_vectors);
+ }
+ }
+ if ( type == MAP_PIRQ_TYPE_MSI ||
+ msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+ ++nr == msi->entry_nr )
+ break;
+
+ set_domain_irq_pirq(d, irq, info);
spin_unlock_irqrestore(&desc->lock, flags);
- pci_disable_msi(msi_desc);
- goto done;
+
+ info = NULL;
+ irq = create_irq(NUMA_NO_NODE);
+ ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+ : irq;
+ if ( ret )
+ break;
+ msi_desc[nr].irq = irq;
+
+ if ( irq_permit_access(d, irq) != 0 )
+ printk(XENLOG_G_WARNING
+ "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq);
+
+ desc = irq_to_desc(irq);
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if ( desc->handler != &no_irq_type )
+ {
+ dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+ d->domain_id, irq, pirq + nr, desc->handler->typename);
+ ret = -EBUSY;
+ break;
+ }
}
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
- && !desc->arch.used_vectors )
+ if ( ret )
{
- desc->arch.used_vectors = &pdev->arch.used_vectors;
- if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+ spin_unlock_irqrestore(&desc->lock, flags);
+ while ( nr-- )
{
- int vector = desc->arch.vector;
- ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
- set_bit(vector, desc->arch.used_vectors);
+ if ( irq >= 0 )
+ {
+ if ( irq_deny_access(d, irq) )
+ printk(XENLOG_G_ERR
+ "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq);
+ destroy_irq(irq);
+ }
+ if ( info )
+ cleanup_domain_irq_pirq(d, irq, info);
+ info = pirq_info(d, pirq + nr);
+ irq = info->arch.irq;
}
+ pci_disable_msi(msi_desc);
+ goto done;
}
set_domain_irq_pirq(d, irq, info);
@@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
{
unsigned long flags;
struct irq_desc *desc;
- int irq, ret = 0;
+ int irq, ret = 0, rc;
+ unsigned int i, nr = 1;
bool_t forced_unbind;
struct pirq *info;
struct msi_desc *msi_desc = NULL;
@@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
desc = irq_to_desc(irq);
msi_desc = desc->msi_desc;
+ if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
+ if ( msi_desc->msi_attrib.entry_nr )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: trying to unmap secondary MSI pirq %d\n",
+ d->domain_id, pirq);
+ ret = -EBUSY;
+ goto done;
+ }
+ nr = msi_desc->msi.nvec;
+ }
ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
if ( ret )
@@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
spin_lock_irqsave(&desc->lock, flags);
- BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
- if ( !forced_unbind )
- clear_domain_irq_pirq(d, irq, info);
- else
+ for ( i = 0; ; )
{
- info->arch.irq = -irq;
- radix_tree_replace_slot(
- radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
- radix_tree_int_to_ptr(-pirq));
+ BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+ if ( !forced_unbind )
+ clear_domain_irq_pirq(d, irq, info);
+ else
+ {
+ info->arch.irq = -irq;
+ radix_tree_replace_slot(
+ radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+ radix_tree_int_to_ptr(-pirq));
+ }
+
+ if ( msi_desc )
+ {
+ desc->handler = &no_irq_type;
+ desc->msi_desc = NULL;
+ }
+
+ if ( ++i == nr )
+ break;
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !forced_unbind )
+ cleanup_domain_irq_pirq(d, irq, info);
+
+ rc = irq_deny_access(d, irq);
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq + i);
+ ret = rc;
+ }
+
+ do {
+ info = pirq_info(d, pirq + i);
+ if ( info && (irq = info->arch.irq) > 0 )
+ break;
+ printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+ d->domain_id, pirq + i);
+ } while ( ++i < nr );
+
+ if ( i == nr )
+ {
+ desc = NULL;
+ break;
+ }
+
+ desc = irq_to_desc(irq);
+ BUG_ON(desc->msi_desc != msi_desc + i);
+
+ spin_lock_irqsave(&desc->lock, flags);
}
- if ( msi_desc )
+ if ( desc )
{
- desc->handler = &no_irq_type;
- desc->msi_desc = NULL;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !forced_unbind )
+ cleanup_domain_irq_pirq(d, irq, info);
+
+ rc = irq_deny_access(d, irq);
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq + nr - 1);
+ ret = rc;
+ }
}
- spin_unlock_irqrestore(&desc->lock, flags);
if (msi_desc)
msi_free_irq(msi_desc);
- if ( !forced_unbind )
- cleanup_domain_irq_pirq(d, irq, info);
-
- ret = irq_deny_access(d, irq);
- if ( ret )
- printk(XENLOG_G_ERR
- "dom%d: could not deny access to IRQ%d (pirq %d)\n",
- d->domain_id, irq, pirq);
-
done:
return ret;
}
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ int nr = entry->msi_attrib.entry_nr;
+
+ ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+ if ( nr )
+ return 0;
pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
msg->address_lo);
@@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
u8 func = PCI_FUNC(entry->dev->devfn);
mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
- mask_bits &= ~(1);
- mask_bits |= flag;
+ mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+ mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
}
break;
@@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
case PCI_CAP_ID_MSI:
if (!entry->dev || !entry->msi_attrib.maskbit)
break;
- return pci_conf_read32(entry->dev->seg, entry->dev->bus,
- PCI_SLOT(entry->dev->devfn),
- PCI_FUNC(entry->dev->devfn),
- entry->msi.mpos) & 1;
+ return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
+ PCI_SLOT(entry->dev->devfn),
+ PCI_FUNC(entry->dev->devfn),
+ entry->msi.mpos) >>
+ entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
@@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
.set_affinity = set_msi_affinity
};
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
{
struct msi_desc *entry;
- entry = xmalloc(struct msi_desc);
+ entry = xmalloc_array(struct msi_desc, nr);
if ( !entry )
return NULL;
INIT_LIST_HEAD(&entry->list);
- entry->dev = NULL;
- entry->remap_index = -1;
+ while ( nr-- )
+ {
+ entry[nr].dev = NULL;
+ entry[nr].remap_index = -1;
+ }
return entry;
}
@@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
int msi_free_irq(struct msi_desc *entry)
{
- destroy_irq(entry->irq);
+ unsigned int nr = entry->msi.nvec;
+
if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
{
unsigned long start;
start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
msix_put_fixmap(entry->dev, virt_to_fix(start));
+ nr = 1;
}
- /* Free the unused IRTE if intr remap enabled */
- if ( iommu_intremap )
- iommu_update_ire_from_msi(entry, NULL);
+ while ( nr-- )
+ {
+ destroy_irq(entry[nr].irq);
+
+ /* Free the unused IRTE if intr remap enabled */
+ if ( iommu_intremap )
+ iommu_update_ire_from_msi(entry + nr, NULL);
+ }
list_del(&entry->list);
xfree(entry);
@@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
**/
static int msi_capability_init(struct pci_dev *dev,
int irq,
- struct msi_desc **desc)
+ struct msi_desc **desc,
+ unsigned int nvec)
{
struct msi_desc *entry;
int pos;
- unsigned int maxvec, mpos;
+ unsigned int i, maxvec, mpos;
u16 control, seg = dev->seg;
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
@@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
maxvec = multi_msi_capable(control);
+ if ( nvec > maxvec )
+ return maxvec;
control &= ~PCI_MSI_FLAGS_QSIZE;
+ multi_msi_enable(control, nvec);
/* MSI Entry Initialization */
msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(nvec);
if ( !entry )
return -ENOMEM;
- entry->msi_attrib.type = PCI_CAP_ID_MSI;
- entry->msi_attrib.is_64 = is_64bit_address(control);
- entry->msi_attrib.entry_nr = 0;
- entry->msi_attrib.maskbit = is_mask_bit_support(control);
- entry->msi_attrib.masked = 1;
- entry->msi_attrib.pos = pos;
mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
- entry->msi.nvec = 1;
+ for ( i = 0; i < nvec; ++i )
+ {
+ entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+ entry[i].msi_attrib.is_64 = is_64bit_address(control);
+ entry[i].msi_attrib.entry_nr = i;
+ entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+ entry[i].msi_attrib.masked = 1;
+ entry[i].msi_attrib.pos = pos;
+ if ( entry[i].msi_attrib.maskbit )
+ entry[i].msi.mpos = mpos;
+ entry[i].msi.nvec = 0;
+ entry[i].dev = dev;
+ }
+ entry->msi.nvec = nvec;
entry->irq = irq;
- if ( is_mask_bit_support(control) )
- entry->msi.mpos = mpos;
- entry->dev = dev;
if ( entry->msi_attrib.maskbit )
{
u32 maskbits;
@@ -693,7 +717,7 @@ static int msix_capability_init(struct p
if ( desc )
{
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(1);
if ( !entry )
return -ENOMEM;
ASSERT(msi);
@@ -851,7 +875,6 @@ static int msix_capability_init(struct p
static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
- int status;
struct pci_dev *pdev;
struct msi_desc *old_desc;
@@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
pci_disable_msi(old_desc);
}
- status = msi_capability_init(pdev, msi->irq, desc);
- return status;
+ return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
}
static void __pci_disable_msi(struct msi_desc *entry)
@@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
{
+ unsigned int i = 0, nr = 1;
+
irq = entry->irq;
desc = &irq_desc[irq];
@@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
if (desc->msi_desc != entry)
{
+ bogus:
dprintk(XENLOG_ERR,
- "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+ "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn));
+ PCI_FUNC(pdev->devfn), i);
spin_unlock_irqrestore(&desc->lock, flags);
return -EINVAL;
}
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
msi_set_enable(pdev, 0);
+ nr = entry->msi.nvec;
+ }
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
msix_set_enable(pdev, 0);
msg = entry->msg;
write_msi_msg(entry, &msg);
- msi_set_mask_bit(desc, entry->msi_attrib.masked);
+ for ( i = 0; ; )
+ {
+ msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !--nr )
+ break;
+
+ desc = &irq_desc[entry[++i].irq];
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( desc->msi_desc != entry + i )
+ goto bogus;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
+ unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+ u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos);
+
+ control &= ~PCI_MSI_FLAGS_QSIZE;
+ multi_msi_enable(control, entry->msi.nvec);
+ pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos, control);
+
msi_set_enable(pdev, 1);
+ }
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
msix_set_enable(pdev, 1);
-
- spin_unlock_irqrestore(&desc->lock, flags);
}
return 0;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
break;
case MAP_PIRQ_TYPE_MSI:
+ if ( !msi->table_base )
+ msi->entry_nr = 1;
irq = *index;
if ( irq == -1 )
+ case MAP_PIRQ_TYPE_MULTI_MSI:
irq = create_irq(NUMA_NO_NODE);
if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
goto done;
}
}
+ else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+ {
+ if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+ ret = -EDOM;
+ else if ( msi->entry_nr != 1 && !iommu_intremap )
+ ret = -EOPNOTSUPP;
+ else
+ {
+ while ( msi->entry_nr & (msi->entry_nr - 1) )
+ msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+ pirq = get_free_pirqs(d, msi->entry_nr);
+ if ( pirq < 0 )
+ {
+ while ( (msi->entry_nr >>= 1) > 1 )
+ if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+ break;
+ dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+ d->domain_id, msi->entry_nr << 1);
+ ret = pirq;
+ }
+ }
+ if ( ret < 0 )
+ goto done;
+ }
else
{
pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
done:
spin_unlock(&d->event_lock);
spin_unlock(&pcidevs_lock);
- if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
- destroy_irq(irq);
+ if ( ret != 0 )
+ switch ( type )
+ {
+ case MAP_PIRQ_TYPE_MSI:
+ if ( *index == -1 )
+ case MAP_PIRQ_TYPE_MULTI_MSI:
+ destroy_irq(irq);
+ break;
+ }
free_domain:
rcu_unlock_domain(d);
return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( copy_from_guest(&map, arg, 1) != 0 )
break;
- if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+ switch ( map.type )
{
+ case MAP_PIRQ_TYPE_MSI_SEG:
map.type = MAP_PIRQ_TYPE_MSI;
msi.seg = map.bus >> 16;
- }
- else
- {
+ break;
+
+ case MAP_PIRQ_TYPE_MULTI_MSI:
+ if ( map.table_base )
+ return -EINVAL;
+ msi.seg = map.bus >> 16;
+ break;
+
+ default:
msi.seg = 0;
+ break;
}
msi.bus = map.bus;
msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
&msi);
+ if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+ map.entry_nr = msi.entry_nr;
if ( __copy_to_guest(arg, &map, 1) )
ret = -EFAULT;
break;
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
void *data);
int unmap_domain_pirq(struct domain *d, int pirq);
int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
void free_domain_pirqs(struct domain *d);
int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
#define multi_msi_capable(control) \
(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
#define multi_msi_enable(control, num) \
- control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+ control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
#define msi_enable(control, num) multi_msi_enable(control, num); \
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
#define MAP_PIRQ_TYPE_GSI 0x1
#define MAP_PIRQ_TYPE_UNKNOWN 0x2
#define MAP_PIRQ_TYPE_MSI_SEG 0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI 0x4
#define PHYSDEVOP_map_pirq 13
struct physdev_map_pirq {
domid_t domid;
/* IN */
int type;
- /* IN */
+ /* IN (ignored for ..._MULTI_MSI) */
int index;
/* IN or OUT */
int pirq;
- /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+ /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
int bus;
/* IN */
int devfn;
- /* IN */
+ /* IN (also OUT for ..._MULTI_MSI) */
int entry_nr;
/* IN */
uint64_t table_base;
[-- Attachment #2: x86-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 23271 bytes --]
x86: enable multi-vector MSI
This implies
- extending the public interface to have a way to request a block of
MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
return -ENOSPC;
}
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+ unsigned int i, found = 0;
+
+ ASSERT(spin_is_locked(&d->event_lock));
+
+ for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+ if ( is_free_pirq(d, pirq_info(d, i)) )
+ {
+ pirq_get_info(d, i);
+ if ( ++found == nr )
+ return i;
+ }
+ else
+ found = 0;
+
+ return -ENOSPC;
+}
+
int map_domain_pirq(
struct domain *d, int pirq, int irq, int type, void *data)
{
@@ -1918,11 +1937,12 @@ int map_domain_pirq(
desc = irq_to_desc(irq);
- if ( type == MAP_PIRQ_TYPE_MSI )
+ if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
{
struct msi_info *msi = (struct msi_info *)data;
struct msi_desc *msi_desc;
struct pci_dev *pdev;
+ unsigned int nr = 0;
ASSERT(spin_is_locked(&pcidevs_lock));
@@ -1933,7 +1953,14 @@ int map_domain_pirq(
pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
ret = pci_enable_msi(msi, &msi_desc);
if ( ret )
+ {
+ if ( ret > 0 )
+ {
+ msi->entry_nr = ret;
+ ret = -ENFILE;
+ }
goto done;
+ }
spin_lock_irqsave(&desc->lock, flags);
@@ -1947,25 +1974,73 @@ int map_domain_pirq(
goto done;
}
- ret = setup_msi_irq(desc, msi_desc);
- if ( ret )
+ while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
{
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+ !desc->arch.used_vectors )
+ {
+ desc->arch.used_vectors = &pdev->arch.used_vectors;
+ if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+ {
+ int vector = desc->arch.vector;
+
+ ASSERT(!test_bit(vector, desc->arch.used_vectors));
+ set_bit(vector, desc->arch.used_vectors);
+ }
+ }
+ if ( type == MAP_PIRQ_TYPE_MSI ||
+ msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+ ++nr == msi->entry_nr )
+ break;
+
+ set_domain_irq_pirq(d, irq, info);
spin_unlock_irqrestore(&desc->lock, flags);
- pci_disable_msi(msi_desc);
- goto done;
+
+ info = NULL;
+ irq = create_irq(NUMA_NO_NODE);
+ ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+ : irq;
+ if ( ret )
+ break;
+ msi_desc[nr].irq = irq;
+
+ if ( irq_permit_access(d, irq) != 0 )
+ printk(XENLOG_G_WARNING
+ "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq);
+
+ desc = irq_to_desc(irq);
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if ( desc->handler != &no_irq_type )
+ {
+ dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+ d->domain_id, irq, pirq + nr, desc->handler->typename);
+ ret = -EBUSY;
+ break;
+ }
}
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
- && !desc->arch.used_vectors )
+ if ( ret )
{
- desc->arch.used_vectors = &pdev->arch.used_vectors;
- if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+ spin_unlock_irqrestore(&desc->lock, flags);
+ while ( nr-- )
{
- int vector = desc->arch.vector;
- ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
- set_bit(vector, desc->arch.used_vectors);
+ if ( irq >= 0 )
+ {
+ if ( irq_deny_access(d, irq) )
+ printk(XENLOG_G_ERR
+ "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq);
+ destroy_irq(irq);
+ }
+ if ( info )
+ cleanup_domain_irq_pirq(d, irq, info);
+ info = pirq_info(d, pirq + nr);
+ irq = info->arch.irq;
}
+ pci_disable_msi(msi_desc);
+ goto done;
}
set_domain_irq_pirq(d, irq, info);
@@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
{
unsigned long flags;
struct irq_desc *desc;
- int irq, ret = 0;
+ int irq, ret = 0, rc;
+ unsigned int i, nr = 1;
bool_t forced_unbind;
struct pirq *info;
struct msi_desc *msi_desc = NULL;
@@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
desc = irq_to_desc(irq);
msi_desc = desc->msi_desc;
+ if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
+ if ( msi_desc->msi_attrib.entry_nr )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: trying to unmap secondary MSI pirq %d\n",
+ d->domain_id, pirq);
+ ret = -EBUSY;
+ goto done;
+ }
+ nr = msi_desc->msi.nvec;
+ }
ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
if ( ret )
@@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
spin_lock_irqsave(&desc->lock, flags);
- BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
- if ( !forced_unbind )
- clear_domain_irq_pirq(d, irq, info);
- else
+ for ( i = 0; ; )
{
- info->arch.irq = -irq;
- radix_tree_replace_slot(
- radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
- radix_tree_int_to_ptr(-pirq));
+ BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+ if ( !forced_unbind )
+ clear_domain_irq_pirq(d, irq, info);
+ else
+ {
+ info->arch.irq = -irq;
+ radix_tree_replace_slot(
+ radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+ radix_tree_int_to_ptr(-pirq));
+ }
+
+ if ( msi_desc )
+ {
+ desc->handler = &no_irq_type;
+ desc->msi_desc = NULL;
+ }
+
+ if ( ++i == nr )
+ break;
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !forced_unbind )
+ cleanup_domain_irq_pirq(d, irq, info);
+
+ rc = irq_deny_access(d, irq);
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq + i);
+ ret = rc;
+ }
+
+ do {
+ info = pirq_info(d, pirq + i);
+ if ( info && (irq = info->arch.irq) > 0 )
+ break;
+ printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+ d->domain_id, pirq + i);
+ } while ( ++i < nr );
+
+ if ( i == nr )
+ {
+ desc = NULL;
+ break;
+ }
+
+ desc = irq_to_desc(irq);
+ BUG_ON(desc->msi_desc != msi_desc + i);
+
+ spin_lock_irqsave(&desc->lock, flags);
}
- if ( msi_desc )
+ if ( desc )
{
- desc->handler = &no_irq_type;
- desc->msi_desc = NULL;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !forced_unbind )
+ cleanup_domain_irq_pirq(d, irq, info);
+
+ rc = irq_deny_access(d, irq);
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq + nr - 1);
+ ret = rc;
+ }
}
- spin_unlock_irqrestore(&desc->lock, flags);
if (msi_desc)
msi_free_irq(msi_desc);
- if ( !forced_unbind )
- cleanup_domain_irq_pirq(d, irq, info);
-
- ret = irq_deny_access(d, irq);
- if ( ret )
- printk(XENLOG_G_ERR
- "dom%d: could not deny access to IRQ%d (pirq %d)\n",
- d->domain_id, irq, pirq);
-
done:
return ret;
}
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ int nr = entry->msi_attrib.entry_nr;
+
+ ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+ if ( nr )
+ return 0;
pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
msg->address_lo);
@@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
u8 func = PCI_FUNC(entry->dev->devfn);
mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
- mask_bits &= ~(1);
- mask_bits |= flag;
+ mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+ mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
}
break;
@@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
case PCI_CAP_ID_MSI:
if (!entry->dev || !entry->msi_attrib.maskbit)
break;
- return pci_conf_read32(entry->dev->seg, entry->dev->bus,
- PCI_SLOT(entry->dev->devfn),
- PCI_FUNC(entry->dev->devfn),
- entry->msi.mpos) & 1;
+ return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
+ PCI_SLOT(entry->dev->devfn),
+ PCI_FUNC(entry->dev->devfn),
+ entry->msi.mpos) >>
+ entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
@@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
.set_affinity = set_msi_affinity
};
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
{
struct msi_desc *entry;
- entry = xmalloc(struct msi_desc);
+ entry = xmalloc_array(struct msi_desc, nr);
if ( !entry )
return NULL;
INIT_LIST_HEAD(&entry->list);
- entry->dev = NULL;
- entry->remap_index = -1;
+ while ( nr-- )
+ {
+ entry[nr].dev = NULL;
+ entry[nr].remap_index = -1;
+ }
return entry;
}
@@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
int msi_free_irq(struct msi_desc *entry)
{
- destroy_irq(entry->irq);
+ unsigned int nr = entry->msi.nvec;
+
if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
{
unsigned long start;
start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
msix_put_fixmap(entry->dev, virt_to_fix(start));
+ nr = 1;
}
- /* Free the unused IRTE if intr remap enabled */
- if ( iommu_intremap )
- iommu_update_ire_from_msi(entry, NULL);
+ while ( nr-- )
+ {
+ destroy_irq(entry[nr].irq);
+
+ /* Free the unused IRTE if intr remap enabled */
+ if ( iommu_intremap )
+ iommu_update_ire_from_msi(entry + nr, NULL);
+ }
list_del(&entry->list);
xfree(entry);
@@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
**/
static int msi_capability_init(struct pci_dev *dev,
int irq,
- struct msi_desc **desc)
+ struct msi_desc **desc,
+ unsigned int nvec)
{
struct msi_desc *entry;
int pos;
- unsigned int maxvec, mpos;
+ unsigned int i, maxvec, mpos;
u16 control, seg = dev->seg;
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
@@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
maxvec = multi_msi_capable(control);
+ if ( nvec > maxvec )
+ return maxvec;
control &= ~PCI_MSI_FLAGS_QSIZE;
+ multi_msi_enable(control, nvec);
/* MSI Entry Initialization */
msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(nvec);
if ( !entry )
return -ENOMEM;
- entry->msi_attrib.type = PCI_CAP_ID_MSI;
- entry->msi_attrib.is_64 = is_64bit_address(control);
- entry->msi_attrib.entry_nr = 0;
- entry->msi_attrib.maskbit = is_mask_bit_support(control);
- entry->msi_attrib.masked = 1;
- entry->msi_attrib.pos = pos;
mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
- entry->msi.nvec = 1;
+ for ( i = 0; i < nvec; ++i )
+ {
+ entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+ entry[i].msi_attrib.is_64 = is_64bit_address(control);
+ entry[i].msi_attrib.entry_nr = i;
+ entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+ entry[i].msi_attrib.masked = 1;
+ entry[i].msi_attrib.pos = pos;
+ if ( entry[i].msi_attrib.maskbit )
+ entry[i].msi.mpos = mpos;
+ entry[i].msi.nvec = 0;
+ entry[i].dev = dev;
+ }
+ entry->msi.nvec = nvec;
entry->irq = irq;
- if ( is_mask_bit_support(control) )
- entry->msi.mpos = mpos;
- entry->dev = dev;
if ( entry->msi_attrib.maskbit )
{
u32 maskbits;
@@ -693,7 +717,7 @@ static int msix_capability_init(struct p
if ( desc )
{
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(1);
if ( !entry )
return -ENOMEM;
ASSERT(msi);
@@ -851,7 +875,6 @@ static int msix_capability_init(struct p
static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
- int status;
struct pci_dev *pdev;
struct msi_desc *old_desc;
@@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
pci_disable_msi(old_desc);
}
- status = msi_capability_init(pdev, msi->irq, desc);
- return status;
+ return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
}
static void __pci_disable_msi(struct msi_desc *entry)
@@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
{
+ unsigned int i = 0, nr = 1;
+
irq = entry->irq;
desc = &irq_desc[irq];
@@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
if (desc->msi_desc != entry)
{
+ bogus:
dprintk(XENLOG_ERR,
- "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+ "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn));
+ PCI_FUNC(pdev->devfn), i);
spin_unlock_irqrestore(&desc->lock, flags);
return -EINVAL;
}
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
msi_set_enable(pdev, 0);
+ nr = entry->msi.nvec;
+ }
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
msix_set_enable(pdev, 0);
msg = entry->msg;
write_msi_msg(entry, &msg);
- msi_set_mask_bit(desc, entry->msi_attrib.masked);
+ for ( i = 0; ; )
+ {
+ msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !--nr )
+ break;
+
+ desc = &irq_desc[entry[++i].irq];
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( desc->msi_desc != entry + i )
+ goto bogus;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
+ unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+ u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos);
+
+ control &= ~PCI_MSI_FLAGS_QSIZE;
+ multi_msi_enable(control, entry->msi.nvec);
+ pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos, control);
+
msi_set_enable(pdev, 1);
+ }
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
msix_set_enable(pdev, 1);
-
- spin_unlock_irqrestore(&desc->lock, flags);
}
return 0;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
break;
case MAP_PIRQ_TYPE_MSI:
+ if ( !msi->table_base )
+ msi->entry_nr = 1;
irq = *index;
if ( irq == -1 )
+ case MAP_PIRQ_TYPE_MULTI_MSI:
irq = create_irq(NUMA_NO_NODE);
if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
goto done;
}
}
+ else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+ {
+ if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+ ret = -EDOM;
+ else if ( msi->entry_nr != 1 && !iommu_intremap )
+ ret = -EOPNOTSUPP;
+ else
+ {
+ while ( msi->entry_nr & (msi->entry_nr - 1) )
+ msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+ pirq = get_free_pirqs(d, msi->entry_nr);
+ if ( pirq < 0 )
+ {
+ while ( (msi->entry_nr >>= 1) > 1 )
+ if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+ break;
+ dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+ d->domain_id, msi->entry_nr << 1);
+ ret = pirq;
+ }
+ }
+ if ( ret < 0 )
+ goto done;
+ }
else
{
pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
done:
spin_unlock(&d->event_lock);
spin_unlock(&pcidevs_lock);
- if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
- destroy_irq(irq);
+ if ( ret != 0 )
+ switch ( type )
+ {
+ case MAP_PIRQ_TYPE_MSI:
+ if ( *index == -1 )
+ case MAP_PIRQ_TYPE_MULTI_MSI:
+ destroy_irq(irq);
+ break;
+ }
free_domain:
rcu_unlock_domain(d);
return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( copy_from_guest(&map, arg, 1) != 0 )
break;
- if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+ switch ( map.type )
{
+ case MAP_PIRQ_TYPE_MSI_SEG:
map.type = MAP_PIRQ_TYPE_MSI;
msi.seg = map.bus >> 16;
- }
- else
- {
+ break;
+
+ case MAP_PIRQ_TYPE_MULTI_MSI:
+ if ( map.table_base )
+ return -EINVAL;
+ msi.seg = map.bus >> 16;
+ break;
+
+ default:
msi.seg = 0;
+ break;
}
msi.bus = map.bus;
msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
&msi);
+ if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+ map.entry_nr = msi.entry_nr;
if ( __copy_to_guest(arg, &map, 1) )
ret = -EFAULT;
break;
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
void *data);
int unmap_domain_pirq(struct domain *d, int pirq);
int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
void free_domain_pirqs(struct domain *d);
int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
#define multi_msi_capable(control) \
(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
#define multi_msi_enable(control, num) \
- control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+ control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
#define msi_enable(control, num) multi_msi_enable(control, num); \
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
#define MAP_PIRQ_TYPE_GSI 0x1
#define MAP_PIRQ_TYPE_UNKNOWN 0x2
#define MAP_PIRQ_TYPE_MSI_SEG 0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI 0x4
#define PHYSDEVOP_map_pirq 13
struct physdev_map_pirq {
domid_t domid;
/* IN */
int type;
- /* IN */
+ /* IN (ignored for ..._MULTI_MSI) */
int index;
/* IN or OUT */
int pirq;
- /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+ /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
int bus;
/* IN */
int devfn;
- /* IN */
+ /* IN (also OUT for ..._MULTI_MSI) */
int entry_nr;
/* IN */
uint64_t table_base;
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH resend 3/3] pciif: add multi-vector-MSI command
2013-07-16 10:00 [PATCH resend 0/3] x86/IOMMU: multi-vector MSI Jan Beulich
2013-07-16 10:13 ` [PATCH resend 1/3] VT-d: enable for " Jan Beulich
2013-07-16 10:14 ` [PATCH resend 2/3] x86: enable " Jan Beulich
@ 2013-07-16 10:15 ` Jan Beulich
2013-07-16 11:19 ` Andrew Cooper
` (2 more replies)
2 siblings, 3 replies; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 10:15 UTC (permalink / raw)
To: xen-devel; +Cc: Keir Fraser, xiantao.zhang
[-- Attachment #1: Type: text/plain, Size: 640 bytes --]
The requested vector count is to be passed in struct xen_pci_op's info
field. Upon failure, if a smaller vector count might work, the backend
will pass that smaller count in the value field (which so far is always
being set to zero in the error path).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/include/public/io/pciif.h
+++ b/xen/include/public/io/pciif.h
@@ -46,6 +46,7 @@
#define XEN_PCI_OP_aer_resume (7)
#define XEN_PCI_OP_aer_mmio (8)
#define XEN_PCI_OP_aer_slotreset (9)
+#define XEN_PCI_OP_enable_multi_msi (10)
/* xen_pci_op error numbers */
#define XEN_PCI_ERR_success (0)
[-- Attachment #2: pciif-multi-vector-MSI.patch --]
[-- Type: text/plain, Size: 673 bytes --]
pciif: add multi-vector-MSI command
The requested vector count is to be passed in struct xen_pci_op's info
field. Upon failure, if a smaller vector count might work, the backend
will pass that smaller count in the value field (which so far is always
being set to zero in the error path).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/include/public/io/pciif.h
+++ b/xen/include/public/io/pciif.h
@@ -46,6 +46,7 @@
#define XEN_PCI_OP_aer_resume (7)
#define XEN_PCI_OP_aer_mmio (8)
#define XEN_PCI_OP_aer_slotreset (9)
+#define XEN_PCI_OP_enable_multi_msi (10)
/* xen_pci_op error numbers */
#define XEN_PCI_ERR_success (0)
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
2013-07-16 10:13 ` [PATCH resend 1/3] VT-d: enable for " Jan Beulich
@ 2013-07-16 11:15 ` Andrew Cooper
2013-07-16 11:32 ` Jan Beulich
0 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2013-07-16 11:15 UTC (permalink / raw)
To: Jan Beulich; +Cc: Keir Fraser, xiantao.zhang, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 7097 bytes --]
On 16/07/13 11:13, Jan Beulich wrote:
> The main change being to make alloc_remap_entry() capable of allocating
> a block of entries.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
> }
>
> /*
> - * Look for a free intr remap entry.
> + * Look for a free intr remap entry (or a contiguous set thereof).
> * Need hold iremap_lock, and setup returned entry before releasing lock.
> */
> -static int alloc_remap_entry(struct iommu *iommu)
> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
alloc_remap_entries() now that it unconditionally takes a count (and you
already have to patch all callsites)
> {
> struct iremap_entry *iremap_entries = NULL;
> struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> - int i;
> + unsigned int i, found;
>
> ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
>
> - for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
> + for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
> {
> struct iremap_entry *p;
> if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
> @@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
> else
> p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
>
> - if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
> + if ( p->lo_val || p->hi_val ) /* not a free entry */
> + found = 0;
> + else if ( ++found == nr )
> break;
> }
>
> @@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
> unmap_vtd_domain_page(iremap_entries);
>
> if ( i < IREMAP_ENTRY_NR )
> - ir_ctrl->iremap_num++;
> + ir_ctrl->iremap_num += nr;
> return i;
> }
>
> @@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
> index = apic_pin_2_ir_idx[apic][ioapic_pin];
> if ( index < 0 )
> {
> - index = alloc_remap_entry(iommu);
> + index = alloc_remap_entry(iommu, 1);
> if ( index < IREMAP_ENTRY_NR )
> apic_pin_2_ir_idx[apic][ioapic_pin] = index;
> }
> @@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
> }
>
> static int remap_entry_to_msi_msg(
> - struct iommu *iommu, struct msi_msg *msg)
> + struct iommu *iommu, struct msi_msg *msg, unsigned int index)
> {
> struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> struct msi_msg_remap_entry *remap_rte;
> - int index;
> unsigned long flags;
> struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>
> remap_rte = (struct msi_msg_remap_entry *) msg;
> - index = (remap_rte->address_lo.index_15 << 15) |
> + index += (remap_rte->address_lo.index_15 << 15) |
> remap_rte->address_lo.index_0_14;
>
> - if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
> + if ( index >= IREMAP_ENTRY_NR )
> {
> dprintk(XENLOG_ERR VTDPREFIX,
> "%s: index (%d) for remap table is invalid !\n",
> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
> struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> struct iremap_entry new_ire;
> struct msi_msg_remap_entry *remap_rte;
> - int index;
> + unsigned int index, i, nr = 1;
Does this hardcoding of nr=1 defeat the purpose of the following logic?
~Andrew
> unsigned long flags;
> struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>
> - remap_rte = (struct msi_msg_remap_entry *) msg;
> + if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> + nr = msi_desc->msi.nvec;
> +
> spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
>
> if ( msg == NULL )
> {
> - /* Free specified unused IRTE */
> - free_remap_entry(iommu, msi_desc->remap_index);
> + /* Free specified unused IRTEs */
> + for ( i = 0; i < nr; ++i )
> + free_remap_entry(iommu, msi_desc->remap_index + i);
> spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
> return 0;
> }
>
> if ( msi_desc->remap_index < 0 )
> {
> - /*
> - * TODO: Multiple-vector MSI requires allocating multiple continuous
> - * entries and configuring addr/data of msi_msg in different way. So
> - * alloca_remap_entry will be changed if enabling multiple-vector MSI
> - * in future.
> - */
> - index = alloc_remap_entry(iommu);
> - msi_desc->remap_index = index;
> + index = alloc_remap_entry(iommu, nr);
> + for ( i = 0; i < nr; ++i )
> + msi_desc[i].remap_index = index + i;
> }
> else
> index = msi_desc->remap_index;
> @@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
> "%s: intremap index (%d) is larger than"
> " the maximum index (%d)!\n",
> __func__, index, IREMAP_ENTRY_NR - 1);
> - msi_desc->remap_index = -1;
> + for ( i = 0; i < nr; ++i )
> + msi_desc[i].remap_index = -1;
> spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
> return -EFAULT;
> }
> @@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
> new_ire.lo.p = 1; /* finally, set present bit */
>
> /* now construct new MSI/MSI-X rte entry */
> + remap_rte = (struct msi_msg_remap_entry *)msg;
> remap_rte->address_lo.dontcare = 0;
> - remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
> - remap_rte->address_lo.index_0_14 = index & 0x7fff;
> + i = index;
> + if ( !nr )
> + i -= msi_desc->msi_attrib.entry_nr;
> + remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
> + remap_rte->address_lo.index_0_14 = i & 0x7fff;
> remap_rte->address_lo.SHV = 1;
> remap_rte->address_lo.format = 1;
>
> remap_rte->address_hi = 0;
> - remap_rte->data = 0;
> + remap_rte->data = index - i;
>
> memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
> iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
> @@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
> drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
> : hpet_to_drhd(msi_desc->hpet_id);
> if ( drhd )
> - remap_entry_to_msi_msg(drhd->iommu, msg);
> + remap_entry_to_msi_msg(drhd->iommu, msg,
> + msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
> + ? msi_desc->msi_attrib.entry_nr : 0);
> }
>
> int msi_msg_write_remap_rte(
> @@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
> return 0;
>
> spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
> - msi_desc->remap_index = alloc_remap_entry(iommu);
> + msi_desc->remap_index = alloc_remap_entry(iommu, 1);
> if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
> {
> dprintk(XENLOG_ERR VTDPREFIX,
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 8037 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
2013-07-16 10:15 ` [PATCH resend 3/3] pciif: add multi-vector-MSI command Jan Beulich
@ 2013-07-16 11:19 ` Andrew Cooper
2013-07-16 11:35 ` Jan Beulich
2013-08-05 13:12 ` Ping: " Jan Beulich
2013-08-08 9:02 ` Keir Fraser
2 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2013-07-16 11:19 UTC (permalink / raw)
To: Jan Beulich; +Cc: Keir Fraser, xiantao.zhang, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 1074 bytes --]
On 16/07/13 11:15, Jan Beulich wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/include/public/io/pciif.h
> +++ b/xen/include/public/io/pciif.h
> @@ -46,6 +46,7 @@
> #define XEN_PCI_OP_aer_resume (7)
> #define XEN_PCI_OP_aer_mmio (8)
> #define XEN_PCI_OP_aer_slotreset (9)
> +#define XEN_PCI_OP_enable_multi_msi (10)
/* Be sure to bump this number if you change this file */
#define XEN_PCI_MAGIC "7"
Should you bump this version, or is the comment stale? The only in-tree
consumer I can find is MiniOS's pcifront, which writes it into xenstore.
~Andrew
>
> /* xen_pci_op error numbers */
> #define XEN_PCI_ERR_success (0)
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 1934 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
2013-07-16 11:15 ` Andrew Cooper
@ 2013-07-16 11:32 ` Jan Beulich
2013-07-17 9:50 ` Andrew Cooper
0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 11:32 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Keir Fraser, xiantao.zhang, xen-devel
>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:13, Jan Beulich wrote:
>> The main change being to make alloc_remap_entry() capable of allocating
>> a block of entries.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/drivers/passthrough/vtd/intremap.c
>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
>> }
>>
>> /*
>> - * Look for a free intr remap entry.
>> + * Look for a free intr remap entry (or a contiguous set thereof).
>> * Need hold iremap_lock, and setup returned entry before releasing lock.
>> */
>> -static int alloc_remap_entry(struct iommu *iommu)
>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
>
> alloc_remap_entries() now that it unconditionally takes a count (and you
> already have to patch all callsites)
Actually I checked with Linux, and they use the singular in the function
name too (albeit the name isn't identical).
>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
>> struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>> struct iremap_entry new_ire;
>> struct msi_msg_remap_entry *remap_rte;
>> - int index;
>> + unsigned int index, i, nr = 1;
>
> Does this hardcoding of nr=1 defeat the purpose of the following logic?
In what way?
>> unsigned long flags;
>> struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>>
>> - remap_rte = (struct msi_msg_remap_entry *) msg;
>> + if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>> + nr = msi_desc->msi.nvec;
The logic here makes the vector count 1 for MSI-X and msi.nvec
for MSI.
Jan
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
2013-07-16 11:19 ` Andrew Cooper
@ 2013-07-16 11:35 ` Jan Beulich
2013-07-17 9:02 ` Andrew Cooper
0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 11:35 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Keir Fraser, xiantao.zhang, xen-devel
>>> On 16.07.13 at 13:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:15, Jan Beulich wrote:
>> The requested vector count is to be passed in struct xen_pci_op's info
>> field. Upon failure, if a smaller vector count might work, the backend
>> will pass that smaller count in the value field (which so far is always
>> being set to zero in the error path).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/include/public/io/pciif.h
>> +++ b/xen/include/public/io/pciif.h
>> @@ -46,6 +46,7 @@
>> #define XEN_PCI_OP_aer_resume (7)
>> #define XEN_PCI_OP_aer_mmio (8)
>> #define XEN_PCI_OP_aer_slotreset (9)
>> +#define XEN_PCI_OP_enable_multi_msi (10)
>
> /* Be sure to bump this number if you change this file */
> #define XEN_PCI_MAGIC "7"
>
> Should you bump this version, or is the comment stale? The only in-tree
> consumer I can find is MiniOS's pcifront, which writes it into xenstore.
Whether it's stale I don't know (likely it is considering that you
found just a single consumer), but bumping a revision just
because of the (backwards compatible) addition seems
superfluous to me. This would be different if I changed the
existing enable_msi...
Jan
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 2/3] x86: enable multi-vector MSI
2013-07-16 10:14 ` [PATCH resend 2/3] x86: enable " Jan Beulich
@ 2013-07-16 11:36 ` Andrew Cooper
2013-07-16 11:48 ` Jan Beulich
2013-08-05 13:11 ` Ping: " Jan Beulich
2013-08-08 9:02 ` Keir Fraser
2 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2013-07-16 11:36 UTC (permalink / raw)
To: Jan Beulich; +Cc: Keir Fraser, xiantao.zhang, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 24573 bytes --]
On 16/07/13 11:14, Jan Beulich wrote:
> This implies
> - extending the public interface to have a way to request a block of
> MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
> note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
> return -ENOSPC;
> }
>
> +int get_free_pirqs(struct domain *d, unsigned int nr)
> +{
> + unsigned int i, found = 0;
> +
> + ASSERT(spin_is_locked(&d->event_lock));
> +
> + for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
> + if ( is_free_pirq(d, pirq_info(d, i)) )
> + {
> + pirq_get_info(d, i);
> + if ( ++found == nr )
> + return i;
> + }
> + else
> + found = 0;
> +
> + return -ENOSPC;
> +}
> +
Is there any reason why this loop is backwards? Unless I am mistaken,
guests can choose their own pirqs when binding them, reducing the
likelihood that the top of the available space will be free.
> int map_domain_pirq(
> struct domain *d, int pirq, int irq, int type, void *data)
> {
> @@ -1918,11 +1937,12 @@ int map_domain_pirq(
>
> desc = irq_to_desc(irq);
>
> - if ( type == MAP_PIRQ_TYPE_MSI )
> + if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
> {
> struct msi_info *msi = (struct msi_info *)data;
> struct msi_desc *msi_desc;
> struct pci_dev *pdev;
> + unsigned int nr = 0;
>
> ASSERT(spin_is_locked(&pcidevs_lock));
>
> @@ -1933,7 +1953,14 @@ int map_domain_pirq(
> pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
> ret = pci_enable_msi(msi, &msi_desc);
> if ( ret )
> + {
> + if ( ret > 0 )
> + {
> + msi->entry_nr = ret;
> + ret = -ENFILE;
> + }
> goto done;
> + }
>
> spin_lock_irqsave(&desc->lock, flags);
>
> @@ -1947,25 +1974,73 @@ int map_domain_pirq(
> goto done;
> }
>
> - ret = setup_msi_irq(desc, msi_desc);
> - if ( ret )
> + while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
> {
> + if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
> + !desc->arch.used_vectors )
> + {
> + desc->arch.used_vectors = &pdev->arch.used_vectors;
> + if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> + {
> + int vector = desc->arch.vector;
> +
> + ASSERT(!test_bit(vector, desc->arch.used_vectors));
> + set_bit(vector, desc->arch.used_vectors);
> + }
> + }
> + if ( type == MAP_PIRQ_TYPE_MSI ||
> + msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
> + ++nr == msi->entry_nr )
> + break;
> +
> + set_domain_irq_pirq(d, irq, info);
> spin_unlock_irqrestore(&desc->lock, flags);
> - pci_disable_msi(msi_desc);
> - goto done;
> +
> + info = NULL;
> + irq = create_irq(NUMA_NO_NODE);
> + ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
> + : irq;
> + if ( ret )
> + break;
> + msi_desc[nr].irq = irq;
> +
> + if ( irq_permit_access(d, irq) != 0 )
> + printk(XENLOG_G_WARNING
> + "dom%d: could not permit access to IRQ%d (pirq %d)\n",
> + d->domain_id, irq, pirq);
> +
> + desc = irq_to_desc(irq);
> + spin_lock_irqsave(&desc->lock, flags);
> +
> + if ( desc->handler != &no_irq_type )
> + {
> + dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
> + d->domain_id, irq, pirq + nr, desc->handler->typename);
> + ret = -EBUSY;
> + break;
> + }
> }
>
> - if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
> - && !desc->arch.used_vectors )
> + if ( ret )
> {
> - desc->arch.used_vectors = &pdev->arch.used_vectors;
> - if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> + spin_unlock_irqrestore(&desc->lock, flags);
> + while ( nr-- )
> {
> - int vector = desc->arch.vector;
> - ASSERT(!test_bit(vector, desc->arch.used_vectors));
> -
> - set_bit(vector, desc->arch.used_vectors);
> + if ( irq >= 0 )
> + {
> + if ( irq_deny_access(d, irq) )
> + printk(XENLOG_G_ERR
> + "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
> + d->domain_id, irq, pirq);
> + destroy_irq(irq);
> + }
> + if ( info )
> + cleanup_domain_irq_pirq(d, irq, info);
> + info = pirq_info(d, pirq + nr);
> + irq = info->arch.irq;
> }
> + pci_disable_msi(msi_desc);
> + goto done;
> }
>
> set_domain_irq_pirq(d, irq, info);
> @@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
> {
> unsigned long flags;
> struct irq_desc *desc;
> - int irq, ret = 0;
> + int irq, ret = 0, rc;
> + unsigned int i, nr = 1;
> bool_t forced_unbind;
> struct pirq *info;
> struct msi_desc *msi_desc = NULL;
> @@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
>
> desc = irq_to_desc(irq);
> msi_desc = desc->msi_desc;
> + if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> + {
> + if ( msi_desc->msi_attrib.entry_nr )
> + {
> + printk(XENLOG_G_ERR
> + "dom%d: trying to unmap secondary MSI pirq %d\n",
> + d->domain_id, pirq);
> + ret = -EBUSY;
> + goto done;
> + }
> + nr = msi_desc->msi.nvec;
> + }
>
> ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
> if ( ret )
> @@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
>
> spin_lock_irqsave(&desc->lock, flags);
>
> - BUG_ON(irq != domain_pirq_to_irq(d, pirq));
> -
> - if ( !forced_unbind )
> - clear_domain_irq_pirq(d, irq, info);
> - else
> + for ( i = 0; ; )
> {
> - info->arch.irq = -irq;
> - radix_tree_replace_slot(
> - radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> - radix_tree_int_to_ptr(-pirq));
> + BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
> +
> + if ( !forced_unbind )
> + clear_domain_irq_pirq(d, irq, info);
> + else
> + {
> + info->arch.irq = -irq;
> + radix_tree_replace_slot(
> + radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> + radix_tree_int_to_ptr(-pirq));
> + }
> +
> + if ( msi_desc )
> + {
> + desc->handler = &no_irq_type;
> + desc->msi_desc = NULL;
> + }
> +
> + if ( ++i == nr )
> + break;
> +
> + spin_unlock_irqrestore(&desc->lock, flags);
> +
> + if ( !forced_unbind )
> + cleanup_domain_irq_pirq(d, irq, info);
> +
> + rc = irq_deny_access(d, irq);
> + if ( rc )
> + {
> + printk(XENLOG_G_ERR
> + "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> + d->domain_id, irq, pirq + i);
> + ret = rc;
> + }
> +
> + do {
> + info = pirq_info(d, pirq + i);
> + if ( info && (irq = info->arch.irq) > 0 )
> + break;
> + printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
> + d->domain_id, pirq + i);
> + } while ( ++i < nr );
> +
> + if ( i == nr )
> + {
> + desc = NULL;
> + break;
> + }
> +
> + desc = irq_to_desc(irq);
> + BUG_ON(desc->msi_desc != msi_desc + i);
> +
> + spin_lock_irqsave(&desc->lock, flags);
> }
>
> - if ( msi_desc )
> + if ( desc )
> {
> - desc->handler = &no_irq_type;
> - desc->msi_desc = NULL;
> + spin_unlock_irqrestore(&desc->lock, flags);
> +
> + if ( !forced_unbind )
> + cleanup_domain_irq_pirq(d, irq, info);
> +
> + rc = irq_deny_access(d, irq);
> + if ( rc )
> + {
> + printk(XENLOG_G_ERR
> + "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> + d->domain_id, irq, pirq + nr - 1);
> + ret = rc;
> + }
> }
>
> - spin_unlock_irqrestore(&desc->lock, flags);
> if (msi_desc)
> msi_free_irq(msi_desc);
>
> - if ( !forced_unbind )
> - cleanup_domain_irq_pirq(d, irq, info);
> -
> - ret = irq_deny_access(d, irq);
> - if ( ret )
> - printk(XENLOG_G_ERR
> - "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> - d->domain_id, irq, pirq);
> -
> done:
> return ret;
> }
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
> u8 bus = dev->bus;
> u8 slot = PCI_SLOT(dev->devfn);
> u8 func = PCI_FUNC(dev->devfn);
> + int nr = entry->msi_attrib.entry_nr;
> +
> + ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
> + if ( nr )
> + return 0;
>
> pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
> msg->address_lo);
> @@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
> u8 func = PCI_FUNC(entry->dev->devfn);
>
> mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
> - mask_bits &= ~(1);
> - mask_bits |= flag;
> + mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
> + mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
> pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
> }
> break;
> @@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
> case PCI_CAP_ID_MSI:
> if (!entry->dev || !entry->msi_attrib.maskbit)
> break;
> - return pci_conf_read32(entry->dev->seg, entry->dev->bus,
> - PCI_SLOT(entry->dev->devfn),
> - PCI_FUNC(entry->dev->devfn),
> - entry->msi.mpos) & 1;
> + return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
> + PCI_SLOT(entry->dev->devfn),
> + PCI_FUNC(entry->dev->devfn),
> + entry->msi.mpos) >>
> + entry->msi_attrib.entry_nr) & 1;
> case PCI_CAP_ID_MSIX:
> return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
> }
> @@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
> .set_affinity = set_msi_affinity
> };
>
> -static struct msi_desc* alloc_msi_entry(void)
> +static struct msi_desc *alloc_msi_entry(unsigned int nr)
> {
> struct msi_desc *entry;
>
> - entry = xmalloc(struct msi_desc);
> + entry = xmalloc_array(struct msi_desc, nr);
> if ( !entry )
> return NULL;
>
> INIT_LIST_HEAD(&entry->list);
> - entry->dev = NULL;
> - entry->remap_index = -1;
> + while ( nr-- )
> + {
> + entry[nr].dev = NULL;
> + entry[nr].remap_index = -1;
> + }
>
> return entry;
> }
> @@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
>
> int msi_free_irq(struct msi_desc *entry)
> {
> - destroy_irq(entry->irq);
> + unsigned int nr = entry->msi.nvec;
> +
> if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> {
> unsigned long start;
> start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
> msix_put_fixmap(entry->dev, virt_to_fix(start));
> + nr = 1;
> }
>
> - /* Free the unused IRTE if intr remap enabled */
> - if ( iommu_intremap )
> - iommu_update_ire_from_msi(entry, NULL);
> + while ( nr-- )
> + {
> + destroy_irq(entry[nr].irq);
> +
> + /* Free the unused IRTE if intr remap enabled */
> + if ( iommu_intremap )
> + iommu_update_ire_from_msi(entry + nr, NULL);
> + }
>
> list_del(&entry->list);
> xfree(entry);
> @@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
> **/
> static int msi_capability_init(struct pci_dev *dev,
> int irq,
> - struct msi_desc **desc)
> + struct msi_desc **desc,
> + unsigned int nvec)
> {
> struct msi_desc *entry;
> int pos;
> - unsigned int maxvec, mpos;
> + unsigned int i, maxvec, mpos;
> u16 control, seg = dev->seg;
> u8 bus = dev->bus;
> u8 slot = PCI_SLOT(dev->devfn);
> @@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
> pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
> control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
> maxvec = multi_msi_capable(control);
> + if ( nvec > maxvec )
> + return maxvec;
> control &= ~PCI_MSI_FLAGS_QSIZE;
> + multi_msi_enable(control, nvec);
>
> /* MSI Entry Initialization */
> msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
>
> - entry = alloc_msi_entry();
> + entry = alloc_msi_entry(nvec);
> if ( !entry )
> return -ENOMEM;
>
> - entry->msi_attrib.type = PCI_CAP_ID_MSI;
> - entry->msi_attrib.is_64 = is_64bit_address(control);
> - entry->msi_attrib.entry_nr = 0;
> - entry->msi_attrib.maskbit = is_mask_bit_support(control);
> - entry->msi_attrib.masked = 1;
> - entry->msi_attrib.pos = pos;
> mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
> - entry->msi.nvec = 1;
> + for ( i = 0; i < nvec; ++i )
> + {
> + entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
> + entry[i].msi_attrib.is_64 = is_64bit_address(control);
> + entry[i].msi_attrib.entry_nr = i;
> + entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
> + entry[i].msi_attrib.masked = 1;
> + entry[i].msi_attrib.pos = pos;
> + if ( entry[i].msi_attrib.maskbit )
> + entry[i].msi.mpos = mpos;
> + entry[i].msi.nvec = 0;
> + entry[i].dev = dev;
> + }
> + entry->msi.nvec = nvec;
> entry->irq = irq;
> - if ( is_mask_bit_support(control) )
> - entry->msi.mpos = mpos;
> - entry->dev = dev;
> if ( entry->msi_attrib.maskbit )
> {
> u32 maskbits;
> @@ -693,7 +717,7 @@ static int msix_capability_init(struct p
>
> if ( desc )
> {
> - entry = alloc_msi_entry();
> + entry = alloc_msi_entry(1);
> if ( !entry )
> return -ENOMEM;
> ASSERT(msi);
> @@ -851,7 +875,6 @@ static int msix_capability_init(struct p
>
> static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
> {
> - int status;
> struct pci_dev *pdev;
> struct msi_desc *old_desc;
>
> @@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
> pci_disable_msi(old_desc);
> }
>
> - status = msi_capability_init(pdev, msi->irq, desc);
> - return status;
> + return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
> }
>
> static void __pci_disable_msi(struct msi_desc *entry)
> @@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
>
> list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
> {
> + unsigned int i = 0, nr = 1;
> +
> irq = entry->irq;
> desc = &irq_desc[irq];
>
> @@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
>
> if (desc->msi_desc != entry)
> {
> + bogus:
> dprintk(XENLOG_ERR,
> - "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
> + "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
> pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> - PCI_FUNC(pdev->devfn));
> + PCI_FUNC(pdev->devfn), i);
> spin_unlock_irqrestore(&desc->lock, flags);
> return -EINVAL;
> }
>
> if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> + {
> msi_set_enable(pdev, 0);
> + nr = entry->msi.nvec;
> + }
> else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> msix_set_enable(pdev, 0);
>
> msg = entry->msg;
> write_msi_msg(entry, &msg);
>
> - msi_set_mask_bit(desc, entry->msi_attrib.masked);
> + for ( i = 0; ; )
> + {
> + msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
> + spin_unlock_irqrestore(&desc->lock, flags);
> +
> + if ( !--nr )
> + break;
> +
> + desc = &irq_desc[entry[++i].irq];
> + spin_lock_irqsave(&desc->lock, flags);
> + if ( desc->msi_desc != entry + i )
> + goto bogus;
> + }
> +
> + spin_unlock_irqrestore(&desc->lock, flags);
>
> if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> + {
> + unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
> + u16 control = pci_conf_read16(pdev->seg, pdev->bus,
> + PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn), cpos);
> +
> + control &= ~PCI_MSI_FLAGS_QSIZE;
> + multi_msi_enable(control, entry->msi.nvec);
> + pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn), cpos, control);
> +
> msi_set_enable(pdev, 1);
> + }
> else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> msix_set_enable(pdev, 1);
> -
> - spin_unlock_irqrestore(&desc->lock, flags);
> }
>
> return 0;
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
> break;
>
> case MAP_PIRQ_TYPE_MSI:
> + if ( !msi->table_base )
> + msi->entry_nr = 1;
> irq = *index;
> if ( irq == -1 )
> + case MAP_PIRQ_TYPE_MULTI_MSI:
> irq = create_irq(NUMA_NO_NODE);
>
> if ( irq < nr_irqs_gsi || irq >= nr_irqs )
> @@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
> goto done;
> }
> }
> + else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
> + {
> + if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
> + ret = -EDOM;
> + else if ( msi->entry_nr != 1 && !iommu_intremap )
> + ret = -EOPNOTSUPP;
> + else
> + {
> + while ( msi->entry_nr & (msi->entry_nr - 1) )
> + msi->entry_nr += msi->entry_nr & -msi->entry_nr;
> + pirq = get_free_pirqs(d, msi->entry_nr);
> + if ( pirq < 0 )
> + {
> + while ( (msi->entry_nr >>= 1) > 1 )
> + if ( get_free_pirqs(d, msi->entry_nr) > 0 )
> + break;
> + dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
> + d->domain_id, msi->entry_nr << 1);
> + ret = pirq;
> + }
> + }
> + if ( ret < 0 )
> + goto done;
> + }
> else
> {
> pirq = get_free_pirq(d, type);
> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
> done:
> spin_unlock(&d->event_lock);
> spin_unlock(&pcidevs_lock);
> - if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
> - destroy_irq(irq);
> + if ( ret != 0 )
> + switch ( type )
> + {
> + case MAP_PIRQ_TYPE_MSI:
> + if ( *index == -1 )
> + case MAP_PIRQ_TYPE_MULTI_MSI:
> + destroy_irq(irq);
> + break;
> + }
Do we not need to create and destroy entry_nr irqs in this function, or
is a multi-vector-msi now considered as just a single irq?
I ask because this appears to lack the "repeating certain operations for
all involved IRQs" described in the comment.
~Andrew
> free_domain:
> rcu_unlock_domain(d);
> return ret;
> @@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
> if ( copy_from_guest(&map, arg, 1) != 0 )
> break;
>
> - if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
> + switch ( map.type )
> {
> + case MAP_PIRQ_TYPE_MSI_SEG:
> map.type = MAP_PIRQ_TYPE_MSI;
> msi.seg = map.bus >> 16;
> - }
> - else
> - {
> + break;
> +
> + case MAP_PIRQ_TYPE_MULTI_MSI:
> + if ( map.table_base )
> + return -EINVAL;
> + msi.seg = map.bus >> 16;
> + break;
> +
> + default:
> msi.seg = 0;
> + break;
> }
> msi.bus = map.bus;
> msi.devfn = map.devfn;
> @@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
> ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
> &msi);
>
> + if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
> + map.entry_nr = msi.entry_nr;
> if ( __copy_to_guest(arg, &map, 1) )
> ret = -EFAULT;
> break;
> --- a/xen/include/asm-x86/irq.h
> +++ b/xen/include/asm-x86/irq.h
> @@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
> void *data);
> int unmap_domain_pirq(struct domain *d, int pirq);
> int get_free_pirq(struct domain *d, int type);
> +int get_free_pirqs(struct domain *, unsigned int nr);
> void free_domain_pirqs(struct domain *d);
> int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
> int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
> #define multi_msi_capable(control) \
> (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
> #define multi_msi_enable(control, num) \
> - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> + control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
> #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
> #define msi_enable(control, num) multi_msi_enable(control, num); \
> --- a/xen/include/public/physdev.h
> +++ b/xen/include/public/physdev.h
> @@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
> #define MAP_PIRQ_TYPE_GSI 0x1
> #define MAP_PIRQ_TYPE_UNKNOWN 0x2
> #define MAP_PIRQ_TYPE_MSI_SEG 0x3
> +#define MAP_PIRQ_TYPE_MULTI_MSI 0x4
>
> #define PHYSDEVOP_map_pirq 13
> struct physdev_map_pirq {
> domid_t domid;
> /* IN */
> int type;
> - /* IN */
> + /* IN (ignored for ..._MULTI_MSI) */
> int index;
> /* IN or OUT */
> int pirq;
> - /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
> + /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
> int bus;
> /* IN */
> int devfn;
> - /* IN */
> + /* IN (also OUT for ..._MULTI_MSI) */
> int entry_nr;
> /* IN */
> uint64_t table_base;
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 25128 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 2/3] x86: enable multi-vector MSI
2013-07-16 11:36 ` Andrew Cooper
@ 2013-07-16 11:48 ` Jan Beulich
2013-07-17 10:02 ` Andrew Cooper
0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2013-07-16 11:48 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Keir Fraser, xiantao.zhang, xen-devel
>>> On 16.07.13 at 13:36, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:14, Jan Beulich wrote:
>> +int get_free_pirqs(struct domain *d, unsigned int nr)
>> +{
>> + unsigned int i, found = 0;
>> +
>> + ASSERT(spin_is_locked(&d->event_lock));
>> +
>> + for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
>> + if ( is_free_pirq(d, pirq_info(d, i)) )
>> + {
>> + pirq_get_info(d, i);
>> + if ( ++found == nr )
>> + return i;
>> + }
>> + else
>> + found = 0;
>> +
>> + return -ENOSPC;
>> +}
>> +
>
> Is there any reason why this loop is backwards? Unless I am mistaken,
> guests can choose their own pirqs when binding them, reducing the
> likelihood that the top of the available space will be free.
This just follows the behavior of get_free_pirq(). I'm not up to
having the two behave differently.
>> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>> done:
>> spin_unlock(&d->event_lock);
>> spin_unlock(&pcidevs_lock);
>> - if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
>> - destroy_irq(irq);
>> + if ( ret != 0 )
>> + switch ( type )
>> + {
>> + case MAP_PIRQ_TYPE_MSI:
>> + if ( *index == -1 )
>> + case MAP_PIRQ_TYPE_MULTI_MSI:
>> + destroy_irq(irq);
>> + break;
>> + }
>
> Do we not need to create and destroy entry_nr irqs in this function, or
> is a multi-vector-msi now considered as just a single irq?
>
> I ask because this appears to lack the "repeating certain operations for
> all involved IRQs" described in the comment.
No, there's a single create_irq() in the function, and having a single
destroy_irq() here matches this. The remaining ones (both!) are in
map_domain_pirq().
Also, as a general remark, asking for changes in a series that was
posted 2.5 months ago (and deferred just because of the 4.3
release process) seems a little strange to me. I had to repost
merely to collect ack-s, and didn't really expect further requests
for adjustments as there was ample time before to do so.
Jan
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
2013-07-16 11:35 ` Jan Beulich
@ 2013-07-17 9:02 ` Andrew Cooper
0 siblings, 0 replies; 18+ messages in thread
From: Andrew Cooper @ 2013-07-17 9:02 UTC (permalink / raw)
To: Jan Beulich; +Cc: Keir Fraser, xiantao.zhang, xen-devel
On 16/07/13 12:35, Jan Beulich wrote:
>>>> On 16.07.13 at 13:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:15, Jan Beulich wrote:
>>> The requested vector count is to be passed in struct xen_pci_op's info
>>> field. Upon failure, if a smaller vector count might work, the backend
>>> will pass that smaller count in the value field (which so far is always
>>> being set to zero in the error path).
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/include/public/io/pciif.h
>>> +++ b/xen/include/public/io/pciif.h
>>> @@ -46,6 +46,7 @@
>>> #define XEN_PCI_OP_aer_resume (7)
>>> #define XEN_PCI_OP_aer_mmio (8)
>>> #define XEN_PCI_OP_aer_slotreset (9)
>>> +#define XEN_PCI_OP_enable_multi_msi (10)
>> /* Be sure to bump this number if you change this file */
>> #define XEN_PCI_MAGIC "7"
>>
>> Should you bump this version, or is the comment stale? The only in-tree
>> consumer I can find is MiniOS's pcifront, which writes it into xenstore.
> Whether it's stale I don't know (likely it is considering that you
> found just a single consumer), but bumping a revision just
> because of the (backwards compatible) addition seems
> superfluous to me. This would be different if I changed the
> existing enable_msi...
>
> Jan
>
I suspected that was the answer, but just wanted to check that it hadn't
been overlooked,
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
2013-07-16 11:32 ` Jan Beulich
@ 2013-07-17 9:50 ` Andrew Cooper
2013-07-18 10:48 ` Zhang, Xiantao
0 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2013-07-17 9:50 UTC (permalink / raw)
To: Jan Beulich; +Cc: Keir Fraser, xiantao.zhang, xen-devel
On 16/07/13 12:32, Jan Beulich wrote:
>>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:13, Jan Beulich wrote:
>>> The main change being to make alloc_remap_entry() capable of allocating
>>> a block of entries.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/drivers/passthrough/vtd/intremap.c
>>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
>>> }
>>>
>>> /*
>>> - * Look for a free intr remap entry.
>>> + * Look for a free intr remap entry (or a contiguous set thereof).
>>> * Need hold iremap_lock, and setup returned entry before releasing lock.
>>> */
>>> -static int alloc_remap_entry(struct iommu *iommu)
>>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
>> alloc_remap_entries() now that it unconditionally takes a count (and you
>> already have to patch all callsites)
> Actually I checked with Linux, and they use the singular in the function
> name too (albeit the name isn't identical).
>
>>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
>>> struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>>> struct iremap_entry new_ire;
>>> struct msi_msg_remap_entry *remap_rte;
>>> - int index;
>>> + unsigned int index, i, nr = 1;
>> Does this hardcoding of nr=1 defeat the purpose of the following logic?
> In what way?
>
>>> unsigned long flags;
>>> struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>>>
>>> - remap_rte = (struct msi_msg_remap_entry *) msg;
>>> + if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>>> + nr = msi_desc->msi.nvec;
> The logic here makes the vector count 1 for MSI-X and msi.nvec
> for MSI.
>
> Jan
>
Ah yes - I see now.
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 2/3] x86: enable multi-vector MSI
2013-07-16 11:48 ` Jan Beulich
@ 2013-07-17 10:02 ` Andrew Cooper
0 siblings, 0 replies; 18+ messages in thread
From: Andrew Cooper @ 2013-07-17 10:02 UTC (permalink / raw)
To: Jan Beulich; +Cc: Keir Fraser, xiantao.zhang, xen-devel
On 16/07/13 12:48, Jan Beulich wrote:
>>>> On 16.07.13 at 13:36, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:14, Jan Beulich wrote:
>>> +int get_free_pirqs(struct domain *d, unsigned int nr)
>>> +{
>>> + unsigned int i, found = 0;
>>> +
>>> + ASSERT(spin_is_locked(&d->event_lock));
>>> +
>>> + for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
>>> + if ( is_free_pirq(d, pirq_info(d, i)) )
>>> + {
>>> + pirq_get_info(d, i);
>>> + if ( ++found == nr )
>>> + return i;
>>> + }
>>> + else
>>> + found = 0;
>>> +
>>> + return -ENOSPC;
>>> +}
>>> +
>> Is there any reason why this loop is backwards? Unless I am mistaken,
>> guests can choose their own pirqs when binding them, reducing the
>> likelihood that the top of the available space will be free.
> This just follows the behavior of get_free_pirq(). I'm not up to
> having the two behave differently.
>
>>> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>>> done:
>>> spin_unlock(&d->event_lock);
>>> spin_unlock(&pcidevs_lock);
>>> - if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
>>> - destroy_irq(irq);
>>> + if ( ret != 0 )
>>> + switch ( type )
>>> + {
>>> + case MAP_PIRQ_TYPE_MSI:
>>> + if ( *index == -1 )
>>> + case MAP_PIRQ_TYPE_MULTI_MSI:
>>> + destroy_irq(irq);
>>> + break;
>>> + }
>> Do we not need to create and destroy entry_nr irqs in this function, or
>> is a multi-vector-msi now considered as just a single irq?
>>
>> I ask because this appears to lack the "repeating certain operations for
>> all involved IRQs" described in the comment.
> No, there's a single create_irq() in the function, and having a single
> destroy_irq() here matches this. The remaining ones (both!) are in
> map_domain_pirq().
Ok.
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> Also, as a general remark, asking for changes in a series that was
> posted 2.5 months ago (and deferred just because of the 4.3
> release process) seems a little strange to me. I had to repost
> merely to collect ack-s, and didn't really expect further requests
> for adjustments as there was ample time before to do so.
>
> Jan
>
2.5 months ago, I was very busy with XSAs, hotfixes and a release of
XenServer, so I apologise for not reviewing back then.
~Andrew
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
2013-07-17 9:50 ` Andrew Cooper
@ 2013-07-18 10:48 ` Zhang, Xiantao
0 siblings, 0 replies; 18+ messages in thread
From: Zhang, Xiantao @ 2013-07-18 10:48 UTC (permalink / raw)
To: Andrew Cooper, Jan Beulich; +Cc: Keir Fraser, Zhang, Xiantao, xen-devel
Thanks, Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: Wednesday, July 17, 2013 5:50 PM
> To: Jan Beulich
> Cc: Zhang, Xiantao; xen-devel; Keir Fraser
> Subject: Re: [Xen-devel] [PATCH resend 1/3] VT-d: enable for multi-vector
> MSI
>
> On 16/07/13 12:32, Jan Beulich wrote:
> >>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com>
> wrote:
> >> On 16/07/13 11:13, Jan Beulich wrote:
> >>> The main change being to make alloc_remap_entry() capable of
> allocating
> >>> a block of entries.
> >>>
> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>
> >>> --- a/xen/drivers/passthrough/vtd/intremap.c
> >>> +++ b/xen/drivers/passthrough/vtd/intremap.c
> >>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
> >>> }
> >>>
> >>> /*
> >>> - * Look for a free intr remap entry.
> >>> + * Look for a free intr remap entry (or a contiguous set thereof).
> >>> * Need hold iremap_lock, and setup returned entry before releasing
> lock.
> >>> */
> >>> -static int alloc_remap_entry(struct iommu *iommu)
> >>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned
> int nr)
> >> alloc_remap_entries() now that it unconditionally takes a count (and you
> >> already have to patch all callsites)
> > Actually I checked with Linux, and they use the singular in the function
> > name too (albeit the name isn't identical).
> >
> >>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
> >>> struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> >>> struct iremap_entry new_ire;
> >>> struct msi_msg_remap_entry *remap_rte;
> >>> - int index;
> >>> + unsigned int index, i, nr = 1;
> >> Does this hardcoding of nr=1 defeat the purpose of the following logic?
> > In what way?
> >
> >>> unsigned long flags;
> >>> struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> >>>
> >>> - remap_rte = (struct msi_msg_remap_entry *) msg;
> >>> + if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> >>> + nr = msi_desc->msi.nvec;
> > The logic here makes the vector count 1 for MSI-X and msi.nvec
> > for MSI.
> >
> > Jan
> >
>
> Ah yes - I see now.
>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Ping: [PATCH resend 2/3] x86: enable multi-vector MSI
2013-07-16 10:14 ` [PATCH resend 2/3] x86: enable " Jan Beulich
2013-07-16 11:36 ` Andrew Cooper
@ 2013-08-05 13:11 ` Jan Beulich
2013-08-08 9:02 ` Keir Fraser
2 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2013-08-05 13:11 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel, xiantao.zhang
Ping (public interface change)?
>>> On 16.07.13 at 12:14, "Jan Beulich" <JBeulich@suse.com> wrote:
> This implies
> - extending the public interface to have a way to request a block of
> MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
> note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
> return -ENOSPC;
> }
>
> +int get_free_pirqs(struct domain *d, unsigned int nr)
> +{
> + unsigned int i, found = 0;
> +
> + ASSERT(spin_is_locked(&d->event_lock));
> +
> + for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
> + if ( is_free_pirq(d, pirq_info(d, i)) )
> + {
> + pirq_get_info(d, i);
> + if ( ++found == nr )
> + return i;
> + }
> + else
> + found = 0;
> +
> + return -ENOSPC;
> +}
> +
> int map_domain_pirq(
> struct domain *d, int pirq, int irq, int type, void *data)
> {
> @@ -1918,11 +1937,12 @@ int map_domain_pirq(
>
> desc = irq_to_desc(irq);
>
> - if ( type == MAP_PIRQ_TYPE_MSI )
> + if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
> {
> struct msi_info *msi = (struct msi_info *)data;
> struct msi_desc *msi_desc;
> struct pci_dev *pdev;
> + unsigned int nr = 0;
>
> ASSERT(spin_is_locked(&pcidevs_lock));
>
> @@ -1933,7 +1953,14 @@ int map_domain_pirq(
> pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
> ret = pci_enable_msi(msi, &msi_desc);
> if ( ret )
> + {
> + if ( ret > 0 )
> + {
> + msi->entry_nr = ret;
> + ret = -ENFILE;
> + }
> goto done;
> + }
>
> spin_lock_irqsave(&desc->lock, flags);
>
> @@ -1947,25 +1974,73 @@ int map_domain_pirq(
> goto done;
> }
>
> - ret = setup_msi_irq(desc, msi_desc);
> - if ( ret )
> + while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
> {
> + if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
> + !desc->arch.used_vectors )
> + {
> + desc->arch.used_vectors = &pdev->arch.used_vectors;
> + if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> + {
> + int vector = desc->arch.vector;
> +
> + ASSERT(!test_bit(vector, desc->arch.used_vectors));
> + set_bit(vector, desc->arch.used_vectors);
> + }
> + }
> + if ( type == MAP_PIRQ_TYPE_MSI ||
> + msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
> + ++nr == msi->entry_nr )
> + break;
> +
> + set_domain_irq_pirq(d, irq, info);
> spin_unlock_irqrestore(&desc->lock, flags);
> - pci_disable_msi(msi_desc);
> - goto done;
> +
> + info = NULL;
> + irq = create_irq(NUMA_NO_NODE);
> + ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr,
> &info)
> + : irq;
> + if ( ret )
> + break;
> + msi_desc[nr].irq = irq;
> +
> + if ( irq_permit_access(d, irq) != 0 )
> + printk(XENLOG_G_WARNING
> + "dom%d: could not permit access to IRQ%d (pirq
> %d)\n",
> + d->domain_id, irq, pirq);
> +
> + desc = irq_to_desc(irq);
> + spin_lock_irqsave(&desc->lock, flags);
> +
> + if ( desc->handler != &no_irq_type )
> + {
> + dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use
> (%s)\n",
> + d->domain_id, irq, pirq + nr, desc->handler->typename);
> + ret = -EBUSY;
> + break;
> + }
> }
>
> - if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
> - && !desc->arch.used_vectors )
> + if ( ret )
> {
> - desc->arch.used_vectors = &pdev->arch.used_vectors;
> - if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> + spin_unlock_irqrestore(&desc->lock, flags);
> + while ( nr-- )
> {
> - int vector = desc->arch.vector;
> - ASSERT(!test_bit(vector, desc->arch.used_vectors));
> -
> - set_bit(vector, desc->arch.used_vectors);
> + if ( irq >= 0 )
> + {
> + if ( irq_deny_access(d, irq) )
> + printk(XENLOG_G_ERR
> + "dom%d: could not revoke access to IRQ%d
> (pirq %d)\n",
> + d->domain_id, irq, pirq);
> + destroy_irq(irq);
> + }
> + if ( info )
> + cleanup_domain_irq_pirq(d, irq, info);
> + info = pirq_info(d, pirq + nr);
> + irq = info->arch.irq;
> }
> + pci_disable_msi(msi_desc);
> + goto done;
> }
>
> set_domain_irq_pirq(d, irq, info);
> @@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
> {
> unsigned long flags;
> struct irq_desc *desc;
> - int irq, ret = 0;
> + int irq, ret = 0, rc;
> + unsigned int i, nr = 1;
> bool_t forced_unbind;
> struct pirq *info;
> struct msi_desc *msi_desc = NULL;
> @@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
>
> desc = irq_to_desc(irq);
> msi_desc = desc->msi_desc;
> + if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> + {
> + if ( msi_desc->msi_attrib.entry_nr )
> + {
> + printk(XENLOG_G_ERR
> + "dom%d: trying to unmap secondary MSI pirq %d\n",
> + d->domain_id, pirq);
> + ret = -EBUSY;
> + goto done;
> + }
> + nr = msi_desc->msi.nvec;
> + }
>
> ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
> if ( ret )
> @@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
>
> spin_lock_irqsave(&desc->lock, flags);
>
> - BUG_ON(irq != domain_pirq_to_irq(d, pirq));
> -
> - if ( !forced_unbind )
> - clear_domain_irq_pirq(d, irq, info);
> - else
> + for ( i = 0; ; )
> {
> - info->arch.irq = -irq;
> - radix_tree_replace_slot(
> - radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> - radix_tree_int_to_ptr(-pirq));
> + BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
> +
> + if ( !forced_unbind )
> + clear_domain_irq_pirq(d, irq, info);
> + else
> + {
> + info->arch.irq = -irq;
> + radix_tree_replace_slot(
> + radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> + radix_tree_int_to_ptr(-pirq));
> + }
> +
> + if ( msi_desc )
> + {
> + desc->handler = &no_irq_type;
> + desc->msi_desc = NULL;
> + }
> +
> + if ( ++i == nr )
> + break;
> +
> + spin_unlock_irqrestore(&desc->lock, flags);
> +
> + if ( !forced_unbind )
> + cleanup_domain_irq_pirq(d, irq, info);
> +
> + rc = irq_deny_access(d, irq);
> + if ( rc )
> + {
> + printk(XENLOG_G_ERR
> + "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> + d->domain_id, irq, pirq + i);
> + ret = rc;
> + }
> +
> + do {
> + info = pirq_info(d, pirq + i);
> + if ( info && (irq = info->arch.irq) > 0 )
> + break;
> + printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
> + d->domain_id, pirq + i);
> + } while ( ++i < nr );
> +
> + if ( i == nr )
> + {
> + desc = NULL;
> + break;
> + }
> +
> + desc = irq_to_desc(irq);
> + BUG_ON(desc->msi_desc != msi_desc + i);
> +
> + spin_lock_irqsave(&desc->lock, flags);
> }
>
> - if ( msi_desc )
> + if ( desc )
> {
> - desc->handler = &no_irq_type;
> - desc->msi_desc = NULL;
> + spin_unlock_irqrestore(&desc->lock, flags);
> +
> + if ( !forced_unbind )
> + cleanup_domain_irq_pirq(d, irq, info);
> +
> + rc = irq_deny_access(d, irq);
> + if ( rc )
> + {
> + printk(XENLOG_G_ERR
> + "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> + d->domain_id, irq, pirq + nr - 1);
> + ret = rc;
> + }
> }
>
> - spin_unlock_irqrestore(&desc->lock, flags);
> if (msi_desc)
> msi_free_irq(msi_desc);
>
> - if ( !forced_unbind )
> - cleanup_domain_irq_pirq(d, irq, info);
> -
> - ret = irq_deny_access(d, irq);
> - if ( ret )
> - printk(XENLOG_G_ERR
> - "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> - d->domain_id, irq, pirq);
> -
> done:
> return ret;
> }
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
> u8 bus = dev->bus;
> u8 slot = PCI_SLOT(dev->devfn);
> u8 func = PCI_FUNC(dev->devfn);
> + int nr = entry->msi_attrib.entry_nr;
> +
> + ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
> + if ( nr )
> + return 0;
>
> pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
> msg->address_lo);
> @@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
> u8 func = PCI_FUNC(entry->dev->devfn);
>
> mask_bits = pci_conf_read32(seg, bus, slot, func,
> entry->msi.mpos);
> - mask_bits &= ~(1);
> - mask_bits |= flag;
> + mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
> + mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
> pci_conf_write32(seg, bus, slot, func, entry->msi.mpos,
> mask_bits);
> }
> break;
> @@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
> case PCI_CAP_ID_MSI:
> if (!entry->dev || !entry->msi_attrib.maskbit)
> break;
> - return pci_conf_read32(entry->dev->seg, entry->dev->bus,
> - PCI_SLOT(entry->dev->devfn),
> - PCI_FUNC(entry->dev->devfn),
> - entry->msi.mpos) & 1;
> + return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
> + PCI_SLOT(entry->dev->devfn),
> + PCI_FUNC(entry->dev->devfn),
> + entry->msi.mpos) >>
> + entry->msi_attrib.entry_nr) & 1;
> case PCI_CAP_ID_MSIX:
> return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) &
> 1;
> }
> @@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
> .set_affinity = set_msi_affinity
> };
>
> -static struct msi_desc* alloc_msi_entry(void)
> +static struct msi_desc *alloc_msi_entry(unsigned int nr)
> {
> struct msi_desc *entry;
>
> - entry = xmalloc(struct msi_desc);
> + entry = xmalloc_array(struct msi_desc, nr);
> if ( !entry )
> return NULL;
>
> INIT_LIST_HEAD(&entry->list);
> - entry->dev = NULL;
> - entry->remap_index = -1;
> + while ( nr-- )
> + {
> + entry[nr].dev = NULL;
> + entry[nr].remap_index = -1;
> + }
>
> return entry;
> }
> @@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
>
> int msi_free_irq(struct msi_desc *entry)
> {
> - destroy_irq(entry->irq);
> + unsigned int nr = entry->msi.nvec;
> +
> if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> {
> unsigned long start;
> start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
> msix_put_fixmap(entry->dev, virt_to_fix(start));
> + nr = 1;
> }
>
> - /* Free the unused IRTE if intr remap enabled */
> - if ( iommu_intremap )
> - iommu_update_ire_from_msi(entry, NULL);
> + while ( nr-- )
> + {
> + destroy_irq(entry[nr].irq);
> +
> + /* Free the unused IRTE if intr remap enabled */
> + if ( iommu_intremap )
> + iommu_update_ire_from_msi(entry + nr, NULL);
> + }
>
> list_del(&entry->list);
> xfree(entry);
> @@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
> **/
> static int msi_capability_init(struct pci_dev *dev,
> int irq,
> - struct msi_desc **desc)
> + struct msi_desc **desc,
> + unsigned int nvec)
> {
> struct msi_desc *entry;
> int pos;
> - unsigned int maxvec, mpos;
> + unsigned int i, maxvec, mpos;
> u16 control, seg = dev->seg;
> u8 bus = dev->bus;
> u8 slot = PCI_SLOT(dev->devfn);
> @@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
> pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
> control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
> maxvec = multi_msi_capable(control);
> + if ( nvec > maxvec )
> + return maxvec;
> control &= ~PCI_MSI_FLAGS_QSIZE;
> + multi_msi_enable(control, nvec);
>
> /* MSI Entry Initialization */
> msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
>
> - entry = alloc_msi_entry();
> + entry = alloc_msi_entry(nvec);
> if ( !entry )
> return -ENOMEM;
>
> - entry->msi_attrib.type = PCI_CAP_ID_MSI;
> - entry->msi_attrib.is_64 = is_64bit_address(control);
> - entry->msi_attrib.entry_nr = 0;
> - entry->msi_attrib.maskbit = is_mask_bit_support(control);
> - entry->msi_attrib.masked = 1;
> - entry->msi_attrib.pos = pos;
> mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
> - entry->msi.nvec = 1;
> + for ( i = 0; i < nvec; ++i )
> + {
> + entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
> + entry[i].msi_attrib.is_64 = is_64bit_address(control);
> + entry[i].msi_attrib.entry_nr = i;
> + entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
> + entry[i].msi_attrib.masked = 1;
> + entry[i].msi_attrib.pos = pos;
> + if ( entry[i].msi_attrib.maskbit )
> + entry[i].msi.mpos = mpos;
> + entry[i].msi.nvec = 0;
> + entry[i].dev = dev;
> + }
> + entry->msi.nvec = nvec;
> entry->irq = irq;
> - if ( is_mask_bit_support(control) )
> - entry->msi.mpos = mpos;
> - entry->dev = dev;
> if ( entry->msi_attrib.maskbit )
> {
> u32 maskbits;
> @@ -693,7 +717,7 @@ static int msix_capability_init(struct p
>
> if ( desc )
> {
> - entry = alloc_msi_entry();
> + entry = alloc_msi_entry(1);
> if ( !entry )
> return -ENOMEM;
> ASSERT(msi);
> @@ -851,7 +875,6 @@ static int msix_capability_init(struct p
>
> static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
> {
> - int status;
> struct pci_dev *pdev;
> struct msi_desc *old_desc;
>
> @@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
> pci_disable_msi(old_desc);
> }
>
> - status = msi_capability_init(pdev, msi->irq, desc);
> - return status;
> + return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
> }
>
> static void __pci_disable_msi(struct msi_desc *entry)
> @@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
>
> list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
> {
> + unsigned int i = 0, nr = 1;
> +
> irq = entry->irq;
> desc = &irq_desc[irq];
>
> @@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
>
> if (desc->msi_desc != entry)
> {
> + bogus:
> dprintk(XENLOG_ERR,
> - "Restore MSI for dev %04x:%02x:%02x:%x not set
> before?\n",
> + "Restore MSI for %04x:%02x:%02x:%u entry %u not
> set?\n",
> pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> - PCI_FUNC(pdev->devfn));
> + PCI_FUNC(pdev->devfn), i);
> spin_unlock_irqrestore(&desc->lock, flags);
> return -EINVAL;
> }
>
> if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> + {
> msi_set_enable(pdev, 0);
> + nr = entry->msi.nvec;
> + }
> else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> msix_set_enable(pdev, 0);
>
> msg = entry->msg;
> write_msi_msg(entry, &msg);
>
> - msi_set_mask_bit(desc, entry->msi_attrib.masked);
> + for ( i = 0; ; )
> + {
> + msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
> + spin_unlock_irqrestore(&desc->lock, flags);
> +
> + if ( !--nr )
> + break;
> +
> + desc = &irq_desc[entry[++i].irq];
> + spin_lock_irqsave(&desc->lock, flags);
> + if ( desc->msi_desc != entry + i )
> + goto bogus;
> + }
> +
> + spin_unlock_irqrestore(&desc->lock, flags);
>
> if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> + {
> + unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
> + u16 control = pci_conf_read16(pdev->seg, pdev->bus,
> + PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn), cpos);
> +
> + control &= ~PCI_MSI_FLAGS_QSIZE;
> + multi_msi_enable(control, entry->msi.nvec);
> + pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn), cpos, control);
> +
> msi_set_enable(pdev, 1);
> + }
> else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> msix_set_enable(pdev, 1);
> -
> - spin_unlock_irqrestore(&desc->lock, flags);
> }
>
> return 0;
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
> break;
>
> case MAP_PIRQ_TYPE_MSI:
> + if ( !msi->table_base )
> + msi->entry_nr = 1;
> irq = *index;
> if ( irq == -1 )
> + case MAP_PIRQ_TYPE_MULTI_MSI:
> irq = create_irq(NUMA_NO_NODE);
>
> if ( irq < nr_irqs_gsi || irq >= nr_irqs )
> @@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
> goto done;
> }
> }
> + else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
> + {
> + if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
> + ret = -EDOM;
> + else if ( msi->entry_nr != 1 && !iommu_intremap )
> + ret = -EOPNOTSUPP;
> + else
> + {
> + while ( msi->entry_nr & (msi->entry_nr - 1) )
> + msi->entry_nr += msi->entry_nr & -msi->entry_nr;
> + pirq = get_free_pirqs(d, msi->entry_nr);
> + if ( pirq < 0 )
> + {
> + while ( (msi->entry_nr >>= 1) > 1 )
> + if ( get_free_pirqs(d, msi->entry_nr) > 0 )
> + break;
> + dprintk(XENLOG_G_ERR, "dom%d: no block of %d free
> pirqs\n",
> + d->domain_id, msi->entry_nr << 1);
> + ret = pirq;
> + }
> + }
> + if ( ret < 0 )
> + goto done;
> + }
> else
> {
> pirq = get_free_pirq(d, type);
> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
> done:
> spin_unlock(&d->event_lock);
> spin_unlock(&pcidevs_lock);
> - if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
> - destroy_irq(irq);
> + if ( ret != 0 )
> + switch ( type )
> + {
> + case MAP_PIRQ_TYPE_MSI:
> + if ( *index == -1 )
> + case MAP_PIRQ_TYPE_MULTI_MSI:
> + destroy_irq(irq);
> + break;
> + }
> free_domain:
> rcu_unlock_domain(d);
> return ret;
> @@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
> if ( copy_from_guest(&map, arg, 1) != 0 )
> break;
>
> - if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
> + switch ( map.type )
> {
> + case MAP_PIRQ_TYPE_MSI_SEG:
> map.type = MAP_PIRQ_TYPE_MSI;
> msi.seg = map.bus >> 16;
> - }
> - else
> - {
> + break;
> +
> + case MAP_PIRQ_TYPE_MULTI_MSI:
> + if ( map.table_base )
> + return -EINVAL;
> + msi.seg = map.bus >> 16;
> + break;
> +
> + default:
> msi.seg = 0;
> + break;
> }
> msi.bus = map.bus;
> msi.devfn = map.devfn;
> @@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
> ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
> &msi);
>
> + if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
> + map.entry_nr = msi.entry_nr;
> if ( __copy_to_guest(arg, &map, 1) )
> ret = -EFAULT;
> break;
> --- a/xen/include/asm-x86/irq.h
> +++ b/xen/include/asm-x86/irq.h
> @@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
> void *data);
> int unmap_domain_pirq(struct domain *d, int pirq);
> int get_free_pirq(struct domain *d, int type);
> +int get_free_pirqs(struct domain *, unsigned int nr);
> void free_domain_pirqs(struct domain *d);
> int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
> int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
> #define multi_msi_capable(control) \
> (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
> #define multi_msi_enable(control, num) \
> - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> + control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
> #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
> #define msi_enable(control, num) multi_msi_enable(control, num); \
> --- a/xen/include/public/physdev.h
> +++ b/xen/include/public/physdev.h
> @@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
> #define MAP_PIRQ_TYPE_GSI 0x1
> #define MAP_PIRQ_TYPE_UNKNOWN 0x2
> #define MAP_PIRQ_TYPE_MSI_SEG 0x3
> +#define MAP_PIRQ_TYPE_MULTI_MSI 0x4
>
> #define PHYSDEVOP_map_pirq 13
> struct physdev_map_pirq {
> domid_t domid;
> /* IN */
> int type;
> - /* IN */
> + /* IN (ignored for ..._MULTI_MSI) */
> int index;
> /* IN or OUT */
> int pirq;
> - /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
> + /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
> int bus;
> /* IN */
> int devfn;
> - /* IN */
> + /* IN (also OUT for ..._MULTI_MSI) */
> int entry_nr;
> /* IN */
> uint64_t table_base;
^ permalink raw reply [flat|nested] 18+ messages in thread
* Ping: [PATCH resend 3/3] pciif: add multi-vector-MSI command
2013-07-16 10:15 ` [PATCH resend 3/3] pciif: add multi-vector-MSI command Jan Beulich
2013-07-16 11:19 ` Andrew Cooper
@ 2013-08-05 13:12 ` Jan Beulich
2013-08-08 9:02 ` Keir Fraser
2 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2013-08-05 13:12 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel
Ping (I/O interface change, however trivial it might be)?
>>> On 16.07.13 at 12:15, "Jan Beulich" <JBeulich@suse.com> wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/include/public/io/pciif.h
> +++ b/xen/include/public/io/pciif.h
> @@ -46,6 +46,7 @@
> #define XEN_PCI_OP_aer_resume (7)
> #define XEN_PCI_OP_aer_mmio (8)
> #define XEN_PCI_OP_aer_slotreset (9)
> +#define XEN_PCI_OP_enable_multi_msi (10)
>
> /* xen_pci_op error numbers */
> #define XEN_PCI_ERR_success (0)
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 2/3] x86: enable multi-vector MSI
2013-07-16 10:14 ` [PATCH resend 2/3] x86: enable " Jan Beulich
2013-07-16 11:36 ` Andrew Cooper
2013-08-05 13:11 ` Ping: " Jan Beulich
@ 2013-08-08 9:02 ` Keir Fraser
2 siblings, 0 replies; 18+ messages in thread
From: Keir Fraser @ 2013-08-08 9:02 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: xiantao.zhang
On 16/07/2013 11:14, "Jan Beulich" <JBeulich@suse.com> wrote:
> This implies
> - extending the public interface to have a way to request a block of
> MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
> note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
2013-07-16 10:15 ` [PATCH resend 3/3] pciif: add multi-vector-MSI command Jan Beulich
2013-07-16 11:19 ` Andrew Cooper
2013-08-05 13:12 ` Ping: " Jan Beulich
@ 2013-08-08 9:02 ` Keir Fraser
2 siblings, 0 replies; 18+ messages in thread
From: Keir Fraser @ 2013-08-08 9:02 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: xiantao.zhang
On 16/07/2013 11:15, "Jan Beulich" <JBeulich@suse.com> wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2013-08-08 9:02 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-16 10:00 [PATCH resend 0/3] x86/IOMMU: multi-vector MSI Jan Beulich
2013-07-16 10:13 ` [PATCH resend 1/3] VT-d: enable for " Jan Beulich
2013-07-16 11:15 ` Andrew Cooper
2013-07-16 11:32 ` Jan Beulich
2013-07-17 9:50 ` Andrew Cooper
2013-07-18 10:48 ` Zhang, Xiantao
2013-07-16 10:14 ` [PATCH resend 2/3] x86: enable " Jan Beulich
2013-07-16 11:36 ` Andrew Cooper
2013-07-16 11:48 ` Jan Beulich
2013-07-17 10:02 ` Andrew Cooper
2013-08-05 13:11 ` Ping: " Jan Beulich
2013-08-08 9:02 ` Keir Fraser
2013-07-16 10:15 ` [PATCH resend 3/3] pciif: add multi-vector-MSI command Jan Beulich
2013-07-16 11:19 ` Andrew Cooper
2013-07-16 11:35 ` Jan Beulich
2013-07-17 9:02 ` Andrew Cooper
2013-08-05 13:12 ` Ping: " Jan Beulich
2013-08-08 9:02 ` Keir Fraser
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.