All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
@ 2013-02-06 13:04 Jan Beulich
  2013-02-06 13:12 ` [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-06 13:04 UTC (permalink / raw)
  To: xen-devel; +Cc: Boris Ostrovsky, Sherry Hurwitz

A regression was reported on a class of broken firmware that c/s
26517:601139e2b0db didn't consider, leading to a boot time crash.

Further, the phantom function support committed earlier is still
lacking AMD side code (during the composition of which the
security issue was noticed, and hence the patch had to be held
back.

1: also spot missing IO-APIC entries in IVRS table
2: handle MSI for phantom functions

Signed-off-by: Jan Beulich <jbeulich@suse.com>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-06 13:04 [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
@ 2013-02-06 13:12 ` Jan Beulich
  2013-02-06 14:41   ` Boris Ostrovsky
  2013-02-11 10:49   ` Ian Campbell
  2013-02-06 13:12 ` [PATCH 2/2] AMD IOMMU: handle MSI for phantom functions Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-06 13:12 UTC (permalink / raw)
  To: xen-devel; +Cc: Boris Ostrovsky, Sherry Hurwitz

[-- Attachment #1: Type: text/plain, Size: 4908 bytes --]

Apart from dealing duplicate conflicting entries, we also have to
handle firmware omitting IO-APIC entries in IVRS altogether. Not doing
so has resulted in c/s 26517:601139e2b0db to crash such systems during
boot (whereas with the change here the IOMMU gets disabled just as is
being done in the other cases, i.e. unless global tables are being
used).

Debugging this issue has also pointed out that the debug log output is
pretty ugly to look at - consolidate the output, and add one extra
item for the IVHD special entries, so that future issues are easier
to analyze.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -354,9 +354,8 @@ static int __init parse_ivmd_block(const
     base = start_addr & PAGE_MASK;
     limit = (start_addr + mem_length - 1) & PAGE_MASK;
 
-    AMD_IOMMU_DEBUG("IVMD Block: Type %#x\n",ivmd_block->header.type);
-    AMD_IOMMU_DEBUG(" Start_Addr_Phys %#lx\n", start_addr);
-    AMD_IOMMU_DEBUG(" Mem_Length %#lx\n", mem_length);
+    AMD_IOMMU_DEBUG("IVMD Block: type %#x phys %#lx len %#lx\n",
+                    ivmd_block->header.type, start_addr, mem_length);
 
     if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
         iw = ir = IOMMU_CONTROL_ENABLED;
@@ -551,8 +550,8 @@ static u16 __init parse_ivhd_device_alia
         return 0;
     }
 
-    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x\n", first_bdf, last_bdf);
-    AMD_IOMMU_DEBUG(" Dev_Id Alias: %#x\n", alias_id);
+    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x alias %#x\n",
+                    first_bdf, last_bdf, alias_id);
 
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
         add_ivrs_mapping_entry(bdf, alias_id, range->alias.header.data_setting,
@@ -654,6 +653,9 @@ static u16 __init parse_ivhd_device_spec
         return 0;
     }
 
+    AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle %#x\n",
+                    seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
+                    special->variety, special->handle);
     add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
 
     switch ( special->variety )
@@ -758,10 +760,9 @@ static int __init parse_ivhd_block(const
     {
         ivhd_device = (const void *)((const u8 *)ivhd_block + block_length);
 
-        AMD_IOMMU_DEBUG( "IVHD Device Entry:\n");
-        AMD_IOMMU_DEBUG( " Type %#x\n", ivhd_device->header.type);
-        AMD_IOMMU_DEBUG( " Dev_Id %#x\n", ivhd_device->header.id);
-        AMD_IOMMU_DEBUG( " Flags %#x\n", ivhd_device->header.data_setting);
+        AMD_IOMMU_DEBUG("IVHD Device Entry: type %#x id %#x flags %#x\n",
+                        ivhd_device->header.type, ivhd_device->header.id,
+                        ivhd_device->header.data_setting);
 
         switch ( ivhd_device->header.type )
         {
@@ -890,6 +891,7 @@ static int __init parse_ivrs_table(struc
 {
     const struct acpi_ivrs_header *ivrs_block;
     unsigned long length;
+    unsigned int apic;
     int error = 0;
 
     BUG_ON(!table);
@@ -903,11 +905,9 @@ static int __init parse_ivrs_table(struc
     {
         ivrs_block = (struct acpi_ivrs_header *)((u8 *)table + length);
 
-        AMD_IOMMU_DEBUG("IVRS Block:\n");
-        AMD_IOMMU_DEBUG(" Type %#x\n", ivrs_block->type);
-        AMD_IOMMU_DEBUG(" Flags %#x\n", ivrs_block->flags);
-        AMD_IOMMU_DEBUG(" Length %#x\n", ivrs_block->length);
-        AMD_IOMMU_DEBUG(" Dev_Id %#x\n", ivrs_block->device_id);
+        AMD_IOMMU_DEBUG("IVRS Block: type %#x flags %#x len %#x id %#x\n",
+                        ivrs_block->type, ivrs_block->flags,
+                        ivrs_block->length, ivrs_block->device_id);
 
         if ( table->length < (length + ivrs_block->length) )
         {
@@ -922,6 +922,29 @@ static int __init parse_ivrs_table(struc
         length += ivrs_block->length;
     }
 
+    /* Each IO-APIC must have been mentioned in the table. */
+    for ( apic = 0; !error && apic < nr_ioapics; ++apic )
+    {
+        if ( !nr_ioapic_entries[apic] ||
+             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            continue;
+
+        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
+               IO_APIC_ID(apic));
+        if ( amd_iommu_perdev_intremap )
+            error = -ENXIO;
+        else
+        {
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
+                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            {
+                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
+                error = -ENOMEM;
+            }
+        }
+    }
+
     return error;
 }
 



[-- Attachment #2: AMD-IOMMU-IVHD-special-missing.patch --]
[-- Type: text/plain, Size: 4966 bytes --]

AMD IOMMU: also spot missing IO-APIC entries in IVRS table

Apart from dealing duplicate conflicting entries, we also have to
handle firmware omitting IO-APIC entries in IVRS altogether. Not doing
so has resulted in c/s 26517:601139e2b0db to crash such systems during
boot (whereas with the change here the IOMMU gets disabled just as is
being done in the other cases, i.e. unless global tables are being
used).

Debugging this issue has also pointed out that the debug log output is
pretty ugly to look at - consolidate the output, and add one extra
item for the IVHD special entries, so that future issues are easier
to analyze.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -354,9 +354,8 @@ static int __init parse_ivmd_block(const
     base = start_addr & PAGE_MASK;
     limit = (start_addr + mem_length - 1) & PAGE_MASK;
 
-    AMD_IOMMU_DEBUG("IVMD Block: Type %#x\n",ivmd_block->header.type);
-    AMD_IOMMU_DEBUG(" Start_Addr_Phys %#lx\n", start_addr);
-    AMD_IOMMU_DEBUG(" Mem_Length %#lx\n", mem_length);
+    AMD_IOMMU_DEBUG("IVMD Block: type %#x phys %#lx len %#lx\n",
+                    ivmd_block->header.type, start_addr, mem_length);
 
     if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
         iw = ir = IOMMU_CONTROL_ENABLED;
@@ -551,8 +550,8 @@ static u16 __init parse_ivhd_device_alia
         return 0;
     }
 
-    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x\n", first_bdf, last_bdf);
-    AMD_IOMMU_DEBUG(" Dev_Id Alias: %#x\n", alias_id);
+    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x alias %#x\n",
+                    first_bdf, last_bdf, alias_id);
 
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
         add_ivrs_mapping_entry(bdf, alias_id, range->alias.header.data_setting,
@@ -654,6 +653,9 @@ static u16 __init parse_ivhd_device_spec
         return 0;
     }
 
+    AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle %#x\n",
+                    seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
+                    special->variety, special->handle);
     add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
 
     switch ( special->variety )
@@ -758,10 +760,9 @@ static int __init parse_ivhd_block(const
     {
         ivhd_device = (const void *)((const u8 *)ivhd_block + block_length);
 
-        AMD_IOMMU_DEBUG( "IVHD Device Entry:\n");
-        AMD_IOMMU_DEBUG( " Type %#x\n", ivhd_device->header.type);
-        AMD_IOMMU_DEBUG( " Dev_Id %#x\n", ivhd_device->header.id);
-        AMD_IOMMU_DEBUG( " Flags %#x\n", ivhd_device->header.data_setting);
+        AMD_IOMMU_DEBUG("IVHD Device Entry: type %#x id %#x flags %#x\n",
+                        ivhd_device->header.type, ivhd_device->header.id,
+                        ivhd_device->header.data_setting);
 
         switch ( ivhd_device->header.type )
         {
@@ -890,6 +891,7 @@ static int __init parse_ivrs_table(struc
 {
     const struct acpi_ivrs_header *ivrs_block;
     unsigned long length;
+    unsigned int apic;
     int error = 0;
 
     BUG_ON(!table);
@@ -903,11 +905,9 @@ static int __init parse_ivrs_table(struc
     {
         ivrs_block = (struct acpi_ivrs_header *)((u8 *)table + length);
 
-        AMD_IOMMU_DEBUG("IVRS Block:\n");
-        AMD_IOMMU_DEBUG(" Type %#x\n", ivrs_block->type);
-        AMD_IOMMU_DEBUG(" Flags %#x\n", ivrs_block->flags);
-        AMD_IOMMU_DEBUG(" Length %#x\n", ivrs_block->length);
-        AMD_IOMMU_DEBUG(" Dev_Id %#x\n", ivrs_block->device_id);
+        AMD_IOMMU_DEBUG("IVRS Block: type %#x flags %#x len %#x id %#x\n",
+                        ivrs_block->type, ivrs_block->flags,
+                        ivrs_block->length, ivrs_block->device_id);
 
         if ( table->length < (length + ivrs_block->length) )
         {
@@ -922,6 +922,29 @@ static int __init parse_ivrs_table(struc
         length += ivrs_block->length;
     }
 
+    /* Each IO-APIC must have been mentioned in the table. */
+    for ( apic = 0; !error && apic < nr_ioapics; ++apic )
+    {
+        if ( !nr_ioapic_entries[apic] ||
+             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            continue;
+
+        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
+               IO_APIC_ID(apic));
+        if ( amd_iommu_perdev_intremap )
+            error = -ENXIO;
+        else
+        {
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
+                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            {
+                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
+                error = -ENOMEM;
+            }
+        }
+    }
+
     return error;
 }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 2/2] AMD IOMMU: handle MSI for phantom functions
  2013-02-06 13:04 [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
  2013-02-06 13:12 ` [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table Jan Beulich
@ 2013-02-06 13:12 ` Jan Beulich
  2013-02-11 10:53   ` Ian Campbell
  2013-02-08  9:58 ` [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
  2013-02-11 13:12 ` Ian Campbell
  3 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2013-02-06 13:12 UTC (permalink / raw)
  To: xen-devel; +Cc: Boris Ostrovsky, Sherry Hurwitz

[-- Attachment #1: Type: text/plain, Size: 3376 bytes --]

With ordinary requests allowed to come from phantom functions, the
remapping tables ought to be set up to also allow for MSI triggers to
come from other than the "real" device too.

It is not clear to me whether the alias-ID handling also needs
adjustment for this to work properly, or whether firmware can be
expected to properly express this through a device alias range
descriptor (or multiple device alias ones).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -286,7 +286,7 @@ void amd_iommu_ioapic_update_ire(
 
 static void update_intremap_entry_from_msi_msg(
     struct amd_iommu *iommu, u16 bdf,
-    struct msi_desc *msi_desc, struct msi_msg *msg)
+    int *remap_index, const struct msi_msg *msg)
 {
     unsigned long flags;
     u32* entry;
@@ -302,7 +302,7 @@ static void update_intremap_entry_from_m
     {
         lock = get_intremap_lock(iommu->seg, req_id);
         spin_lock_irqsave(lock, flags);
-        free_intremap_entry(iommu->seg, req_id, msi_desc->remap_index);
+        free_intremap_entry(iommu->seg, req_id, *remap_index);
         spin_unlock_irqrestore(lock, flags);
 
         if ( ( req_id != alias_id ) &&
@@ -310,7 +310,7 @@ static void update_intremap_entry_from_m
         {
             lock = get_intremap_lock(iommu->seg, alias_id);
             spin_lock_irqsave(lock, flags);
-            free_intremap_entry(iommu->seg, alias_id, msi_desc->remap_index);
+            free_intremap_entry(iommu->seg, alias_id, *remap_index);
             spin_unlock_irqrestore(lock, flags);
         }
         goto done;
@@ -324,7 +324,10 @@ static void update_intremap_entry_from_m
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
     offset = get_intremap_offset(vector, delivery_mode);
-    msi_desc->remap_index = offset;
+    if ( *remap_index < 0)
+        *remap_index = offset;
+    else
+        BUG_ON(*remap_index != offset);
 
     entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
@@ -379,12 +382,30 @@ void amd_iommu_msi_msg_update_ire(
     }
 
     if ( msi_desc->remap_index >= 0 )
-        update_intremap_entry_from_msi_msg(iommu, bdf, msi_desc, NULL);
+    {
+        do {
+            update_intremap_entry_from_msi_msg(iommu, bdf,
+                                               &msi_desc->remap_index, NULL);
+            if ( !pdev || !pdev->phantom_stride )
+                break;
+            bdf += pdev->phantom_stride;
+        } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+
+        msi_desc->remap_index = -1;
+        if ( pdev )
+            bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+    }
 
     if ( !msg )
         return;
 
-    update_intremap_entry_from_msi_msg(iommu, bdf, msi_desc, msg);
+    do {
+        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
+                                           msg);
+        if ( !pdev || !pdev->phantom_stride )
+            break;
+        bdf += pdev->phantom_stride;
+    } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 }
 
 void amd_iommu_read_msi_from_ire(




[-- Attachment #2: AMD-IOMMU-phantom-MSI.patch --]
[-- Type: text/plain, Size: 3417 bytes --]

AMD IOMMU: handle MSI for phantom functions

With ordinary requests allowed to come from phantom functions, the
remapping tables ought to be set up to also allow for MSI triggers to
come from other than the "real" device too.

It is not clear to me whether the alias-ID handling also needs
adjustment for this to work properly, or whether firmware can be
expected to properly express this through a device alias range
descriptor (or multiple device alias ones).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -286,7 +286,7 @@ void amd_iommu_ioapic_update_ire(
 
 static void update_intremap_entry_from_msi_msg(
     struct amd_iommu *iommu, u16 bdf,
-    struct msi_desc *msi_desc, struct msi_msg *msg)
+    int *remap_index, const struct msi_msg *msg)
 {
     unsigned long flags;
     u32* entry;
@@ -302,7 +302,7 @@ static void update_intremap_entry_from_m
     {
         lock = get_intremap_lock(iommu->seg, req_id);
         spin_lock_irqsave(lock, flags);
-        free_intremap_entry(iommu->seg, req_id, msi_desc->remap_index);
+        free_intremap_entry(iommu->seg, req_id, *remap_index);
         spin_unlock_irqrestore(lock, flags);
 
         if ( ( req_id != alias_id ) &&
@@ -310,7 +310,7 @@ static void update_intremap_entry_from_m
         {
             lock = get_intremap_lock(iommu->seg, alias_id);
             spin_lock_irqsave(lock, flags);
-            free_intremap_entry(iommu->seg, alias_id, msi_desc->remap_index);
+            free_intremap_entry(iommu->seg, alias_id, *remap_index);
             spin_unlock_irqrestore(lock, flags);
         }
         goto done;
@@ -324,7 +324,10 @@ static void update_intremap_entry_from_m
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
     offset = get_intremap_offset(vector, delivery_mode);
-    msi_desc->remap_index = offset;
+    if ( *remap_index < 0)
+        *remap_index = offset;
+    else
+        BUG_ON(*remap_index != offset);
 
     entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
@@ -379,12 +382,30 @@ void amd_iommu_msi_msg_update_ire(
     }
 
     if ( msi_desc->remap_index >= 0 )
-        update_intremap_entry_from_msi_msg(iommu, bdf, msi_desc, NULL);
+    {
+        do {
+            update_intremap_entry_from_msi_msg(iommu, bdf,
+                                               &msi_desc->remap_index, NULL);
+            if ( !pdev || !pdev->phantom_stride )
+                break;
+            bdf += pdev->phantom_stride;
+        } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+
+        msi_desc->remap_index = -1;
+        if ( pdev )
+            bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+    }
 
     if ( !msg )
         return;
 
-    update_intremap_entry_from_msi_msg(iommu, bdf, msi_desc, msg);
+    do {
+        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
+                                           msg);
+        if ( !pdev || !pdev->phantom_stride )
+            break;
+        bdf += pdev->phantom_stride;
+    } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 }
 
 void amd_iommu_read_msi_from_ire(

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-06 13:12 ` [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table Jan Beulich
@ 2013-02-06 14:41   ` Boris Ostrovsky
  2013-02-06 14:52     ` Jan Beulich
  2013-02-11 10:49   ` Ian Campbell
  1 sibling, 1 reply; 20+ messages in thread
From: Boris Ostrovsky @ 2013-02-06 14:41 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Sherry Hurwitz, xen-devel

On 2/6/2013 8:12 AM, Jan Beulich wrote:

>
> +    /* Each IO-APIC must have been mentioned in the table. */
> +    for ( apic = 0; !error&&  apic<  nr_ioapics; ++apic )
> +    {
> +        if ( !nr_ioapic_entries[apic] ||
> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +            continue;
> +
> +        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
> +               IO_APIC_ID(apic));
> +        if ( amd_iommu_perdev_intremap )
> +            error = -ENXIO;
> +        else
> +        {
> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
> +                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +            {
> +                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> +                error = -ENOMEM;
> +            }
> +        }
> +    }
> +
>       return error;
>   }
>

Don't we end up with ioapic_sbdf[IO_APIC_ID(apic)].bdf/seg being 
uninitialized? They are usually set in parse_ivhd_device_special(), at 
the same time pin_setup is allocated, but with IVRS broken in this way 
we'll never get there, will we?

-boris

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-06 14:41   ` Boris Ostrovsky
@ 2013-02-06 14:52     ` Jan Beulich
  2013-02-06 15:03       ` Boris Ostrovsky
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2013-02-06 14:52 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: xen-devel, Sherry Hurwitz

>>> On 06.02.13 at 15:41, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> On 2/6/2013 8:12 AM, Jan Beulich wrote:
> 
>>
>> +    /* Each IO-APIC must have been mentioned in the table. */
>> +    for ( apic = 0; !error&&  apic<  nr_ioapics; ++apic )
>> +    {
>> +        if ( !nr_ioapic_entries[apic] ||
>> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>> +            continue;
>> +
>> +        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
>> +               IO_APIC_ID(apic));
>> +        if ( amd_iommu_perdev_intremap )
>> +            error = -ENXIO;
>> +        else
>> +        {
>> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
>> +                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
>> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>> +            {
>> +                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>> +                error = -ENOMEM;
>> +            }
>> +        }
>> +    }
>> +
>>       return error;
>>   }
>>
> 
> Don't we end up with ioapic_sbdf[IO_APIC_ID(apic)].bdf/seg being 
> uninitialized? They are usually set in parse_ivhd_device_special(), at 
> the same time pin_setup is allocated, but with IVRS broken in this way 
> we'll never get there, will we?

Correct. .bdf/.seg being uninitialized is no much of a problem
when using global intremap tables though. And certainly not on
a system with just a single IOMMU (as was the case on the
crashing system). Do you see alternatives? Disable the IOMMU
always, even if not using global remap tables? That could be
seen as a regression, as at least global remap tables worked fine
so far on such systems.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-06 14:52     ` Jan Beulich
@ 2013-02-06 15:03       ` Boris Ostrovsky
  2013-02-06 15:12         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Boris Ostrovsky @ 2013-02-06 15:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Sherry Hurwitz

On 2/6/2013 9:52 AM, Jan Beulich wrote:
>>>> On 06.02.13 at 15:41, Boris Ostrovsky<boris.ostrovsky@oracle.com>  wrote:
>> On 2/6/2013 8:12 AM, Jan Beulich wrote:
>>
>>>
>>> +    /* Each IO-APIC must have been mentioned in the table. */
>>> +    for ( apic = 0; !error&&   apic<   nr_ioapics; ++apic )
>>> +    {
>>> +        if ( !nr_ioapic_entries[apic] ||
>>> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>>> +            continue;
>>> +
>>> +        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
>>> +               IO_APIC_ID(apic));
>>> +        if ( amd_iommu_perdev_intremap )
>>> +            error = -ENXIO;
>>> +        else
>>> +        {
>>> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
>>> +                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
>>> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>>> +            {
>>> +                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>>> +                error = -ENOMEM;
>>> +            }
>>> +        }
>>> +    }
>>> +
>>>        return error;
>>>    }
>>>
>>
>> Don't we end up with ioapic_sbdf[IO_APIC_ID(apic)].bdf/seg being
>> uninitialized? They are usually set in parse_ivhd_device_special(), at
>> the same time pin_setup is allocated, but with IVRS broken in this way
>> we'll never get there, will we?
>
> Correct. .bdf/.seg being uninitialized is no much of a problem
> when using global intremap tables though.

Since this patch has been tested it clearly must have worked somehow but 
I don't understand where you'd get bdf and seg in 
amd_iommu_ioapic_update_ire() and then manage to find the right IOMMU.

-boris

 > And certainly not on
> a system with just a single IOMMU (as was the case on the
> crashing system). Do you see alternatives? Disable the IOMMU
> always, even if not using global remap tables? That could be
> seen as a regression, as at least global remap tables worked fine
> so far on such systems.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-06 15:03       ` Boris Ostrovsky
@ 2013-02-06 15:12         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-06 15:12 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: xen-devel, Sherry Hurwitz

>>> On 06.02.13 at 16:03, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> On 2/6/2013 9:52 AM, Jan Beulich wrote:
>>>>> On 06.02.13 at 15:41, Boris Ostrovsky<boris.ostrovsky@oracle.com>  wrote:
>>> On 2/6/2013 8:12 AM, Jan Beulich wrote:
>>>
>>>>
>>>> +    /* Each IO-APIC must have been mentioned in the table. */
>>>> +    for ( apic = 0; !error&&   apic<   nr_ioapics; ++apic )
>>>> +    {
>>>> +        if ( !nr_ioapic_entries[apic] ||
>>>> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>>>> +            continue;
>>>> +
>>>> +        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
>>>> +               IO_APIC_ID(apic));
>>>> +        if ( amd_iommu_perdev_intremap )
>>>> +            error = -ENXIO;
>>>> +        else
>>>> +        {
>>>> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
>>>> +                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
>>>> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>>>> +            {
>>>> +                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>>>> +                error = -ENOMEM;
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +
>>>>        return error;
>>>>    }
>>>>
>>>
>>> Don't we end up with ioapic_sbdf[IO_APIC_ID(apic)].bdf/seg being
>>> uninitialized? They are usually set in parse_ivhd_device_special(), at
>>> the same time pin_setup is allocated, but with IVRS broken in this way
>>> we'll never get there, will we?
>>
>> Correct. .bdf/.seg being uninitialized is no much of a problem
>> when using global intremap tables though.
> 
> Since this patch has been tested it clearly must have worked somehow but 
> I don't understand where you'd get bdf and seg in 
> amd_iommu_ioapic_update_ire() and then manage to find the right IOMMU.

The "right" IOMMU simply is the only one, and that is equally well
found with sbdf being zero.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-06 13:04 [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
  2013-02-06 13:12 ` [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table Jan Beulich
  2013-02-06 13:12 ` [PATCH 2/2] AMD IOMMU: handle MSI for phantom functions Jan Beulich
@ 2013-02-08  9:58 ` Jan Beulich
  2013-02-08 13:53   ` Sander Eikelenboom
  2013-03-01 18:45   ` Malcolm Crossley
  2013-02-11 13:12 ` Ian Campbell
  3 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-08  9:58 UTC (permalink / raw)
  To: xen-devel; +Cc: Boris Ostrovsky, xiantao.zhang, Sherry Hurwitz

[-- Attachment #1: Type: text/plain, Size: 9405 bytes --]

>>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
> A regression was reported on a class of broken firmware that c/s
> 26517:601139e2b0db didn't consider, leading to a boot time crash.

After some more thought on this and the comments we got
regarding disabling the IOMMU in this situation altogether making
things worse instead of better, I came to the conclusion that we
can actually restrict the action in affected cases to just disabling
interrupt remapping. That doesn't make the situation worse than
prior to the XSA-36 fixes (where interrupt remapping didn't really
protect domains from one another), but allows at least DMA
isolation to still be utilized. Patch 3/2 below/attached.

Jan

AMD IOMMU: only disable interrupt remapping when certain IVRS consistency checks fail

After some more thought on the XSA-36 and specifically the comments we
got regarding disabling the IOMMU in this situation altogether making
things worse instead of better, I came to the conclusion that we can
actually restrict the action in affected cases to just disabling
interrupt remapping. That doesn't make the situation worse than prior
to the XSA-36 fixes (where interrupt remapping didn't really protect
domains from one another), but allows at least DMA isolation to still
be utilized.

In the course of this, the calls to the interrupt remapping related
IOMMU implementation specific operations needed to be re-qualified
from depending on iommu_enabled to iommu_intremap (as was already the
case for x86's HPET code).

That in turn required to make sure iommu_intremap gets properly
cleared when the respective initialization fails (or isn't being
done at all).

Along with making sure interrupt remapping doesn't get inconsistently
enabled on some IOMMUs and not on others in the VT-d code, this in turn
allowed quite a bit of cleanup on the VT-d side (if desired, that
cleanup could of course be broken out into a separate patch).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -207,7 +207,7 @@ static void read_msi_msg(struct msi_desc
         BUG();
     }
 
-    if ( iommu_enabled )
+    if ( iommu_intremap )
         iommu_read_msi_from_ire(entry, msg);
 }
 
@@ -215,7 +215,7 @@ static void write_msi_msg(struct msi_des
 {
     entry->msg = *msg;
 
-    if ( iommu_enabled )
+    if ( iommu_intremap )
     {
         ASSERT(msg != &entry->msg);
         iommu_update_ire_from_msi(entry, msg);
@@ -489,7 +489,7 @@ int msi_free_irq(struct msi_desc *entry)
     }
 
     /* Free the unused IRTE if intr remap enabled */
-    if ( iommu_enabled )
+    if ( iommu_intremap )
         iommu_update_ire_from_msi(entry, NULL);
 
     list_del(&entry->list);
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -682,10 +682,10 @@ static u16 __init parse_ivhd_device_spec
                     printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
                            special->handle);
                     if ( amd_iommu_perdev_intremap )
-                        return 0;
+                        iommu_intremap = 0;
                 }
             }
-            else
+            else if ( iommu_intremap )
             {
                 /* set device id of ioapic */
                 ioapic_sbdf[special->handle].bdf = bdf;
@@ -697,7 +697,7 @@ static u16 __init parse_ivhd_device_spec
                      !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
                 {
                     printk(XENLOG_ERR "IVHD Error: Out of memory\n");
-                    return 0;
+                    iommu_intremap = 0;
                 }
             }
             break;
@@ -706,7 +706,7 @@ static u16 __init parse_ivhd_device_spec
         {
             printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n",
                    special->handle);
-            return 0;
+            iommu_intremap = 0;
         }
         break;
     case ACPI_IVHD_HPET:
@@ -932,7 +932,7 @@ static int __init parse_ivrs_table(struc
         printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
                IO_APIC_ID(apic));
         if ( amd_iommu_perdev_intremap )
-            error = -ENXIO;
+            iommu_intremap = 0;
         else
         {
             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
@@ -940,7 +940,7 @@ static int __init parse_ivrs_table(struc
             if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
-                error = -ENOMEM;
+                iommu_intremap = 0;
             }
         }
     }
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1157,7 +1157,7 @@ int __init amd_iommu_init(void)
     BUG_ON( !iommu_found() );
 
     if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
-        goto error_out;
+        iommu_intremap = 0;
 
     ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
 
@@ -1173,7 +1173,7 @@ int __init amd_iommu_init(void)
         goto error_out;
 
     /* initialize io-apic interrupt remapping entries */
-    if ( amd_iommu_setup_ioapic_remapping() != 0 )
+    if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 )
         goto error_out;
 
     /* allocate and initialize a global device table shared by all iommus */
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -470,6 +470,8 @@ int __init iommu_setup(void)
         rc = iommu_hardware_setup();
         iommu_enabled = (rc == 0);
     }
+    if ( !iommu_enabled )
+        iommu_intremap = 0;
 
     if ( (force_iommu && !iommu_enabled) ||
          (force_intremap && !iommu_intremap) )
@@ -487,6 +489,7 @@ int __init iommu_setup(void)
         printk(" - Dom0 mode: %s\n",
                iommu_passthrough ? "Passthrough" :
                iommu_dom0_strict ? "Strict" : "Relaxed");
+    printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
 
     return rc;
 }
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -373,7 +373,7 @@ unsigned int io_apic_read_remap_rte(
     struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr || !ir_ctrl->iremap_num ||
+    if ( !ir_ctrl->iremap_num ||
         ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
         return __io_apic_read(apic, reg);
 
@@ -396,15 +396,8 @@ void io_apic_write_remap_rte(
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int rte_upper = (reg & 1) ? 1 : 0;
     struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
-    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
     int saved_mask;
 
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
-    {
-        __io_apic_write(apic, reg, value);
-        return;
-    }
-
     old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);
 
     remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
@@ -653,20 +646,11 @@ void msi_msg_read_remap_rte(
 {
     struct pci_dev *pdev = msi_desc->dev;
     struct acpi_drhd_unit *drhd = NULL;
-    struct iommu *iommu = NULL;
-    struct ir_ctrl *ir_ctrl;
 
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
-    if ( !drhd )
-        return;
-    iommu = drhd->iommu;
-
-    ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
-        return;
-
-    remap_entry_to_msi_msg(iommu, msg);
+    if ( drhd )
+        remap_entry_to_msi_msg(drhd->iommu, msg);
 }
 
 void msi_msg_write_remap_rte(
@@ -674,20 +658,11 @@ void msi_msg_write_remap_rte(
 {
     struct pci_dev *pdev = msi_desc->dev;
     struct acpi_drhd_unit *drhd = NULL;
-    struct iommu *iommu = NULL;
-    struct ir_ctrl *ir_ctrl;
 
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
-    if ( !drhd )
-        return;
-    iommu = drhd->iommu;
-
-    ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
-        return;
-
-    msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
+    if ( drhd )
+        msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
 }
 
 int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2065,6 +2065,9 @@ static int init_vtd_hw(void)
                 break;
             }
         }
+        if ( !iommu_intremap )
+            for_each_drhd_unit ( drhd )
+                disable_intremap(drhd->iommu);
     }
 
     /*
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -129,7 +129,7 @@ struct IO_APIC_route_entry {
 extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
 
 /* Only need to remap ioapic RTE (reg: 10~3Fh) */
-#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10))
+#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10))
 
 static inline unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
 {



[-- Attachment #2: AMD-IOMMU-disable-intremap-only.patch --]
[-- Type: text/plain, Size: 8673 bytes --]

AMD IOMMU: only disable interrupt remapping when certain IVRS consistency checks fail

After some more thought on the XSA-36 and specifically the comments we
got regarding disabling the IOMMU in this situation altogether making
things worse instead of better, I came to the conclusion that we can
actually restrict the action in affected cases to just disabling
interrupt remapping. That doesn't make the situation worse than prior
to the XSA-36 fixes (where interrupt remapping didn't really protect
domains from one another), but allows at least DMA isolation to still
be utilized.

In the course of this, the calls to the interrupt remapping related
IOMMU implementation specific operations needed to be re-qualified
from depending on iommu_enabled to iommu_intremap (as was already the
case for x86's HPET code).

That in turn required to make sure iommu_intremap gets properly
cleared when the respective initialization fails (or isn't being
done at all).

Along with making sure interrupt remapping doesn't get inconsistently
enabled on some IOMMUs and not on others in the VT-d code, this in turn
allowed quite a bit of cleanup on the VT-d side (if desired, that
cleanup could of course be broken out into a separate patch).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -207,7 +207,7 @@ static void read_msi_msg(struct msi_desc
         BUG();
     }
 
-    if ( iommu_enabled )
+    if ( iommu_intremap )
         iommu_read_msi_from_ire(entry, msg);
 }
 
@@ -215,7 +215,7 @@ static void write_msi_msg(struct msi_des
 {
     entry->msg = *msg;
 
-    if ( iommu_enabled )
+    if ( iommu_intremap )
     {
         ASSERT(msg != &entry->msg);
         iommu_update_ire_from_msi(entry, msg);
@@ -489,7 +489,7 @@ int msi_free_irq(struct msi_desc *entry)
     }
 
     /* Free the unused IRTE if intr remap enabled */
-    if ( iommu_enabled )
+    if ( iommu_intremap )
         iommu_update_ire_from_msi(entry, NULL);
 
     list_del(&entry->list);
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -682,10 +682,10 @@ static u16 __init parse_ivhd_device_spec
                     printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
                            special->handle);
                     if ( amd_iommu_perdev_intremap )
-                        return 0;
+                        iommu_intremap = 0;
                 }
             }
-            else
+            else if ( iommu_intremap )
             {
                 /* set device id of ioapic */
                 ioapic_sbdf[special->handle].bdf = bdf;
@@ -697,7 +697,7 @@ static u16 __init parse_ivhd_device_spec
                      !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
                 {
                     printk(XENLOG_ERR "IVHD Error: Out of memory\n");
-                    return 0;
+                    iommu_intremap = 0;
                 }
             }
             break;
@@ -706,7 +706,7 @@ static u16 __init parse_ivhd_device_spec
         {
             printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n",
                    special->handle);
-            return 0;
+            iommu_intremap = 0;
         }
         break;
     case ACPI_IVHD_HPET:
@@ -932,7 +932,7 @@ static int __init parse_ivrs_table(struc
         printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
                IO_APIC_ID(apic));
         if ( amd_iommu_perdev_intremap )
-            error = -ENXIO;
+            iommu_intremap = 0;
         else
         {
             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
@@ -940,7 +940,7 @@ static int __init parse_ivrs_table(struc
             if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
-                error = -ENOMEM;
+                iommu_intremap = 0;
             }
         }
     }
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1157,7 +1157,7 @@ int __init amd_iommu_init(void)
     BUG_ON( !iommu_found() );
 
     if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
-        goto error_out;
+        iommu_intremap = 0;
 
     ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
 
@@ -1173,7 +1173,7 @@ int __init amd_iommu_init(void)
         goto error_out;
 
     /* initialize io-apic interrupt remapping entries */
-    if ( amd_iommu_setup_ioapic_remapping() != 0 )
+    if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 )
         goto error_out;
 
     /* allocate and initialize a global device table shared by all iommus */
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -470,6 +470,8 @@ int __init iommu_setup(void)
         rc = iommu_hardware_setup();
         iommu_enabled = (rc == 0);
     }
+    if ( !iommu_enabled )
+        iommu_intremap = 0;
 
     if ( (force_iommu && !iommu_enabled) ||
          (force_intremap && !iommu_intremap) )
@@ -487,6 +489,7 @@ int __init iommu_setup(void)
         printk(" - Dom0 mode: %s\n",
                iommu_passthrough ? "Passthrough" :
                iommu_dom0_strict ? "Strict" : "Relaxed");
+    printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
 
     return rc;
 }
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -373,7 +373,7 @@ unsigned int io_apic_read_remap_rte(
     struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr || !ir_ctrl->iremap_num ||
+    if ( !ir_ctrl->iremap_num ||
         ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
         return __io_apic_read(apic, reg);
 
@@ -396,15 +396,8 @@ void io_apic_write_remap_rte(
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int rte_upper = (reg & 1) ? 1 : 0;
     struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
-    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
     int saved_mask;
 
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
-    {
-        __io_apic_write(apic, reg, value);
-        return;
-    }
-
     old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);
 
     remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
@@ -653,20 +646,11 @@ void msi_msg_read_remap_rte(
 {
     struct pci_dev *pdev = msi_desc->dev;
     struct acpi_drhd_unit *drhd = NULL;
-    struct iommu *iommu = NULL;
-    struct ir_ctrl *ir_ctrl;
 
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
-    if ( !drhd )
-        return;
-    iommu = drhd->iommu;
-
-    ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
-        return;
-
-    remap_entry_to_msi_msg(iommu, msg);
+    if ( drhd )
+        remap_entry_to_msi_msg(drhd->iommu, msg);
 }
 
 void msi_msg_write_remap_rte(
@@ -674,20 +658,11 @@ void msi_msg_write_remap_rte(
 {
     struct pci_dev *pdev = msi_desc->dev;
     struct acpi_drhd_unit *drhd = NULL;
-    struct iommu *iommu = NULL;
-    struct ir_ctrl *ir_ctrl;
 
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
-    if ( !drhd )
-        return;
-    iommu = drhd->iommu;
-
-    ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
-        return;
-
-    msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
+    if ( drhd )
+        msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
 }
 
 int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2065,6 +2065,9 @@ static int init_vtd_hw(void)
                 break;
             }
         }
+        if ( !iommu_intremap )
+            for_each_drhd_unit ( drhd )
+                disable_intremap(drhd->iommu);
     }
 
     /*
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -129,7 +129,7 @@ struct IO_APIC_route_entry {
 extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
 
 /* Only need to remap ioapic RTE (reg: 10~3Fh) */
-#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10))
+#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10))
 
 static inline unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
 {

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-08  9:58 ` [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
@ 2013-02-08 13:53   ` Sander Eikelenboom
  2013-03-01 18:45   ` Malcolm Crossley
  1 sibling, 0 replies; 20+ messages in thread
From: Sander Eikelenboom @ 2013-02-08 13:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Boris Ostrovsky, Sherry Hurwitz, xiantao.zhang, xen-devel


Friday, February 8, 2013, 10:58:02 AM, you wrote:

>>>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
>> A regression was reported on a class of broken firmware that c/s
>> 26517:601139e2b0db didn't consider, leading to a boot time crash.

> After some more thought on this and the comments we got
> regarding disabling the IOMMU in this situation altogether making
> things worse instead of better, I came to the conclusion that we
> can actually restrict the action in affected cases to just disabling
> interrupt remapping. That doesn't make the situation worse than
> prior to the XSA-36 fixes (where interrupt remapping didn't really
> protect domains from one another), but allows at least DMA
> isolation to still be utilized. Patch 3/2 below/attached.

> Jan

That seems the same linux does.
Unfortunately it seems i'am having to use it, just tested the latest bios for my motherboard and that still has no IVHD for the IOAPIC, but also locks up immediately after that message.


> AMD IOMMU: only disable interrupt remapping when certain IVRS consistency checks fail

> After some more thought on the XSA-36 and specifically the comments we
> got regarding disabling the IOMMU in this situation altogether making
> things worse instead of better, I came to the conclusion that we can
> actually restrict the action in affected cases to just disabling
> interrupt remapping. That doesn't make the situation worse than prior
> to the XSA-36 fixes (where interrupt remapping didn't really protect
> domains from one another), but allows at least DMA isolation to still
> be utilized.

> In the course of this, the calls to the interrupt remapping related
> IOMMU implementation specific operations needed to be re-qualified
> from depending on iommu_enabled to iommu_intremap (as was already the
> case for x86's HPET code).

> That in turn required to make sure iommu_intremap gets properly
> cleared when the respective initialization fails (or isn't being
> done at all).

> Along with making sure interrupt remapping doesn't get inconsistently
> enabled on some IOMMUs and not on others in the VT-d code, this in turn
> allowed quite a bit of cleanup on the VT-d side (if desired, that
> cleanup could of course be broken out into a separate patch).

> Signed-off-by: Jan Beulich <jbeulich@suse.com>

> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -207,7 +207,7 @@ static void read_msi_msg(struct msi_desc
>          BUG();
>      }
>  
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>          iommu_read_msi_from_ire(entry, msg);
>  }
>  
> @@ -215,7 +215,7 @@ static void write_msi_msg(struct msi_des
>  {
>      entry->msg = *msg;
>  
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>      {
>          ASSERT(msg != &entry->msg);
>          iommu_update_ire_from_msi(entry, msg);
> @@ -489,7 +489,7 @@ int msi_free_irq(struct msi_desc *entry)
>      }
>  
>      /* Free the unused IRTE if intr remap enabled */
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>          iommu_update_ire_from_msi(entry, NULL);
>  
>      list_del(&entry->list);
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
> @@ -682,10 +682,10 @@ static u16 __init parse_ivhd_device_spec
>                      printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
>                             special->handle);
>                      if ( amd_iommu_perdev_intremap )
> -                        return 0;
> +                        iommu_intremap = 0;
>                  }
>              }
> -            else
> +            else if ( iommu_intremap )
>              {
>                  /* set device id of ioapic */
>                  ioapic_sbdf[special->handle].bdf = bdf;
> @@ -697,7 +697,7 @@ static u16 __init parse_ivhd_device_spec
>                       !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>                  {
>                      printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> -                    return 0;
> +                    iommu_intremap = 0;
>                  }
>              }
>              break;
> @@ -706,7 +706,7 @@ static u16 __init parse_ivhd_device_spec
>          {
>              printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n",
>                     special->handle);
> -            return 0;
> +            iommu_intremap = 0;
>          }
>          break;
>      case ACPI_IVHD_HPET:
> @@ -932,7 +932,7 @@ static int __init parse_ivrs_table(struc
>          printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
>                 IO_APIC_ID(apic));
>          if ( amd_iommu_perdev_intremap )
> -            error = -ENXIO;
> +            iommu_intremap = 0;
>          else
>          {
>              ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
> @@ -940,7 +940,7 @@ static int __init parse_ivrs_table(struc
>              if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>              {
>                  printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> -                error = -ENOMEM;
> +                iommu_intremap = 0;
>              }
>          }
>      }
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -1157,7 +1157,7 @@ int __init amd_iommu_init(void)
>      BUG_ON( !iommu_found() );
>  
>      if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
> -        goto error_out;
> +        iommu_intremap = 0;
>  
>      ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
>  
> @@ -1173,7 +1173,7 @@ int __init amd_iommu_init(void)
>          goto error_out;
>  
>      /* initialize io-apic interrupt remapping entries */
> -    if ( amd_iommu_setup_ioapic_remapping() != 0 )
> +    if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 )
>          goto error_out;
>  
>      /* allocate and initialize a global device table shared by all iommus */
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -470,6 +470,8 @@ int __init iommu_setup(void)
>          rc = iommu_hardware_setup();
>          iommu_enabled = (rc == 0);
>      }
> +    if ( !iommu_enabled )
> +        iommu_intremap = 0;
>  
>      if ( (force_iommu && !iommu_enabled) ||
>           (force_intremap && !iommu_intremap) )
> @@ -487,6 +489,7 @@ int __init iommu_setup(void)
>          printk(" - Dom0 mode: %s\n",
>                 iommu_passthrough ? "Passthrough" :
>                 iommu_dom0_strict ? "Strict" : "Relaxed");
> +    printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
>  
>      return rc;
>  }
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -373,7 +373,7 @@ unsigned int io_apic_read_remap_rte(
>      struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>  
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr || !ir_ctrl->iremap_num ||
+    if ( !ir_ctrl->>iremap_num ||
>          ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
>          return __io_apic_read(apic, reg);
>  
> @@ -396,15 +396,8 @@ void io_apic_write_remap_rte(
>      struct IO_APIC_route_remap_entry *remap_rte;
>      unsigned int rte_upper = (reg & 1) ? 1 : 0;
>      struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
> -    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>      int saved_mask;
>  
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -    {
> -        __io_apic_write(apic, reg, value);
> -        return;
> -    }
> -
>      old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);
>  
>      remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
> @@ -653,20 +646,11 @@ void msi_msg_read_remap_rte(
>  {
>      struct pci_dev *pdev = msi_desc->dev;
>      struct acpi_drhd_unit *drhd = NULL;
> -    struct iommu *iommu = NULL;
> -    struct ir_ctrl *ir_ctrl;
>  
>      drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
>                  : hpet_to_drhd(msi_desc->hpet_id);
> -    if ( !drhd )
> -        return;
-    iommu = drhd->>iommu;
> -
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -        return;
> -
> -    remap_entry_to_msi_msg(iommu, msg);
> +    if ( drhd )
> +        remap_entry_to_msi_msg(drhd->iommu, msg);
>  }
>  
>  void msi_msg_write_remap_rte(
> @@ -674,20 +658,11 @@ void msi_msg_write_remap_rte(
>  {
>      struct pci_dev *pdev = msi_desc->dev;
>      struct acpi_drhd_unit *drhd = NULL;
> -    struct iommu *iommu = NULL;
> -    struct ir_ctrl *ir_ctrl;
>  
>      drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
>                  : hpet_to_drhd(msi_desc->hpet_id);
> -    if ( !drhd )
> -        return;
-    iommu = drhd->>iommu;
> -
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -        return;
> -
> -    msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
> +    if ( drhd )
> +        msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
>  }
>  
>  int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2065,6 +2065,9 @@ static int init_vtd_hw(void)
>                  break;
>              }
>          }
> +        if ( !iommu_intremap )
> +            for_each_drhd_unit ( drhd )
> +                disable_intremap(drhd->iommu);
>      }
>  
>      /*
> --- a/xen/include/asm-x86/io_apic.h
> +++ b/xen/include/asm-x86/io_apic.h
> @@ -129,7 +129,7 @@ struct IO_APIC_route_entry {
>  extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
>  
>  /* Only need to remap ioapic RTE (reg: 10~3Fh) */
> -#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10))
> +#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10))
>  
>  static inline unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
>  {

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-06 13:12 ` [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table Jan Beulich
  2013-02-06 14:41   ` Boris Ostrovsky
@ 2013-02-11 10:49   ` Ian Campbell
  2013-02-12 11:59     ` Jan Beulich
  1 sibling, 1 reply; 20+ messages in thread
From: Ian Campbell @ 2013-02-11 10:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Boris Ostrovsky, Sherry Hurwitz, xen-devel

On Wed, 2013-02-06 at 13:12 +0000, Jan Beulich wrote:
> Apart from dealing duplicate conflicting entries, we also have to
> handle firmware omitting IO-APIC entries in IVRS altogether. Not doing
> so has resulted in c/s 26517:601139e2b0db to crash such systems during
> boot (whereas with the change here the IOMMU gets disabled just as is
> being done in the other cases, i.e. unless global tables are being
> used).
> 
> Debugging this issue has also pointed out that the debug log output is
> pretty ugly to look at - consolidate the output, and add one extra
> item for the IVHD special entries, so that future issues are easier
> to analyze.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Sander Eikelenboom <linux@eikelenboom.it>

Not really my area but the change looks reasonably straight forward and
fixes a real error observed in the field,
Acked-by: Ian Campbell <ian.campbell@citrix.com>

> 
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
> @@ -354,9 +354,8 @@ static int __init parse_ivmd_block(const
>      base = start_addr & PAGE_MASK;
>      limit = (start_addr + mem_length - 1) & PAGE_MASK;
>  
> -    AMD_IOMMU_DEBUG("IVMD Block: Type %#x\n",ivmd_block->header.type);
> -    AMD_IOMMU_DEBUG(" Start_Addr_Phys %#lx\n", start_addr);
> -    AMD_IOMMU_DEBUG(" Mem_Length %#lx\n", mem_length);
> +    AMD_IOMMU_DEBUG("IVMD Block: type %#x phys %#lx len %#lx\n",
> +                    ivmd_block->header.type, start_addr, mem_length);
>  
>      if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
>          iw = ir = IOMMU_CONTROL_ENABLED;
> @@ -551,8 +550,8 @@ static u16 __init parse_ivhd_device_alia
>          return 0;
>      }
>  
> -    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x\n", first_bdf, last_bdf);
> -    AMD_IOMMU_DEBUG(" Dev_Id Alias: %#x\n", alias_id);
> +    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x alias %#x\n",
> +                    first_bdf, last_bdf, alias_id);
>  
>      for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
>          add_ivrs_mapping_entry(bdf, alias_id, range->alias.header.data_setting,
> @@ -654,6 +653,9 @@ static u16 __init parse_ivhd_device_spec
>          return 0;
>      }
>  
> +    AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle %#x\n",
> +                    seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
> +                    special->variety, special->handle);
>      add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
>  
>      switch ( special->variety )
> @@ -758,10 +760,9 @@ static int __init parse_ivhd_block(const
>      {
>          ivhd_device = (const void *)((const u8 *)ivhd_block + block_length);
>  
> -        AMD_IOMMU_DEBUG( "IVHD Device Entry:\n");
> -        AMD_IOMMU_DEBUG( " Type %#x\n", ivhd_device->header.type);
> -        AMD_IOMMU_DEBUG( " Dev_Id %#x\n", ivhd_device->header.id);
> -        AMD_IOMMU_DEBUG( " Flags %#x\n", ivhd_device->header.data_setting);
> +        AMD_IOMMU_DEBUG("IVHD Device Entry: type %#x id %#x flags %#x\n",
> +                        ivhd_device->header.type, ivhd_device->header.id,
> +                        ivhd_device->header.data_setting);
>  
>          switch ( ivhd_device->header.type )
>          {
> @@ -890,6 +891,7 @@ static int __init parse_ivrs_table(struc
>  {
>      const struct acpi_ivrs_header *ivrs_block;
>      unsigned long length;
> +    unsigned int apic;
>      int error = 0;
>  
>      BUG_ON(!table);
> @@ -903,11 +905,9 @@ static int __init parse_ivrs_table(struc
>      {
>          ivrs_block = (struct acpi_ivrs_header *)((u8 *)table + length);
>  
> -        AMD_IOMMU_DEBUG("IVRS Block:\n");
> -        AMD_IOMMU_DEBUG(" Type %#x\n", ivrs_block->type);
> -        AMD_IOMMU_DEBUG(" Flags %#x\n", ivrs_block->flags);
> -        AMD_IOMMU_DEBUG(" Length %#x\n", ivrs_block->length);
> -        AMD_IOMMU_DEBUG(" Dev_Id %#x\n", ivrs_block->device_id);
> +        AMD_IOMMU_DEBUG("IVRS Block: type %#x flags %#x len %#x id %#x\n",
> +                        ivrs_block->type, ivrs_block->flags,
> +                        ivrs_block->length, ivrs_block->device_id);
>  
>          if ( table->length < (length + ivrs_block->length) )
>          {
> @@ -922,6 +922,29 @@ static int __init parse_ivrs_table(struc
>          length += ivrs_block->length;
>      }
>  
> +    /* Each IO-APIC must have been mentioned in the table. */
> +    for ( apic = 0; !error && apic < nr_ioapics; ++apic )
> +    {
> +        if ( !nr_ioapic_entries[apic] ||
> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +            continue;
> +
> +        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
> +               IO_APIC_ID(apic));
> +        if ( amd_iommu_perdev_intremap )
> +            error = -ENXIO;
> +        else
> +        {
> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
> +                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
> +            {
> +                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> +                error = -ENOMEM;
> +            }
> +        }
> +    }
> +
>      return error;
>  }
>  
> 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] AMD IOMMU: handle MSI for phantom functions
  2013-02-06 13:12 ` [PATCH 2/2] AMD IOMMU: handle MSI for phantom functions Jan Beulich
@ 2013-02-11 10:53   ` Ian Campbell
  0 siblings, 0 replies; 20+ messages in thread
From: Ian Campbell @ 2013-02-11 10:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Boris Ostrovsky, Sherry Hurwitz, xen-devel

On Wed, 2013-02-06 at 13:12 +0000, Jan Beulich wrote:
> @@ -379,12 +382,30 @@ void amd_iommu_msi_msg_update_ire(
>      }
>  
>      if ( msi_desc->remap_index >= 0 )
> -        update_intremap_entry_from_msi_msg(iommu, bdf, msi_desc, NULL);
> +    {
> +        do {
> +            update_intremap_entry_from_msi_msg(iommu, bdf,
> +                                               &msi_desc->remap_index, NULL);
> +            if ( !pdev || !pdev->phantom_stride )
> +                break;
> +            bdf += pdev->phantom_stride;
> +        } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
> +
> +        msi_desc->remap_index = -1;

The reason for this reset is a bit subtle, but I think I get it, might
be worth a comment though.

Otherwise: Acked-by: Ian Campbell <ian.campbell@citrix.com>
(although with similar caveats to 1/2)

Ian.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-06 13:04 [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
                   ` (2 preceding siblings ...)
  2013-02-08  9:58 ` [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
@ 2013-02-11 13:12 ` Ian Campbell
  2013-02-12  8:45   ` Jan Beulich
  3 siblings, 1 reply; 20+ messages in thread
From: Ian Campbell @ 2013-02-11 13:12 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Boris Ostrovsky, Sherry Hurwitz, xen-devel

On Wed, 2013-02-06 at 13:04 +0000, Jan Beulich wrote:
> A regression was reported on a class of broken firmware that c/s
> 26517:601139e2b0db didn't consider, leading to a boot time crash.

Should we release an update to XSA-36 when these have gone in?

Ian.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-11 13:12 ` Ian Campbell
@ 2013-02-12  8:45   ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-12  8:45 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Boris Ostrovsky, xen-devel, Sherry Hurwitz

>>> On 11.02.13 at 14:12, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Wed, 2013-02-06 at 13:04 +0000, Jan Beulich wrote:
>> A regression was reported on a class of broken firmware that c/s
>> 26517:601139e2b0db didn't consider, leading to a boot time crash.
> 
> Should we release an update to XSA-36 when these have gone in?

I think so, yes.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table
  2013-02-11 10:49   ` Ian Campbell
@ 2013-02-12 11:59     ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-12 11:59 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Boris Ostrovsky, xen-devel, suravee.suthikulpanit, Sherry Hurwitz

>>> On 11.02.13 at 11:49, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Wed, 2013-02-06 at 13:12 +0000, Jan Beulich wrote:
>> Apart from dealing duplicate conflicting entries, we also have to
>> handle firmware omitting IO-APIC entries in IVRS altogether. Not doing
>> so has resulted in c/s 26517:601139e2b0db to crash such systems during
>> boot (whereas with the change here the IOMMU gets disabled just as is
>> being done in the other cases, i.e. unless global tables are being
>> used).
>> 
>> Debugging this issue has also pointed out that the debug log output is
>> pretty ugly to look at - consolidate the output, and add one extra
>> item for the IVHD special entries, so that future issues are easier
>> to analyze.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
> 
> Not really my area but the change looks reasonably straight forward and
> fixes a real error observed in the field,
> Acked-by: Ian Campbell <ian.campbell@citrix.com>

While I'd like to see this one go in rather sooner than later, I'm
willing to wait for the community call tomorrow to see whether
we can get an AMD side ack for this and the other patch. But
we surely shouldn't defer getting this in for much longer.

Jan

>> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
>> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
>> @@ -354,9 +354,8 @@ static int __init parse_ivmd_block(const
>>      base = start_addr & PAGE_MASK;
>>      limit = (start_addr + mem_length - 1) & PAGE_MASK;
>>  
>> -    AMD_IOMMU_DEBUG("IVMD Block: Type %#x\n",ivmd_block->header.type);
>> -    AMD_IOMMU_DEBUG(" Start_Addr_Phys %#lx\n", start_addr);
>> -    AMD_IOMMU_DEBUG(" Mem_Length %#lx\n", mem_length);
>> +    AMD_IOMMU_DEBUG("IVMD Block: type %#x phys %#lx len %#lx\n",
>> +                    ivmd_block->header.type, start_addr, mem_length);
>>  
>>      if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
>>          iw = ir = IOMMU_CONTROL_ENABLED;
>> @@ -551,8 +550,8 @@ static u16 __init parse_ivhd_device_alia
>>          return 0;
>>      }
>>  
>> -    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x\n", first_bdf, last_bdf);
>> -    AMD_IOMMU_DEBUG(" Dev_Id Alias: %#x\n", alias_id);
>> +    AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x alias %#x\n",
>> +                    first_bdf, last_bdf, alias_id);
>>  
>>      for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
>>          add_ivrs_mapping_entry(bdf, alias_id, 
> range->alias.header.data_setting,
>> @@ -654,6 +653,9 @@ static u16 __init parse_ivhd_device_spec
>>          return 0;
>>      }
>>  
>> +    AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle 
> %#x\n",
>> +                    seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
>> +                    special->variety, special->handle);
>>      add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
>>  
>>      switch ( special->variety )
>> @@ -758,10 +760,9 @@ static int __init parse_ivhd_block(const
>>      {
>>          ivhd_device = (const void *)((const u8 *)ivhd_block + 
> block_length);
>>  
>> -        AMD_IOMMU_DEBUG( "IVHD Device Entry:\n");
>> -        AMD_IOMMU_DEBUG( " Type %#x\n", ivhd_device->header.type);
>> -        AMD_IOMMU_DEBUG( " Dev_Id %#x\n", ivhd_device->header.id);
>> -        AMD_IOMMU_DEBUG( " Flags %#x\n", ivhd_device->header.data_setting);
>> +        AMD_IOMMU_DEBUG("IVHD Device Entry: type %#x id %#x flags %#x\n",
>> +                        ivhd_device->header.type, ivhd_device->header.id,
>> +                        ivhd_device->header.data_setting);
>>  
>>          switch ( ivhd_device->header.type )
>>          {
>> @@ -890,6 +891,7 @@ static int __init parse_ivrs_table(struc
>>  {
>>      const struct acpi_ivrs_header *ivrs_block;
>>      unsigned long length;
>> +    unsigned int apic;
>>      int error = 0;
>>  
>>      BUG_ON(!table);
>> @@ -903,11 +905,9 @@ static int __init parse_ivrs_table(struc
>>      {
>>          ivrs_block = (struct acpi_ivrs_header *)((u8 *)table + length);
>>  
>> -        AMD_IOMMU_DEBUG("IVRS Block:\n");
>> -        AMD_IOMMU_DEBUG(" Type %#x\n", ivrs_block->type);
>> -        AMD_IOMMU_DEBUG(" Flags %#x\n", ivrs_block->flags);
>> -        AMD_IOMMU_DEBUG(" Length %#x\n", ivrs_block->length);
>> -        AMD_IOMMU_DEBUG(" Dev_Id %#x\n", ivrs_block->device_id);
>> +        AMD_IOMMU_DEBUG("IVRS Block: type %#x flags %#x len %#x id %#x\n",
>> +                        ivrs_block->type, ivrs_block->flags,
>> +                        ivrs_block->length, ivrs_block->device_id);
>>  
>>          if ( table->length < (length + ivrs_block->length) )
>>          {
>> @@ -922,6 +922,29 @@ static int __init parse_ivrs_table(struc
>>          length += ivrs_block->length;
>>      }
>>  
>> +    /* Each IO-APIC must have been mentioned in the table. */
>> +    for ( apic = 0; !error && apic < nr_ioapics; ++apic )
>> +    {
>> +        if ( !nr_ioapic_entries[apic] ||
>> +             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>> +            continue;
>> +
>> +        printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
>> +               IO_APIC_ID(apic));
>> +        if ( amd_iommu_perdev_intremap )
>> +            error = -ENXIO;
>> +        else
>> +        {
>> +            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
>> +                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
>> +            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>> +            {
>> +                printk(XENLOG_ERR "IVHD Error: Out of memory\n");
>> +                error = -ENOMEM;
>> +            }
>> +        }
>> +    }
>> +
>>      return error;
>>  }
>>  
>> 
>> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-08  9:58 ` [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
  2013-02-08 13:53   ` Sander Eikelenboom
@ 2013-03-01 18:45   ` Malcolm Crossley
  2013-03-04  9:48     ` Jan Beulich
  1 sibling, 1 reply; 20+ messages in thread
From: Malcolm Crossley @ 2013-03-01 18:45 UTC (permalink / raw)
  To: xen-devel

On 08/02/13 09:58, Jan Beulich wrote:
>>>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
>> A regression was reported on a class of broken firmware that c/s
>> 26517:601139e2b0db didn't consider, leading to a boot time crash.
> After some more thought on this and the comments we got
> regarding disabling the IOMMU in this situation altogether making
> things worse instead of better, I came to the conclusion that we
> can actually restrict the action in affected cases to just disabling
> interrupt remapping. That doesn't make the situation worse than
> prior to the XSA-36 fixes (where interrupt remapping didn't really
> protect domains from one another), but allows at least DMA
> isolation to still be utilized. Patch 3/2 below/attached.
>
> Jan

What is the status of this patch? It has not been included into 
xen-unstable as yet.

I am of the opinion that it is better to have DMA isolation than to 
remove the feature altogether.
Particularly because there are other ways a guest with a PCI passthrough 
device can attack the host
than via reprogramming the interrupt vector in the PCI device.

Malcolm
> AMD IOMMU: only disable interrupt remapping when certain IVRS consistency checks fail
>
> After some more thought on the XSA-36 and specifically the comments we
> got regarding disabling the IOMMU in this situation altogether making
> things worse instead of better, I came to the conclusion that we can
> actually restrict the action in affected cases to just disabling
> interrupt remapping. That doesn't make the situation worse than prior
> to the XSA-36 fixes (where interrupt remapping didn't really protect
> domains from one another), but allows at least DMA isolation to still
> be utilized.
>
> In the course of this, the calls to the interrupt remapping related
> IOMMU implementation specific operations needed to be re-qualified
> from depending on iommu_enabled to iommu_intremap (as was already the
> case for x86's HPET code).
>
> That in turn required to make sure iommu_intremap gets properly
> cleared when the respective initialization fails (or isn't being
> done at all).
>
> Along with making sure interrupt remapping doesn't get inconsistently
> enabled on some IOMMUs and not on others in the VT-d code, this in turn
> allowed quite a bit of cleanup on the VT-d side (if desired, that
> cleanup could of course be broken out into a separate patch).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -207,7 +207,7 @@ static void read_msi_msg(struct msi_desc
>           BUG();
>       }
>   
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>           iommu_read_msi_from_ire(entry, msg);
>   }
>   
> @@ -215,7 +215,7 @@ static void write_msi_msg(struct msi_des
>   {
>       entry->msg = *msg;
>   
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>       {
>           ASSERT(msg != &entry->msg);
>           iommu_update_ire_from_msi(entry, msg);
> @@ -489,7 +489,7 @@ int msi_free_irq(struct msi_desc *entry)
>       }
>   
>       /* Free the unused IRTE if intr remap enabled */
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>           iommu_update_ire_from_msi(entry, NULL);
>   
>       list_del(&entry->list);
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
> @@ -682,10 +682,10 @@ static u16 __init parse_ivhd_device_spec
>                       printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
>                              special->handle);
>                       if ( amd_iommu_perdev_intremap )
> -                        return 0;
> +                        iommu_intremap = 0;
>                   }
>               }
> -            else
> +            else if ( iommu_intremap )
>               {
>                   /* set device id of ioapic */
>                   ioapic_sbdf[special->handle].bdf = bdf;
> @@ -697,7 +697,7 @@ static u16 __init parse_ivhd_device_spec
>                        !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>                   {
>                       printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> -                    return 0;
> +                    iommu_intremap = 0;
>                   }
>               }
>               break;
> @@ -706,7 +706,7 @@ static u16 __init parse_ivhd_device_spec
>           {
>               printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n",
>                      special->handle);
> -            return 0;
> +            iommu_intremap = 0;
>           }
>           break;
>       case ACPI_IVHD_HPET:
> @@ -932,7 +932,7 @@ static int __init parse_ivrs_table(struc
>           printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
>                  IO_APIC_ID(apic));
>           if ( amd_iommu_perdev_intremap )
> -            error = -ENXIO;
> +            iommu_intremap = 0;
>           else
>           {
>               ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
> @@ -940,7 +940,7 @@ static int __init parse_ivrs_table(struc
>               if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>               {
>                   printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> -                error = -ENOMEM;
> +                iommu_intremap = 0;
>               }
>           }
>       }
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -1157,7 +1157,7 @@ int __init amd_iommu_init(void)
>       BUG_ON( !iommu_found() );
>   
>       if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
> -        goto error_out;
> +        iommu_intremap = 0;
>   
>       ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
>   
> @@ -1173,7 +1173,7 @@ int __init amd_iommu_init(void)
>           goto error_out;
>   
>       /* initialize io-apic interrupt remapping entries */
> -    if ( amd_iommu_setup_ioapic_remapping() != 0 )
> +    if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 )
>           goto error_out;
>   
>       /* allocate and initialize a global device table shared by all iommus */
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -470,6 +470,8 @@ int __init iommu_setup(void)
>           rc = iommu_hardware_setup();
>           iommu_enabled = (rc == 0);
>       }
> +    if ( !iommu_enabled )
> +        iommu_intremap = 0;
>   
>       if ( (force_iommu && !iommu_enabled) ||
>            (force_intremap && !iommu_intremap) )
> @@ -487,6 +489,7 @@ int __init iommu_setup(void)
>           printk(" - Dom0 mode: %s\n",
>                  iommu_passthrough ? "Passthrough" :
>                  iommu_dom0_strict ? "Strict" : "Relaxed");
> +    printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
>   
>       return rc;
>   }
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -373,7 +373,7 @@ unsigned int io_apic_read_remap_rte(
>       struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
>       struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>   
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr || !ir_ctrl->iremap_num ||
> +    if ( !ir_ctrl->iremap_num ||
>           ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
>           return __io_apic_read(apic, reg);
>   
> @@ -396,15 +396,8 @@ void io_apic_write_remap_rte(
>       struct IO_APIC_route_remap_entry *remap_rte;
>       unsigned int rte_upper = (reg & 1) ? 1 : 0;
>       struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
> -    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>       int saved_mask;
>   
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -    {
> -        __io_apic_write(apic, reg, value);
> -        return;
> -    }
> -
>       old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);
>   
>       remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
> @@ -653,20 +646,11 @@ void msi_msg_read_remap_rte(
>   {
>       struct pci_dev *pdev = msi_desc->dev;
>       struct acpi_drhd_unit *drhd = NULL;
> -    struct iommu *iommu = NULL;
> -    struct ir_ctrl *ir_ctrl;
>   
>       drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
>                   : hpet_to_drhd(msi_desc->hpet_id);
> -    if ( !drhd )
> -        return;
> -    iommu = drhd->iommu;
> -
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -        return;
> -
> -    remap_entry_to_msi_msg(iommu, msg);
> +    if ( drhd )
> +        remap_entry_to_msi_msg(drhd->iommu, msg);
>   }
>   
>   void msi_msg_write_remap_rte(
> @@ -674,20 +658,11 @@ void msi_msg_write_remap_rte(
>   {
>       struct pci_dev *pdev = msi_desc->dev;
>       struct acpi_drhd_unit *drhd = NULL;
> -    struct iommu *iommu = NULL;
> -    struct ir_ctrl *ir_ctrl;
>   
>       drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
>                   : hpet_to_drhd(msi_desc->hpet_id);
> -    if ( !drhd )
> -        return;
> -    iommu = drhd->iommu;
> -
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -        return;
> -
> -    msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
> +    if ( drhd )
> +        msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
>   }
>   
>   int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2065,6 +2065,9 @@ static int init_vtd_hw(void)
>                   break;
>               }
>           }
> +        if ( !iommu_intremap )
> +            for_each_drhd_unit ( drhd )
> +                disable_intremap(drhd->iommu);
>       }
>   
>       /*
> --- a/xen/include/asm-x86/io_apic.h
> +++ b/xen/include/asm-x86/io_apic.h
> @@ -129,7 +129,7 @@ struct IO_APIC_route_entry {
>   extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
>   
>   /* Only need to remap ioapic RTE (reg: 10~3Fh) */
> -#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10))
> +#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10))
>   
>   static inline unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
>   {
>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-03-01 18:45   ` Malcolm Crossley
@ 2013-03-04  9:48     ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2013-03-04  9:48 UTC (permalink / raw)
  To: Malcolm Crossley; +Cc: xen-devel

>>> On 01.03.13 at 19:45, Malcolm Crossley <malcolm.crossley@citrix.com> wrote:
> On 08/02/13 09:58, Jan Beulich wrote:
>>>>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
>>> A regression was reported on a class of broken firmware that c/s
>>> 26517:601139e2b0db didn't consider, leading to a boot time crash.
>> After some more thought on this and the comments we got
>> regarding disabling the IOMMU in this situation altogether making
>> things worse instead of better, I came to the conclusion that we
>> can actually restrict the action in affected cases to just disabling
>> interrupt remapping. That doesn't make the situation worse than
>> prior to the XSA-36 fixes (where interrupt remapping didn't really
>> protect domains from one another), but allows at least DMA
>> isolation to still be utilized. Patch 3/2 below/attached.
> 
> What is the status of this patch? It has not been included into 
> xen-unstable as yet.

No real consensus was reached on the better of two less than
optimal alternatives.

> I am of the opinion that it is better to have DMA isolation than to 
> remove the feature altogether.
> Particularly because there are other ways a guest with a PCI passthrough 
> device can attack the host
> than via reprogramming the interrupt vector in the PCI device.

Yes, that's one of the two perspectives one can take. The other
is that by doing what the patch does, it re-opens the security
hole that XSA-36 describes, i.e. without other measures users'
systems become vulnerable again.

Are you sure you consider either of the two alternatives
significantly better than the other?

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-08 16:48 ` Jan Beulich
  2013-02-08 17:10   ` Sander Eikelenboom
@ 2013-02-11 11:46   ` Ian Campbell
  1 sibling, 0 replies; 20+ messages in thread
From: Ian Campbell @ 2013-02-11 11:46 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Boris Ostrovsky, sherry.hurwitz, suravee.suthikulpanit,
	xiantao.zhang, xen-devel

On Fri, 2013-02-08 at 16:48 +0000, Jan Beulich wrote:
> >>> On 08.02.13 at 15:29, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> 
> > ----- JBeulich@suse.com wrote:
> > 
> >> >>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
> >> > A regression was reported on a class of broken firmware that c/s
> >> > 26517:601139e2b0db didn't consider, leading to a boot time crash.
> >> 
> >> After some more thought on this and the comments we got
> >> regarding disabling the IOMMU in this situation altogether making
> >> things worse instead of better, I came to the conclusion that we
> >> can actually restrict the action in affected cases to just disabling
> >> interrupt remapping. That doesn't make the situation worse than
> >> prior to the XSA-36 fixes (where interrupt remapping didn't really
> >> protect domains from one another), but allows at least DMA
> >> isolation to still be utilized. Patch 3/2 below/attached.
> > 
> > But now users who don't examine log messages may not realize 
> > that interupt remapping is disabled and therefore the system can be
> > affected by XSA-36.
> 
> Yes. We need to balance these against one another - I see pros
> and cons in both (and I don't mind dropping this additional patch
> if we collectively come to the conclusion that the way it is now -
> with the one earlier fix - is the better state). So I'm really
> interested in others' opinions.

Given the infrastructure currently available for dealing with these
things in a more fine-grained/less heavy-handed manner (i.e. not much)
and in the context of security advisories (where obviousness of the
patch is a very useful property and where we are effectively
highlighting the issue to the world) I think it is reasonable to be
fairly aggressive in our handling of systems which don't provide the
necessary security properties.

Most of these sorts of issues naturally only arise as part of a security
vulnerability but if someone wanted to implement more fine grained
control after the fact that would also seem reasonable.

If we had pre-existing better infrastructure for determining controlling
things at runtime, including informing the user and allowing for
overrides, which could be tweaked with one or two lines in a security
fix then it would seem reasonable to use them up front.

Somebody would need to figure out what that actually means and implement
it as part of the normal development process, since we obviously don't
want to do it next time we are under the cosh of an impending security
advisory.

Ian.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-08 16:48 ` Jan Beulich
@ 2013-02-08 17:10   ` Sander Eikelenboom
  2013-02-11 11:46   ` Ian Campbell
  1 sibling, 0 replies; 20+ messages in thread
From: Sander Eikelenboom @ 2013-02-08 17:10 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Boris Ostrovsky, sherry.hurwitz, suravee.suthikulpanit,
	xiantao.zhang, xen-devel


Friday, February 8, 2013, 5:48:41 PM, you wrote:

>>>> On 08.02.13 at 15:29, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:

>> ----- JBeulich@suse.com wrote:
>> 
>>> >>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
>>> > A regression was reported on a class of broken firmware that c/s
>>> > 26517:601139e2b0db didn't consider, leading to a boot time crash.
>>> 
>>> After some more thought on this and the comments we got
>>> regarding disabling the IOMMU in this situation altogether making
>>> things worse instead of better, I came to the conclusion that we
>>> can actually restrict the action in affected cases to just disabling
>>> interrupt remapping. That doesn't make the situation worse than
>>> prior to the XSA-36 fixes (where interrupt remapping didn't really
>>> protect domains from one another), but allows at least DMA
>>> isolation to still be utilized. Patch 3/2 below/attached.
>> 
>> But now users who don't examine log messages may not realize 
>> that interupt remapping is disabled and therefore the system can be
>> affected by XSA-36.

> Yes. We need to balance these against one another - I see pros
> and cons in both (and I don't mind dropping this additional patch
> if we collectively come to the conclusion that the way it is now -
> with the one earlier fix - is the better state). So I'm really
> interested in others' opinions.

One argument pro could be that linux seems to do the same (only disable interrupt remapping)

>> With current code (boot option to use global remapping table) users
>> are explicitly agreeing to allow for possibility of cross-domain interrupt
>> attack.
>> 
>> Also, I think it may not be a bad idea to have AMD folks test you earlier
>> patch on multi-IOMMU system (and simulate bad IVRS) to see how it behaves 
>> there.

And getting mainbord / bios manufactures actually support stuff .. instead of giving some nice disinformation.
Fresh from MSI techsupport:

Posted:         2013-02-09 00:11:38 (GMT+8),MSI
Content:        CPU virtualization (SVM) should be possible on the 890FXA-GD70. I/O virtualization (IOMMU) is support on the MSI's AMD 900-series. IOMMU will not be possible on MSI's AMD 800 series.

In short .. buy a new mainboard and pray .. pray hard .. that it works out of the box

> That would indeed be very desirable.

> Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
  2013-02-08 14:29 Boris Ostrovsky
@ 2013-02-08 16:48 ` Jan Beulich
  2013-02-08 17:10   ` Sander Eikelenboom
  2013-02-11 11:46   ` Ian Campbell
  0 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2013-02-08 16:48 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: xen-devel, xiantao.zhang, suravee.suthikulpanit, sherry.hurwitz

>>> On 08.02.13 at 15:29, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:

> ----- JBeulich@suse.com wrote:
> 
>> >>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
>> > A regression was reported on a class of broken firmware that c/s
>> > 26517:601139e2b0db didn't consider, leading to a boot time crash.
>> 
>> After some more thought on this and the comments we got
>> regarding disabling the IOMMU in this situation altogether making
>> things worse instead of better, I came to the conclusion that we
>> can actually restrict the action in affected cases to just disabling
>> interrupt remapping. That doesn't make the situation worse than
>> prior to the XSA-36 fixes (where interrupt remapping didn't really
>> protect domains from one another), but allows at least DMA
>> isolation to still be utilized. Patch 3/2 below/attached.
> 
> But now users who don't examine log messages may not realize 
> that interupt remapping is disabled and therefore the system can be
> affected by XSA-36.

Yes. We need to balance these against one another - I see pros
and cons in both (and I don't mind dropping this additional patch
if we collectively come to the conclusion that the way it is now -
with the one earlier fix - is the better state). So I'm really
interested in others' opinions.

> With current code (boot option to use global remapping table) users
> are explicitly agreeing to allow for possibility of cross-domain interrupt
> attack.
> 
> Also, I think it may not be a bad idea to have AMD folks test you earlier
> patch on multi-IOMMU system (and simulate bad IVRS) to see how it behaves 
> there.

That would indeed be very desirable.

Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] AMD IOMMU: XSA-36 follow ups
@ 2013-02-08 14:29 Boris Ostrovsky
  2013-02-08 16:48 ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Boris Ostrovsky @ 2013-02-08 14:29 UTC (permalink / raw)
  To: JBeulich; +Cc: sherry.hurwitz, xiantao.zhang, suravee.suthikulpanit, xen-devel


----- JBeulich@suse.com wrote:

> >>> On 06.02.13 at 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:
> > A regression was reported on a class of broken firmware that c/s
> > 26517:601139e2b0db didn't consider, leading to a boot time crash.
> 
> After some more thought on this and the comments we got
> regarding disabling the IOMMU in this situation altogether making
> things worse instead of better, I came to the conclusion that we
> can actually restrict the action in affected cases to just disabling
> interrupt remapping. That doesn't make the situation worse than
> prior to the XSA-36 fixes (where interrupt remapping didn't really
> protect domains from one another), but allows at least DMA
> isolation to still be utilized. Patch 3/2 below/attached.

But now users who don't examine log messages may not realize 
that interupt remapping is disabled and therefore the system can be
affected by XSA-36.

With current code (boot option to use global remapping table) users
are explicitly agreeing to allow for possibility of cross-domain interrupt
attack.

Also, I think it may not be a bad idea to have AMD folks test you earlier
patch on multi-IOMMU system (and simulate bad IVRS) to see how it behaves there.

Suravee --- this is the patch I am referring to: http://lists.xen.org/archives/html/xen-devel/2013-02/msg00408.html

-boris

> 
> Jan
> 
> AMD IOMMU: only disable interrupt remapping when certain IVRS
> consistency checks fail
> 
> After some more thought on the XSA-36 and specifically the comments
> we
> got regarding disabling the IOMMU in this situation altogether making
> things worse instead of better, I came to the conclusion that we can
> actually restrict the action in affected cases to just disabling
> interrupt remapping. That doesn't make the situation worse than prior
> to the XSA-36 fixes (where interrupt remapping didn't really protect
> domains from one another), but allows at least DMA isolation to still
> be utilized.
> 
> In the course of this, the calls to the interrupt remapping related
> IOMMU implementation specific operations needed to be re-qualified
> from depending on iommu_enabled to iommu_intremap (as was already the
> case for x86's HPET code).
> 
> That in turn required to make sure iommu_intremap gets properly
> cleared when the respective initialization fails (or isn't being
> done at all).
> 
> Along with making sure interrupt remapping doesn't get inconsistently
> enabled on some IOMMUs and not on others in the VT-d code, this in
> turn
> allowed quite a bit of cleanup on the VT-d side (if desired, that
> cleanup could of course be broken out into a separate patch).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -207,7 +207,7 @@ static void read_msi_msg(struct msi_desc
>          BUG();
>      }
>  
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>          iommu_read_msi_from_ire(entry, msg);
>  }
>  
> @@ -215,7 +215,7 @@ static void write_msi_msg(struct msi_des
>  {
>      entry->msg = *msg;
>  
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>      {
>          ASSERT(msg != &entry->msg);
>          iommu_update_ire_from_msi(entry, msg);
> @@ -489,7 +489,7 @@ int msi_free_irq(struct msi_desc *entry)
>      }
>  
>      /* Free the unused IRTE if intr remap enabled */
> -    if ( iommu_enabled )
> +    if ( iommu_intremap )
>          iommu_update_ire_from_msi(entry, NULL);
>  
>      list_del(&entry->list);
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
> @@ -682,10 +682,10 @@ static u16 __init parse_ivhd_device_spec
>                      printk(XENLOG_ERR "IVHD Error: Conflicting
> IO-APIC %#x entries\n",
>                             special->handle);
>                      if ( amd_iommu_perdev_intremap )
> -                        return 0;
> +                        iommu_intremap = 0;
>                  }
>              }
> -            else
> +            else if ( iommu_intremap )
>              {
>                  /* set device id of ioapic */
>                  ioapic_sbdf[special->handle].bdf = bdf;
> @@ -697,7 +697,7 @@ static u16 __init parse_ivhd_device_spec
>                       !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>                  {
>                      printk(XENLOG_ERR "IVHD Error: Out of
> memory\n");
> -                    return 0;
> +                    iommu_intremap = 0;
>                  }
>              }
>              break;
> @@ -706,7 +706,7 @@ static u16 __init parse_ivhd_device_spec
>          {
>              printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n",
>                     special->handle);
> -            return 0;
> +            iommu_intremap = 0;
>          }
>          break;
>      case ACPI_IVHD_HPET:
> @@ -932,7 +932,7 @@ static int __init parse_ivrs_table(struc
>          printk(XENLOG_ERR "IVHD Error: no information for IO-APIC
> %#x\n",
>                 IO_APIC_ID(apic));
>          if ( amd_iommu_perdev_intremap )
> -            error = -ENXIO;
> +            iommu_intremap = 0;
>          else
>          {
>              ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
> @@ -940,7 +940,7 @@ static int __init parse_ivrs_table(struc
>              if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
>              {
>                  printk(XENLOG_ERR "IVHD Error: Out of memory\n");
> -                error = -ENOMEM;
> +                iommu_intremap = 0;
>              }
>          }
>      }
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -1157,7 +1157,7 @@ int __init amd_iommu_init(void)
>      BUG_ON( !iommu_found() );
>  
>      if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
> -        goto error_out;
> +        iommu_intremap = 0;
>  
>      ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
>  
> @@ -1173,7 +1173,7 @@ int __init amd_iommu_init(void)
>          goto error_out;
>  
>      /* initialize io-apic interrupt remapping entries */
> -    if ( amd_iommu_setup_ioapic_remapping() != 0 )
> +    if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 )
>          goto error_out;
>  
>      /* allocate and initialize a global device table shared by all
> iommus */
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -470,6 +470,8 @@ int __init iommu_setup(void)
>          rc = iommu_hardware_setup();
>          iommu_enabled = (rc == 0);
>      }
> +    if ( !iommu_enabled )
> +        iommu_intremap = 0;
>  
>      if ( (force_iommu && !iommu_enabled) ||
>           (force_intremap && !iommu_intremap) )
> @@ -487,6 +489,7 @@ int __init iommu_setup(void)
>          printk(" - Dom0 mode: %s\n",
>                 iommu_passthrough ? "Passthrough" :
>                 iommu_dom0_strict ? "Strict" : "Relaxed");
> +    printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" :
> "dis");
>  
>      return rc;
>  }
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -373,7 +373,7 @@ unsigned int io_apic_read_remap_rte(
>      struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>  
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr || !ir_ctrl->iremap_num
> ||
> +    if ( !ir_ctrl->iremap_num ||
>          ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
>          return __io_apic_read(apic, reg);
>  
> @@ -396,15 +396,8 @@ void io_apic_write_remap_rte(
>      struct IO_APIC_route_remap_entry *remap_rte;
>      unsigned int rte_upper = (reg & 1) ? 1 : 0;
>      struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
> -    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>      int saved_mask;
>  
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -    {
> -        __io_apic_write(apic, reg, value);
> -        return;
> -    }
> -
>      old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);
>  
>      remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
> @@ -653,20 +646,11 @@ void msi_msg_read_remap_rte(
>  {
>      struct pci_dev *pdev = msi_desc->dev;
>      struct acpi_drhd_unit *drhd = NULL;
> -    struct iommu *iommu = NULL;
> -    struct ir_ctrl *ir_ctrl;
>  
>      drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
>                  : hpet_to_drhd(msi_desc->hpet_id);
> -    if ( !drhd )
> -        return;
> -    iommu = drhd->iommu;
> -
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -        return;
> -
> -    remap_entry_to_msi_msg(iommu, msg);
> +    if ( drhd )
> +        remap_entry_to_msi_msg(drhd->iommu, msg);
>  }
>  
>  void msi_msg_write_remap_rte(
> @@ -674,20 +658,11 @@ void msi_msg_write_remap_rte(
>  {
>      struct pci_dev *pdev = msi_desc->dev;
>      struct acpi_drhd_unit *drhd = NULL;
> -    struct iommu *iommu = NULL;
> -    struct ir_ctrl *ir_ctrl;
>  
>      drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
>                  : hpet_to_drhd(msi_desc->hpet_id);
> -    if ( !drhd )
> -        return;
> -    iommu = drhd->iommu;
> -
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
> -        return;
> -
> -    msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
> +    if ( drhd )
> +        msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
>  }
>  
>  int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2065,6 +2065,9 @@ static int init_vtd_hw(void)
>                  break;
>              }
>          }
> +        if ( !iommu_intremap )
> +            for_each_drhd_unit ( drhd )
> +                disable_intremap(drhd->iommu);
>      }
>  
>      /*
> --- a/xen/include/asm-x86/io_apic.h
> +++ b/xen/include/asm-x86/io_apic.h
> @@ -129,7 +129,7 @@ struct IO_APIC_route_entry {
>  extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
>  
>  /* Only need to remap ioapic RTE (reg: 10~3Fh) */
> -#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10))
> +#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10))
>  
>  static inline unsigned int __io_apic_read(unsigned int apic, unsigned
> int reg)
>  {
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2013-03-04  9:48 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-06 13:04 [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
2013-02-06 13:12 ` [PATCH 1/2] AMD IOMMU: also spot missing IO-APIC entries in IVRS table Jan Beulich
2013-02-06 14:41   ` Boris Ostrovsky
2013-02-06 14:52     ` Jan Beulich
2013-02-06 15:03       ` Boris Ostrovsky
2013-02-06 15:12         ` Jan Beulich
2013-02-11 10:49   ` Ian Campbell
2013-02-12 11:59     ` Jan Beulich
2013-02-06 13:12 ` [PATCH 2/2] AMD IOMMU: handle MSI for phantom functions Jan Beulich
2013-02-11 10:53   ` Ian Campbell
2013-02-08  9:58 ` [PATCH 0/2] AMD IOMMU: XSA-36 follow ups Jan Beulich
2013-02-08 13:53   ` Sander Eikelenboom
2013-03-01 18:45   ` Malcolm Crossley
2013-03-04  9:48     ` Jan Beulich
2013-02-11 13:12 ` Ian Campbell
2013-02-12  8:45   ` Jan Beulich
2013-02-08 14:29 Boris Ostrovsky
2013-02-08 16:48 ` Jan Beulich
2013-02-08 17:10   ` Sander Eikelenboom
2013-02-11 11:46   ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.