Subject: [PATCH] AMD IOMMU: Rework of interrupt remapping
From: Wei Wang2 @ 2009-09-15 13:54 UTC
  To: xen-devel

Hi,
The attached patch reworks interrupt remapping for the following purposes:
1) Parse IVRS special device entries in order to handle IO-APIC remapping
correctly.
2) Allocate per-device interrupt remapping tables instead of using a single
global interrupt remapping table (a standalone sketch of the table-sharing
scheme follows this list).
3) Some system devices, such as the IO-APIC in the north bridge, cannot be
discovered during PCI device enumeration. To remap interrupts of those
devices, the device table update is split into two steps, so that interrupt
remapping tables can be bound to device table entries earlier than the I/O
page tables.
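
To make (2) and (3) concrete, here is a minimal, self-contained sketch of
the table-sharing scheme under illustrative assumptions (the array bound,
allocation size, standalone harness and the simplified names add_ivrs_entry/
get_requestor_id are made up for the example; in the patch itself the logic
lives in add_ivrs_mapping_entry() and get_dma_requestor_id()):

    #include <stdio.h>
    #include <stdlib.h>

    #define IVRS_BDF_ENTRIES 256   /* illustrative bound, not the real one */

    struct ivrs_mapping {
        unsigned short dte_requestor_id;  /* alias id parsed from IVRS */
        void *intremap_table;             /* per-device remapping table */
    };

    static struct ivrs_mapping ivrs_mappings[IVRS_BDF_ENTRIES];

    /* Step 1 (boot time): bind one interrupt remapping table per alias id,
     * as add_ivrs_mapping_entry() does; devices sharing a requestor id
     * therefore share one table.  Step 2 later binds the I/O page tables
     * for the devices found during PCI enumeration. */
    static void add_ivrs_entry(unsigned short bdf, unsigned short alias_id)
    {
        ivrs_mappings[bdf].dte_requestor_id = alias_id;
        if (ivrs_mappings[alias_id].intremap_table == NULL)
            ivrs_mappings[alias_id].intremap_table = calloc(2, 4096);
    }

    /* As in get_dma_requestor_id(): fall back to the alias id unless the
     * device also has an interrupt remapping table of its own. */
    static unsigned short get_requestor_id(unsigned short bdf)
    {
        unsigned short req_id = ivrs_mappings[bdf].dte_requestor_id;

        if (ivrs_mappings[bdf].intremap_table != NULL &&
            ivrs_mappings[req_id].intremap_table != NULL)
            req_id = bdf;
        return req_id;
    }

    int main(void)
    {
        add_ivrs_entry(0x10, 0x10);  /* select entry: table of its own */
        add_ivrs_entry(0x11, 0x10);  /* alias entry: shares 0x10's table */
        printf("device 0x11 remaps via requestor id 0x%x\n",
               get_requestor_id(0x11));  /* prints 0x10 */
        return 0;
    }

The special device entries from (1) feed the same scheme: they supply the
device id for the IO-APIC (recorded in ioapic_bdf[]), which would otherwise
be unavailable because the IO-APIC never shows up during PCI enumeration.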
Thanks,
-Wei

Signed-off-by: Wei Wang <wei.wang2@amd.com>
-- 
AMD GmbH, Germany
Operating System Research Center

Legal Information:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34
85609 Dornach b. München

Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis München
Registergericht München, HRB Nr. 43632

[-- Attachment #2: rwk_intr.patch --]
[-- Type: text/x-diff, Size: 46203 bytes --]

diff -r a2ab11e31f91 xen/drivers/passthrough/amd/iommu_acpi.c
--- a/xen/drivers/passthrough/amd/iommu_acpi.c	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c	Tue Sep 15 15:39:24 2009 +0200
@@ -28,6 +28,51 @@ extern unsigned short ivrs_bdf_entries;
 extern unsigned short ivrs_bdf_entries;
 extern struct ivrs_mappings *ivrs_mappings;
 extern unsigned short last_bdf;
+extern int ioapic_bdf[MAX_IO_APICS];
+
+static void add_ivrs_mapping_entry(
+    u16 bdf, u16 alias_id, u8 flags, struct amd_iommu *iommu)
+{
+    u8 sys_mgt, lint1_pass, lint0_pass, nmi_pass, ext_int_pass, init_pass;
+    ASSERT( ivrs_mappings != NULL );
+
+    /* setup requestor id */
+    ivrs_mappings[bdf].dte_requestor_id = alias_id;
+
+    /* override flags for range of devices */
+    sys_mgt = get_field_from_byte(flags,
+                                  AMD_IOMMU_ACPI_SYS_MGT_MASK,
+                                  AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+    lint1_pass = get_field_from_byte(flags,
+                                  AMD_IOMMU_ACPI_LINT1_PASS_MASK,
+                                  AMD_IOMMU_ACPI_LINT1_PASS_SHIFT);
+    lint0_pass = get_field_from_byte(flags,
+                                  AMD_IOMMU_ACPI_LINT0_PASS_MASK,
+                                  AMD_IOMMU_ACPI_LINT0_PASS_SHIFT);
+    nmi_pass = get_field_from_byte(flags,
+                                  AMD_IOMMU_ACPI_NMI_PASS_MASK,
+                                  AMD_IOMMU_ACPI_NMI_PASS_SHIFT);
+    ext_int_pass = get_field_from_byte(flags,
+                                  AMD_IOMMU_ACPI_EINT_PASS_MASK,
+                                  AMD_IOMMU_ACPI_EINT_PASS_SHIFT);
+    init_pass = get_field_from_byte(flags,
+                                  AMD_IOMMU_ACPI_INIT_PASS_MASK,
+                                  AMD_IOMMU_ACPI_INIT_PASS_SHIFT);
+
+    ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+    ivrs_mappings[bdf].dte_lint1_pass = lint1_pass;
+    ivrs_mappings[bdf].dte_lint0_pass = lint0_pass;
+    ivrs_mappings[bdf].dte_nmi_pass = nmi_pass;
+    ivrs_mappings[bdf].dte_ext_int_pass = ext_int_pass;
+    ivrs_mappings[bdf].dte_init_pass = init_pass;
+
+    /* allocate per-device interrupt remapping table */
+    if ( ivrs_mappings[alias_id].intremap_table == NULL )
+        ivrs_mappings[alias_id].intremap_table =
+            amd_iommu_alloc_intremap_table();
+    /* assign iommu hardware */
+    ivrs_mappings[bdf].iommu = iommu;
+}
 
 static struct amd_iommu * __init find_iommu_from_bdf_cap(
     u16 bdf, u8 cap_offset)
@@ -131,11 +176,9 @@ static int __init register_exclusion_ran
 {
     unsigned long range_top, iommu_top, length;
     struct amd_iommu *iommu;
-    u16 bus, devfn, req;
-
-    bus = bdf >> 8;
-    devfn = bdf & 0xFF;
-    iommu = find_iommu_for_device(bus, devfn);
+    u16 req;
+
+    iommu = find_iommu_for_device(bdf);
     if ( !iommu )
     {
         amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
@@ -176,7 +219,7 @@ static int __init register_exclusion_ran
     unsigned long base, unsigned long limit, u8 iw, u8 ir)
 {
     unsigned long range_top, iommu_top, length;
-    u16 bus, devfn, bdf, req;
+    u16 bdf, req;
 
     /* is part of exclusion range inside of IOMMU virtual address space? */
     /* note: 'limit' parameter is assumed to be page-aligned */
@@ -191,9 +234,7 @@ static int __init register_exclusion_ran
         /* note: these entries are part of the exclusion range */
         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
         {
-            bus = bdf >> 8;
-            devfn = bdf & 0xFF;
-            if ( iommu == find_iommu_for_device(bus, devfn) )
+            if ( iommu == find_iommu_for_device(bdf) )
             {
                 reserve_unity_map_for_device(bdf, base, length, iw, ir);
                 req = ivrs_mappings[bdf].dte_requestor_id;
@@ -367,12 +408,7 @@ static u16 __init parse_ivhd_device_sele
         return 0;
     }
 
-    /* override flags for device */
-    ivrs_mappings[bdf].dte_sys_mgt_enable =
-        get_field_from_byte(ivhd_device->header.flags,
-                            AMD_IOMMU_ACPI_SYS_MGT_MASK,
-                            AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
-    ivrs_mappings[bdf].iommu = iommu;
+    add_ivrs_mapping_entry(bdf, bdf, ivhd_device->header.flags, iommu);
 
     return sizeof(struct acpi_ivhd_device_header);
 }
@@ -382,7 +418,6 @@ static u16 __init parse_ivhd_device_rang
     u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
     u16 dev_length, first_bdf, last_bdf, bdf;
-    u8 sys_mgt;
 
     dev_length = sizeof(struct acpi_ivhd_device_range);
     if ( header_length < (block_length + dev_length) )
@@ -418,15 +453,8 @@ static u16 __init parse_ivhd_device_rang
 
     amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
 
-    /* override flags for range of devices */
-    sys_mgt = get_field_from_byte(ivhd_device->header.flags,
-                                  AMD_IOMMU_ACPI_SYS_MGT_MASK,
-                                  AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
-    {
-        ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
-        ivrs_mappings[bdf].iommu = iommu;
-    }
+        add_ivrs_mapping_entry(bdf, bdf, ivhd_device->header.flags, iommu);
 
     return dev_length;
 }
@@ -460,17 +488,7 @@ static u16 __init parse_ivhd_device_alia
 
     amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
 
-    /* override requestor_id and flags for device */
-    ivrs_mappings[bdf].dte_requestor_id = alias_id;
-    ivrs_mappings[bdf].dte_sys_mgt_enable =
-        get_field_from_byte(ivhd_device->header.flags,
-                            AMD_IOMMU_ACPI_SYS_MGT_MASK,
-                            AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
-    ivrs_mappings[bdf].iommu = iommu;
-
-    ivrs_mappings[alias_id].dte_sys_mgt_enable =
-        ivrs_mappings[bdf].dte_sys_mgt_enable;
-    ivrs_mappings[alias_id].iommu = iommu;
+    add_ivrs_mapping_entry(bdf, alias_id, ivhd_device->header.flags, iommu);
 
     return dev_length;
 }
@@ -481,7 +499,6 @@ static u16 __init parse_ivhd_device_alia
 {
 
     u16 dev_length, first_bdf, last_bdf, alias_id, bdf;
-    u8 sys_mgt;
 
     dev_length = sizeof(struct acpi_ivhd_device_alias_range);
     if ( header_length < (block_length + dev_length) )
@@ -525,18 +542,8 @@ static u16 __init parse_ivhd_device_alia
     amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
     amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
 
-    /* override requestor_id and flags for range of devices */
-    sys_mgt = get_field_from_byte(ivhd_device->header.flags,
-                                  AMD_IOMMU_ACPI_SYS_MGT_MASK,
-                                  AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
-    {
-        ivrs_mappings[bdf].dte_requestor_id = alias_id;
-        ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
-        ivrs_mappings[bdf].iommu = iommu;
-    }
-    ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt;
-    ivrs_mappings[alias_id].iommu = iommu;
+        add_ivrs_mapping_entry(bdf, alias_id, ivhd_device->header.flags, iommu);
 
     return dev_length;
 }
@@ -561,12 +568,7 @@ static u16 __init parse_ivhd_device_exte
         return 0;
     }
 
-    /* override flags for device */
-    ivrs_mappings[bdf].dte_sys_mgt_enable =
-        get_field_from_byte(ivhd_device->header.flags,
-                            AMD_IOMMU_ACPI_SYS_MGT_MASK,
-                            AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
-    ivrs_mappings[bdf].iommu = iommu;
+    add_ivrs_mapping_entry(bdf, bdf, ivhd_device->header.flags, iommu);
 
     return dev_length;
 }
@@ -576,7 +578,6 @@ static u16 __init parse_ivhd_device_exte
     u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
     u16 dev_length, first_bdf, last_bdf, bdf;
-    u8 sys_mgt;
 
     dev_length = sizeof(struct acpi_ivhd_device_extended_range);
     if ( header_length < (block_length + dev_length) )
@@ -613,16 +614,35 @@ static u16 __init parse_ivhd_device_exte
     amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n",
             first_bdf, last_bdf);
 
-    /* override flags for range of devices */
-    sys_mgt = get_field_from_byte(ivhd_device->header.flags,
-                                  AMD_IOMMU_ACPI_SYS_MGT_MASK,
-                                  AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
-    {
-        ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
-        ivrs_mappings[bdf].iommu = iommu;
-    }
-
+        add_ivrs_mapping_entry(bdf, bdf, ivhd_device->header.flags, iommu);
+
+    return dev_length;
+}
+
+static u16 __init parse_ivhd_device_special(
+    union acpi_ivhd_device *ivhd_device,
+    u16 header_length, u16 block_length, struct amd_iommu *iommu)
+{
+    u16 dev_length, bdf;
+
+    dev_length = sizeof(struct acpi_ivhd_device_special);
+    if ( header_length < (block_length + dev_length) )
+    {
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
+        return 0;
+    }
+
+    bdf = ivhd_device->special.dev_id;
+    if ( bdf >= ivrs_bdf_entries )
+    {
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        return 0;
+    }
+
+    add_ivrs_mapping_entry(bdf, bdf, ivhd_device->header.flags, iommu);
+    /* set device id of ioapic */
+    ioapic_bdf[ivhd_device->special.handle] = bdf;
     return dev_length;
 }
 
@@ -698,6 +718,11 @@ static int __init parse_ivhd_block(struc
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
             dev_length = parse_ivhd_device_extended_range(
+                ivhd_device,
+                ivhd_block->header.length, block_length, iommu);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_SPECIAL:
+            dev_length = parse_ivhd_device_special(
                 ivhd_device,
                 ivhd_block->header.length, block_length, iommu);
             break;
@@ -911,6 +936,10 @@ static int __init get_last_bdf_ivhd(void
             UPDATE_LAST_BDF(ivhd_device->extended_range.trailer.dev_id)
             dev_length = sizeof(struct acpi_ivhd_device_extended_range);
             break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_SPECIAL:
+            UPDATE_LAST_BDF(ivhd_device->special.dev_id)
+            dev_length = sizeof(struct acpi_ivhd_device_special);
+            break;
         default:
             amd_iov_error("IVHD Error: Invalid Device Type!\n");
             dev_length = 0;
diff -r a2ab11e31f91 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c	Tue Sep 15 15:39:24 2009 +0200
@@ -631,6 +631,7 @@ static void __init amd_iommu_init_cleanu
 static void __init amd_iommu_init_cleanup(void)
 {
     struct amd_iommu *iommu, *next;
+    int bdf;
 
     /* free amd iommu list */
     list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
@@ -646,7 +647,11 @@ static void __init amd_iommu_init_cleanu
     }
 
     /* free interrupt remapping table */
-    deallocate_intremap_table();
+    for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
+    {
+        if ( ivrs_mappings[bdf].intremap_table )
+            amd_iommu_free_intremap_table(bdf);
+    }
 
     /* free device table */
     deallocate_iommu_table_struct(&device_table);
@@ -693,12 +698,28 @@ static int __init init_ivrs_mapping(void
         ivrs_mappings[bdf].dte_allow_exclusion = IOMMU_CONTROL_DISABLED;
         ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_DISABLED;
         ivrs_mappings[bdf].iommu = NULL;
+
+        ivrs_mappings[bdf].intremap_table = NULL;
+        ivrs_mappings[bdf].dte_lint1_pass = IOMMU_CONTROL_DISABLED;
+        ivrs_mappings[bdf].dte_lint0_pass = IOMMU_CONTROL_DISABLED;
+        ivrs_mappings[bdf].dte_nmi_pass = IOMMU_CONTROL_DISABLED;
+        ivrs_mappings[bdf].dte_ext_int_pass = IOMMU_CONTROL_DISABLED;
+        ivrs_mappings[bdf].dte_init_pass = IOMMU_CONTROL_DISABLED;
+
+        spin_lock_init(&ivrs_mappings[bdf].intremap_lock);
     }
     return 0;
 }
 
 static int __init amd_iommu_setup_device_table(void)
 {
+    int bdf;
+    void *intr_tb, *dte;
+    int sys_mgt, dev_ex, lint1_pass, lint0_pass,
+        nmi_pass, ext_int_pass, init_pass;
+
+    BUG_ON( (ivrs_bdf_entries == 0) || (iommu_enabled) );
+
     /* allocate 'device table' on a 4K boundary */
     device_table.alloc_size = PAGE_SIZE <<
                               get_order_from_bytes(
@@ -707,7 +728,42 @@ static int __init amd_iommu_setup_device
     device_table.entries = device_table.alloc_size /
                            IOMMU_DEV_TABLE_ENTRY_SIZE;
 
-    return ( allocate_iommu_table_struct(&device_table, "Device Table") );
+    if ( allocate_iommu_table_struct(&device_table, "Device Table") != 0 )
+         return -ENOMEM;
+
+    /* Add device table entries */
+    for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
+    {
+        intr_tb = ivrs_mappings[bdf].intremap_table;
+
+        if ( intr_tb )
+        {
+            sys_mgt = ivrs_mappings[bdf].dte_sys_mgt_enable;
+            dev_ex = ivrs_mappings[bdf].dte_allow_exclusion;
+
+            /* get interrupt remapping settings */
+            lint1_pass = ivrs_mappings[bdf].dte_lint1_pass;
+            lint0_pass = ivrs_mappings[bdf].dte_lint0_pass;
+            nmi_pass = ivrs_mappings[bdf].dte_nmi_pass;
+            ext_int_pass = ivrs_mappings[bdf].dte_ext_int_pass;
+            init_pass = ivrs_mappings[bdf].dte_init_pass;
+
+            /* add device table entry */
+            dte = device_table.buffer + (bdf * IOMMU_DEV_TABLE_ENTRY_SIZE);
+            amd_iommu_add_dev_table_entry(
+                dte, sys_mgt, dev_ex, lint1_pass, lint0_pass,
+                nmi_pass, ext_int_pass, init_pass);
+
+            amd_iommu_set_intremap_table(
+                dte, (u64)virt_to_maddr(intr_tb), iommu_intremap);
+
+            amd_iov_info("Add device table entry at DTE:0x%x, "
+                "intremap_table:%"PRIx64"\n", bdf,
+                (u64)virt_to_maddr(intr_tb));
+        }
+    }
+
+    return 0;
 }
 
 int __init amd_iommu_init(void)
@@ -717,7 +773,8 @@ int __init amd_iommu_init(void)
     BUG_ON( !iommu_found() );
 
     irq_to_iommu = xmalloc_array(struct amd_iommu *, nr_irqs);
-    BUG_ON(!irq_to_iommu);
+    if ( irq_to_iommu == NULL )
+        goto error_out;
     memset(irq_to_iommu, 0, nr_irqs * sizeof(struct iommu*));
 
     ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
@@ -729,14 +786,14 @@ int __init amd_iommu_init(void)
         goto error_out;
 
     if ( amd_iommu_update_ivrs_mapping_acpi() != 0 )
+        goto error_out;
+
+    /* initialize io-apic interrupt remapping entries */
+    if ( amd_iommu_setup_ioapic_remapping() != 0 )
         goto error_out;
 
     /* allocate and initialize a global device table shared by all iommus */
     if ( amd_iommu_setup_device_table() != 0 )
-        goto error_out;
-
-    /* initialize io-apic interrupt remapping entries */
-    if ( amd_iommu_setup_intremap_table() != 0 )
         goto error_out;
 
     /* per iommu initialization  */
@@ -783,15 +840,13 @@ static void invalidate_all_domain_pages(
 
 static void invalidate_all_devices(void)
 {
-    u16 bus, devfn, bdf, req_id;
+    int bdf, req_id;
     unsigned long flags;
     struct amd_iommu *iommu;
 
     for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
     {
-        bus = bdf >> 8;
-        devfn = bdf & 0xFF;
-        iommu = find_iommu_for_device(bus, devfn);
+        iommu = find_iommu_for_device(bdf);
         req_id = ivrs_mappings[bdf].dte_requestor_id;
         if ( iommu )
         {
diff -r a2ab11e31f91 xen/drivers/passthrough/amd/iommu_intr.c
--- a/xen/drivers/passthrough/amd/iommu_intr.c	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/drivers/passthrough/amd/iommu_intr.c	Tue Sep 15 15:39:24 2009 +0200
@@ -23,16 +23,24 @@
 #include <asm/hvm/svm/amd-iommu-proto.h>
 
 #define INTREMAP_TABLE_ORDER    1
-static DEFINE_SPINLOCK(int_remap_table_lock);
-void *int_remap_table = NULL;
-
-static u8 *get_intremap_entry(u8 vector, u8 dm)
+int ioapic_bdf[MAX_IO_APICS];
+extern struct ivrs_mappings *ivrs_mappings;
+extern unsigned short ivrs_bdf_entries;
+
+static int get_intremap_requestor_id(int bdf)
+{
+    ASSERT( bdf < ivrs_bdf_entries );
+    return ivrs_mappings[bdf].dte_requestor_id;
+}
+
+static u8 *get_intremap_entry(int bdf, u8 vector, u8 dm)
 {
     u8 *table;
     int offset = 0;
-    table = (u8*)int_remap_table;
-
-    BUG_ON( !table );
+
+    table = (u8*)ivrs_mappings[bdf].intremap_table;
+    ASSERT( table != NULL );
+
     offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
     offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & 
         INT_REMAP_INDEX_VECTOR_MASK;
@@ -83,6 +91,8 @@ void invalidate_interrupt_table(struct a
 }
 
 static void update_intremap_entry_from_ioapic(
+    int bdf,
+    struct amd_iommu *iommu,
     struct IO_APIC_route_entry *ioapic_rte,
     unsigned int rte_upper, unsigned int value)
 {
@@ -90,39 +100,42 @@ static void update_intremap_entry_from_i
     u32* entry;
     u8 delivery_mode, dest, vector, dest_mode;
     struct IO_APIC_route_entry *rte = ioapic_rte;
-
-    spin_lock_irqsave(&int_remap_table_lock, flags);
-
-    if ( rte_upper )
-    {
-        dest = (value >> 24) & 0xFF;
+    int req_id;
+
+    req_id = get_intremap_requestor_id(bdf);
+
+    /* only remap interrupt vector when lower 32 bits in ioapic ire changed */
+    if ( likely(!rte_upper) )
+    {
         delivery_mode = rte->delivery_mode;
         vector = rte->vector;
         dest_mode = rte->dest_mode;
-        entry = (u32*)get_intremap_entry((u8)rte->vector,
-                                        (u8)rte->delivery_mode);
+        dest = rte->dest.logical.logical_dest;
+
+        spin_lock_irqsave(&ivrs_mappings[req_id].intremap_lock, flags);
+        entry = (u32*)get_intremap_entry(req_id, vector, delivery_mode);
         update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-    }
-
-    spin_unlock_irqrestore(&int_remap_table_lock, flags);
-    return;
-}
-
-int __init amd_iommu_setup_intremap_table(void)
+        spin_unlock_irqrestore(&ivrs_mappings[req_id].intremap_lock, flags);
+
+        if ( iommu->enabled )
+        {
+            spin_lock_irqsave(&iommu->lock, flags);
+            invalidate_interrupt_table(iommu, req_id);
+            flush_command_buffer(iommu);
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+    }
+}
+
+int __init amd_iommu_setup_ioapic_remapping(void)
 {
     struct IO_APIC_route_entry rte = {0};
     unsigned long flags;
     u32* entry;
     int apic, pin;
     u8 delivery_mode, dest, vector, dest_mode;
-
-    if ( int_remap_table == NULL )
-    {
-        int_remap_table = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
-        if ( int_remap_table == NULL )
-            return -ENOMEM;
-        memset(int_remap_table, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
-    }
+    u16 bdf, req_id;
+    struct amd_iommu *iommu;
 
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -135,18 +148,34 @@ int __init amd_iommu_setup_intremap_tabl
             if ( rte.mask == 1 )
                 continue;
 
+            /* get device id of ioapic devices */
+            bdf = ioapic_bdf[IO_APIC_ID(apic)];
+            iommu = find_iommu_for_device(bdf);
+            if ( !iommu )
+            {
+                amd_iov_warning(
+                "Fail to find iommu for ioapic device id = 0x%x\n", bdf);
+                continue;
+            }
+
+            req_id = get_intremap_requestor_id(bdf);
             delivery_mode = rte.delivery_mode;
             vector = rte.vector;
             dest_mode = rte.dest_mode;
-            if ( dest_mode == 0 )
-                dest = rte.dest.physical.physical_dest & 0xf;
-            else
-                dest = rte.dest.logical.logical_dest & 0xff;
-
-            spin_lock_irqsave(&int_remap_table_lock, flags);
-            entry = (u32*)get_intremap_entry(vector, delivery_mode);
+            dest = rte.dest.logical.logical_dest;
+
+            spin_lock_irqsave(&ivrs_mappings[req_id].intremap_lock, flags);
+            entry = (u32*)get_intremap_entry(req_id, vector, delivery_mode);
             update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-            spin_unlock_irqrestore(&int_remap_table_lock, flags);
+            spin_unlock_irqrestore(&ivrs_mappings[req_id].intremap_lock, flags);
+
+            if ( iommu->enabled )
+            {
+                spin_lock_irqsave(&iommu->lock, flags);
+                invalidate_interrupt_table(iommu, req_id);
+                flush_command_buffer(iommu);
+                spin_unlock_irqrestore(&iommu->lock, flags);
+            }
         }
     }
     return 0;
@@ -157,17 +186,24 @@ void amd_iommu_ioapic_update_ire(
 {
     struct IO_APIC_route_entry ioapic_rte = { 0 };
     unsigned int rte_upper = (reg & 1) ? 1 : 0;
-    int saved_mask;
+    int saved_mask, bdf;
+    struct amd_iommu *iommu;
 
     *IO_APIC_BASE(apic) = reg;
     *(IO_APIC_BASE(apic)+4) = value;
 
-    if ( int_remap_table == NULL )
+    /* get device id of ioapic devices */
+    bdf = ioapic_bdf[IO_APIC_ID(apic)];
+    iommu = find_iommu_for_device(bdf);
+    if ( !iommu )
+    {
+        amd_iov_warning(
+            "Fail to find iommu for ioapic device id = 0x%x\n", bdf);
         return;
-    if ( !rte_upper )
+    }
+    if ( rte_upper )
         return;
 
-    reg--;
     /* read both lower and upper 32-bits of rte entry */
     *IO_APIC_BASE(apic) = reg;
     *(((u32 *)&ioapic_rte) + 0) = *(IO_APIC_BASE(apic)+4);
@@ -181,7 +217,8 @@ void amd_iommu_ioapic_update_ire(
     *(IO_APIC_BASE(apic)+4) = *(((int *)&ioapic_rte)+0);
     ioapic_rte.mask = saved_mask;
 
-    update_intremap_entry_from_ioapic(&ioapic_rte, rte_upper, value);
+    update_intremap_entry_from_ioapic(
+        bdf, iommu, &ioapic_rte, rte_upper, value);
 
     /* unmask the interrupt after we have updated the intremap table */
     *IO_APIC_BASE(apic) = reg;
@@ -193,28 +230,49 @@ static void update_intremap_entry_from_m
 {
     unsigned long flags;
     u32* entry;
-    u16 dev_id;
+    u16 bdf, req_id, alias_id;
 
     u8 delivery_mode, dest, vector, dest_mode;
 
-    dev_id = (pdev->bus << 8) | pdev->devfn;
-
-    spin_lock_irqsave(&int_remap_table_lock, flags);
+    bdf = (pdev->bus << 8) | pdev->devfn;
+    req_id = get_dma_requestor_id(bdf);
+
+    spin_lock_irqsave(&ivrs_mappings[req_id].intremap_lock, flags);
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
 
-    entry = (u32*)get_intremap_entry((u8)vector, (u8)delivery_mode);
+    entry = (u32*)get_intremap_entry(req_id, vector, delivery_mode);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-    spin_unlock_irqrestore(&int_remap_table_lock, flags);
-
-    spin_lock_irqsave(&iommu->lock, flags);
-    invalidate_interrupt_table(iommu, dev_id);
-    flush_command_buffer(iommu);
-    spin_unlock_irqrestore(&iommu->lock, flags);
-
-    return;
+    spin_unlock_irqrestore(&ivrs_mappings[req_id].intremap_lock, flags);
+
+    /*
+     * In some special cases, a PCI-e device (e.g. a SATA controller in IDE
+     * mode) will use the alias id to index the interrupt remapping table.
+     * We have to set up a secondary interrupt remapping entry to satisfy
+     * those devices.
+     */
+    alias_id = get_intremap_requestor_id(bdf);
+    if ( ( bdf != alias_id ) &&
+        ivrs_mappings[alias_id].intremap_table != NULL )
+    {
+        spin_lock_irqsave(&ivrs_mappings[alias_id].intremap_lock, flags);
+        entry = (u32*)get_intremap_entry(alias_id, vector, delivery_mode);
+        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+        invalidate_interrupt_table(iommu, alias_id);
+        spin_unlock_irqrestore(&ivrs_mappings[alias_id].intremap_lock, flags);
+    }
+
+    if ( iommu->enabled )
+    {
+        spin_lock_irqsave(&iommu->lock, flags);
+        invalidate_interrupt_table(iommu, req_id);
+        if ( alias_id != req_id )
+            invalidate_interrupt_table(iommu, alias_id);
+        flush_command_buffer(iommu);
+        spin_unlock_irqrestore(&iommu->lock, flags);
+    }
 }
 
 void amd_iommu_msi_msg_update_ire(
@@ -223,10 +281,15 @@ void amd_iommu_msi_msg_update_ire(
     struct pci_dev *pdev = msi_desc->dev;
     struct amd_iommu *iommu = NULL;
 
-    iommu = find_iommu_for_device(pdev->bus, pdev->devfn);
-
-    if ( !iommu || !int_remap_table )
+    iommu = find_iommu_for_device((pdev->bus << 8) | pdev->devfn);
+
+    if ( !iommu )
+    {
+        amd_iov_warning(
+            "Fail to find iommu for MSI device id = 0x%x\n",
+            (pdev->bus << 8) | pdev->devfn);
         return;
+    }
 
     update_intremap_entry_from_msi_msg(iommu, pdev, msg);
 }
@@ -243,13 +306,22 @@ void amd_iommu_read_msi_from_ire(
 {
 }
 
-int __init deallocate_intremap_table(void)
-{
-    if ( int_remap_table )
-    {
-        __free_amd_iommu_tables(int_remap_table, INTREMAP_TABLE_ORDER);
-        int_remap_table = NULL;
-    }
-
-    return 0;
-}
+void __init amd_iommu_free_intremap_table(int bdf)
+{
+    void *tb = ivrs_mappings[bdf].intremap_table;
+
+    if ( tb )
+    {
+        __free_amd_iommu_tables(tb, INTREMAP_TABLE_ORDER);
+        ivrs_mappings[bdf].intremap_table = NULL;
+    }
+}
+
+void* __init amd_iommu_alloc_intremap_table(void)
+{
+    void *tb;
+    tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
+    BUG_ON(tb == NULL);
+    memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    return tb;
+}
diff -r a2ab11e31f91 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/drivers/passthrough/amd/iommu_map.c	Tue Sep 15 15:39:24 2009 +0200
@@ -254,40 +254,62 @@ static void amd_iommu_set_page_directory
     pde[0] = entry;
 }
 
-void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u64 intremap_ptr,
-                                   u16 domain_id, u8 sys_mgt, u8 dev_ex,
-                                   u8 paging_mode, u8 valid, u8 int_valid)
+void amd_iommu_set_root_page_table(
+    u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid)
 {
     u64 addr_hi, addr_lo;
     u32 entry;
-
-    dte[7] = dte[6] = 0;
+    set_field_in_reg_u32(domain_id, 0,
+                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
+                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
+    dte[2] = entry;
+
+    addr_lo = root_ptr & DMA_32BIT_MASK;
+    addr_hi = root_ptr >> 32;
+
+    set_field_in_reg_u32((u32)addr_hi, 0,
+                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
+                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_MASK,
+                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK,
+                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_SHIFT, &entry);
+    dte[1] = entry;
+
+    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
+                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
+                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT, &entry);
+    set_field_in_reg_u32(paging_mode, entry,
+                         IOMMU_DEV_TABLE_PAGING_MODE_MASK,
+                         IOMMU_DEV_TABLE_PAGING_MODE_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
+                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
+    set_field_in_reg_u32(valid ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_VALID_MASK,
+                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
+    dte[0] = entry;
+}
+
+void amd_iommu_set_intremap_table(u32 *dte, u64 intremap_ptr, u8 int_valid)
+{
+    u64 addr_hi, addr_lo;
+    u32 entry;
 
     addr_lo = intremap_ptr & DMA_32BIT_MASK;
     addr_hi = intremap_ptr >> 32;
 
-    set_field_in_reg_u32((u32)addr_hi, 0,
+    entry = dte[5];
+    set_field_in_reg_u32((u32)addr_hi, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                        IOMMU_DEV_TABLE_INIT_PASSTHRU_MASK,
-                        IOMMU_DEV_TABLE_INIT_PASSTHRU_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                        IOMMU_DEV_TABLE_EINT_PASSTHRU_MASK,
-                        IOMMU_DEV_TABLE_EINT_PASSTHRU_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                        IOMMU_DEV_TABLE_NMI_PASSTHRU_MASK,
-                        IOMMU_DEV_TABLE_NMI_PASSTHRU_SHIFT, &entry);
     /* Fixed and arbitrated interrupts remapped */
     set_field_in_reg_u32(2, entry,
                         IOMMU_DEV_TABLE_INT_CONTROL_MASK,
                         IOMMU_DEV_TABLE_INT_CONTROL_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                        IOMMU_DEV_TABLE_LINT0_ENABLE_MASK,
-                        IOMMU_DEV_TABLE_LINT0_ENABLE_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                        IOMMU_DEV_TABLE_LINT1_ENABLE_MASK,
-                        IOMMU_DEV_TABLE_LINT1_ENABLE_SHIFT, &entry);
     dte[5] = entry;
 
     set_field_in_reg_u32((u32)addr_lo >> 6, 0,
@@ -297,14 +319,47 @@ void amd_iommu_set_dev_table_entry(u32 *
     set_field_in_reg_u32(0xB, entry,
                          IOMMU_DEV_TABLE_INT_TABLE_LENGTH_MASK,
                          IOMMU_DEV_TABLE_INT_TABLE_LENGTH_SHIFT, &entry);
+    /* ignore unmapped interrupts */
+    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
+                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
     set_field_in_reg_u32(int_valid ? IOMMU_CONTROL_ENABLED :
                          IOMMU_CONTROL_DISABLED, entry,
                          IOMMU_DEV_TABLE_INT_VALID_MASK,
                          IOMMU_DEV_TABLE_INT_VALID_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
-                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
     dte[4] = entry;
+}
+
+void amd_iommu_add_dev_table_entry(
+    u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass, 
+    u8 nmi_pass, u8 ext_int_pass, u8 init_pass)
+{
+    u32 entry;
+
+    dte[7] = dte[6] = dte[4] = dte[2] = dte[1] = dte[0] = 0;
+
+
+    set_field_in_reg_u32(init_pass ? IOMMU_CONTROL_ENABLED :
+                        IOMMU_CONTROL_DISABLED, 0,
+                        IOMMU_DEV_TABLE_INIT_PASSTHRU_MASK,
+                        IOMMU_DEV_TABLE_INIT_PASSTHRU_SHIFT, &entry);
+    set_field_in_reg_u32(ext_int_pass ? IOMMU_CONTROL_ENABLED :
+                        IOMMU_CONTROL_DISABLED, entry,
+                        IOMMU_DEV_TABLE_EINT_PASSTHRU_MASK,
+                        IOMMU_DEV_TABLE_EINT_PASSTHRU_SHIFT, &entry);
+    set_field_in_reg_u32(nmi_pass ? IOMMU_CONTROL_ENABLED :
+                        IOMMU_CONTROL_DISABLED, entry,
+                        IOMMU_DEV_TABLE_NMI_PASSTHRU_MASK,
+                        IOMMU_DEV_TABLE_NMI_PASSTHRU_SHIFT, &entry);
+    set_field_in_reg_u32(lint0_pass ? IOMMU_CONTROL_ENABLED :
+                        IOMMU_CONTROL_DISABLED, entry,
+                        IOMMU_DEV_TABLE_LINT0_ENABLE_MASK,
+                        IOMMU_DEV_TABLE_LINT0_ENABLE_SHIFT, &entry);
+    set_field_in_reg_u32(lint1_pass ? IOMMU_CONTROL_ENABLED :
+                        IOMMU_CONTROL_DISABLED, entry,
+                        IOMMU_DEV_TABLE_LINT1_ENABLE_MASK,
+                        IOMMU_DEV_TABLE_LINT1_ENABLE_SHIFT, &entry);
+    dte[5] = entry;
 
     set_field_in_reg_u32(sys_mgt, 0,
                          IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
@@ -313,39 +368,6 @@ void amd_iommu_set_dev_table_entry(u32 *
                          IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
                          IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
     dte[3] = entry;
-
-    set_field_in_reg_u32(domain_id, 0,
-                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
-                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
-    dte[2] = entry;
-
-    addr_lo = root_ptr & DMA_32BIT_MASK;
-    addr_hi = root_ptr >> 32;
-    set_field_in_reg_u32((u32)addr_hi, 0,
-                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
-                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_MASK,
-                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK,
-                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_SHIFT, &entry);
-    dte[1] = entry;
-
-    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
-                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
-                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT, &entry);
-    set_field_in_reg_u32(paging_mode, entry,
-                         IOMMU_DEV_TABLE_PAGING_MODE_MASK,
-                         IOMMU_DEV_TABLE_PAGING_MODE_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
-                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
-    set_field_in_reg_u32(valid ? IOMMU_CONTROL_ENABLED :
-                         IOMMU_CONTROL_DISABLED, entry,
-                         IOMMU_DEV_TABLE_VALID_MASK,
-                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
-    dte[0] = entry;
 }
 
 u64 amd_iommu_get_next_table_from_pte(u32 *entry)
diff -r a2ab11e31f91 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Sep 15 15:39:24 2009 +0200
@@ -26,62 +26,94 @@
 
 extern unsigned short ivrs_bdf_entries;
 extern struct ivrs_mappings *ivrs_mappings;
-extern void *int_remap_table;
-
-struct amd_iommu *find_iommu_for_device(int bus, int devfn)
-{
-    u16 bdf = (bus << 8) | devfn;
+
+struct amd_iommu *find_iommu_for_device(int bdf)
+{
     BUG_ON ( bdf >= ivrs_bdf_entries );
     return ivrs_mappings[bdf].iommu;
 }
 
+/*
+ * Some devices will use the alias id and the original device id to index
+ * the interrupt table and the I/O page table respectively. Such devices
+ * will have both an alias entry and a select entry in the IVRS structure.
+ *
+ * Return the original device id if the device has a valid interrupt
+ * remapping table set up for both the select entry and the alias entry.
+ */
+int get_dma_requestor_id(u16 bdf)
+{
+    int req_id;
+
+    BUG_ON ( bdf >= ivrs_bdf_entries );
+    req_id = ivrs_mappings[bdf].dte_requestor_id;
+    if ( (ivrs_mappings[bdf].intremap_table != NULL) &&
+         (ivrs_mappings[req_id].intremap_table != NULL) )
+        req_id = bdf;
+
+    return req_id;
+}
+
+static int is_translation_valid(u32 *entry)
+{
+    return (get_field_from_reg_u32(entry[0],
+                                   IOMMU_DEV_TABLE_VALID_MASK,
+                                   IOMMU_DEV_TABLE_VALID_SHIFT) &&
+            get_field_from_reg_u32(entry[0],
+                                   IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
+                                   IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT));
+}
+
+static void disable_translation(u32 *dte)
+{
+    u32 entry;
+
+    entry = dte[0];
+    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
+                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_VALID_MASK,
+                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
+    dte[0] = entry;
+}
+
 static void amd_iommu_setup_domain_device(
     struct domain *domain, struct amd_iommu *iommu, int bdf)
 {
     void *dte;
     unsigned long flags;
-    int req_id;
-    u8 sys_mgt, dev_ex, valid = 1, int_valid = 1;
+    int req_id, valid = 1;
+
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
 
-    BUG_ON( !hd->root_table || !hd->paging_mode || !int_remap_table );
+    BUG_ON( !hd->root_table || !hd->paging_mode || !iommu->dev_table.buffer );
+
+    if ( iommu_passthrough && (domain->domain_id == 0) )
+        valid = 0;
 
     /* get device-table entry */
-    req_id = ivrs_mappings[bdf].dte_requestor_id;
+    req_id = get_dma_requestor_id(bdf);
     dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    if ( !amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
+    if ( !is_translation_valid((u32 *)dte) )
     {
         /* bind DTE to domain page-tables */
-        sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
-        dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
-
-        if ( iommu_passthrough && (domain->domain_id == 0) )
-            valid = 0;
-        if ( !iommu_intremap )
-            int_valid = 0;
-
-        amd_iommu_set_dev_table_entry((u32 *)dte,
-                                      page_to_maddr(hd->root_table),
-                                      virt_to_maddr(int_remap_table),
-                                      hd->domain_id, sys_mgt, dev_ex,
-                                      hd->paging_mode, valid, int_valid);
+        amd_iommu_set_root_page_table(
+            (u32 *)dte, page_to_maddr(hd->root_table), hd->domain_id,
+            hd->paging_mode, valid);
 
         invalidate_dev_table_entry(iommu, req_id);
-        invalidate_interrupt_table(iommu, req_id);
         flush_command_buffer(iommu);
-        amd_iov_info("Enable DTE:0x%x, "
-                "root_table:%"PRIx64", interrupt_table:%"PRIx64", "
-                "domain_id:%d, paging_mode:%d\n",
-                req_id, (u64)page_to_maddr(hd->root_table),
-                (u64)virt_to_maddr(int_remap_table), hd->domain_id,
-                hd->paging_mode);
+
+        amd_iov_info("Setup I/O page table at DTE:0x%x, root_table:%"PRIx64","
+        "domain_id:%d, paging_mode:%d\n", req_id,
+        page_to_maddr(hd->root_table), hd->domain_id, hd->paging_mode);
     }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
-
 }
 
 static void amd_iommu_setup_dom0_devices(struct domain *d)
@@ -110,12 +142,15 @@ static void amd_iommu_setup_dom0_devices
                 list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 bdf = (bus << 8) | pdev->devfn;
-                /* supported device? */
-                iommu = (bdf < ivrs_bdf_entries) ?
-                    find_iommu_for_device(bus, pdev->devfn) : NULL;
-
-                if ( iommu )
-                    amd_iommu_setup_domain_device(d, iommu, bdf);
+                iommu = find_iommu_for_device(bdf);
+
+                if ( !iommu )
+                {
+                    amd_iov_warning("Fail to find iommu for device"
+                        "%02x:%02x.%x\n", bus, dev, func);
+                    continue;
+                }
+                amd_iommu_setup_domain_device(d, iommu, bdf);
             }
         }
     }
@@ -223,13 +258,14 @@ static void amd_iommu_disable_domain_dev
     unsigned long flags;
     int req_id;
 
-    req_id = ivrs_mappings[bdf].dte_requestor_id;
+    BUG_ON ( iommu->dev_table.buffer == NULL );
+    req_id = get_dma_requestor_id(bdf);
     dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
 
-    spin_lock_irqsave(&iommu->lock, flags); 
-    if ( amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
-    {
-        memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
+    spin_lock_irqsave(&iommu->lock, flags);
+    if ( is_translation_valid((u32 *)dte) )
+    {
+        disable_translation((u32 *)dte);
         invalidate_dev_table_entry(iommu, req_id);
         flush_command_buffer(iommu);
         amd_iov_info("Disable DTE:0x%x,"
@@ -253,10 +289,7 @@ static int reassign_device( struct domai
         return -ENODEV;
 
     bdf = (bus << 8) | devfn;
-    /* supported device? */
-    iommu = (bdf < ivrs_bdf_entries) ?
-    find_iommu_for_device(bus, pdev->devfn) : NULL;
-
+    iommu = find_iommu_for_device(bdf);
     if ( !iommu )
     {
         amd_iov_error("Fail to find iommu."
@@ -281,7 +314,7 @@ static int amd_iommu_assign_device(struc
 static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     int bdf = (bus << 8) | devfn;
-    int req_id = ivrs_mappings[bdf].dte_requestor_id;
+    int req_id = get_dma_requestor_id(bdf);
 
     if ( ivrs_mappings[req_id].unity_map_enable )
     {
@@ -356,9 +389,7 @@ static int amd_iommu_add_device(struct p
         return -EINVAL;
 
     bdf = (pdev->bus << 8) | pdev->devfn;
-    iommu = (bdf < ivrs_bdf_entries) ?
-    find_iommu_for_device(pdev->bus, pdev->devfn) : NULL;
-
+    iommu = find_iommu_for_device(bdf);
     if ( !iommu )
     {
         amd_iov_error("Fail to find iommu."
@@ -380,9 +411,7 @@ static int amd_iommu_remove_device(struc
         return -EINVAL;
 
     bdf = (pdev->bus << 8) | pdev->devfn;
-    iommu = (bdf < ivrs_bdf_entries) ?
-    find_iommu_for_device(pdev->bus, pdev->devfn) : NULL;
-
+    iommu = find_iommu_for_device(bdf);
     if ( !iommu )
     {
         amd_iov_error("Fail to find iommu."
@@ -401,7 +430,7 @@ static int amd_iommu_group_id(u8 bus, u8
     int rt;
     int bdf = (bus << 8) | devfn;
     rt = ( bdf < ivrs_bdf_entries ) ?
-        ivrs_mappings[bdf].dte_requestor_id :
+        get_dma_requestor_id(bdf) :
         bdf;
     return rt;
 }
diff -r a2ab11e31f91 xen/include/asm-x86/amd-iommu.h
--- a/xen/include/asm-x86/amd-iommu.h	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/include/asm-x86/amd-iommu.h	Tue Sep 15 15:39:24 2009 +0200
@@ -92,5 +92,16 @@ struct ivrs_mappings {
     unsigned long addr_range_start;
     unsigned long addr_range_length;
     struct amd_iommu *iommu;
+
+    /* per device interrupt remapping table */
+    void *intremap_table;
+    spinlock_t intremap_lock;
+
+    /* interrupt remapping settings */
+    u8 dte_lint1_pass;
+    u8 dte_lint0_pass;
+    u8 dte_nmi_pass;
+    u8 dte_ext_int_pass;
+    u8 dte_init_pass;
 };
 #endif /* _ASM_X86_64_AMD_IOMMU_H */
diff -r a2ab11e31f91 xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h	Tue Sep 15 15:39:24 2009 +0200
@@ -43,6 +43,7 @@
 #define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE    67
 #define AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT 70
 #define AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE  71
+#define AMD_IOMMU_ACPI_IVHD_DEV_SPECIAL    72
 
 /* IVHD IOMMU Flags */
 #define AMD_IOMMU_ACPI_COHERENT_MASK       0x20
@@ -151,6 +152,13 @@ struct acpi_ivhd_device_extended_range {
    struct acpi_ivhd_device_trailer trailer;
 };
 
+struct acpi_ivhd_device_special {
+   struct acpi_ivhd_device_header header;
+   u8  handle;
+   u16 dev_id;
+   u8  variety;
+};
+
 union acpi_ivhd_device {
    struct acpi_ivhd_device_header header;
    struct acpi_ivhd_device_range range;
@@ -158,6 +166,7 @@ union acpi_ivhd_device {
    struct acpi_ivhd_device_alias_range alias_range;
    struct acpi_ivhd_device_extended extended;
    struct acpi_ivhd_device_extended_range extended_range;
+   struct acpi_ivhd_device_special special;
 };
 
 struct acpi_ivmd_block_header {
diff -r a2ab11e31f91 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Mon Sep 07 09:00:21 2009 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Tue Sep 15 15:39:24 2009 +0200
@@ -63,10 +63,14 @@ void invalidate_all_iommu_pages(struct d
 void invalidate_all_iommu_pages(struct domain *d);
 
 /* device table functions */
-void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u64 intremap_ptr,
-        u16 domain_id, u8 sys_mgt, u8 dev_ex, u8 paging_mode,
-        u8 valid, u8 int_valid);
-int amd_iommu_is_dte_page_translation_valid(u32 *entry);
+int get_dma_requestor_id(u16 bdf);
+void amd_iommu_add_dev_table_entry(
+    u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass, 
+    u8 nmi_pass, u8 ext_int_pass, u8 init_pass);
+void amd_iommu_set_intremap_table(
+    u32 *dte, u64 intremap_ptr, u8 int_valid);
+void amd_iommu_set_root_page_table(
+    u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid);
 void invalidate_dev_table_entry(struct amd_iommu *iommu, u16 device_id);
 
 /* send cmd to iommu */
@@ -74,11 +78,12 @@ void flush_command_buffer(struct amd_iom
 void flush_command_buffer(struct amd_iommu *iommu);
 
 /* find iommu for bdf */
-struct amd_iommu *find_iommu_for_device(int bus, int devfn);
+struct amd_iommu *find_iommu_for_device(int bdf);
 
 /*interrupt remapping */
-int __init amd_iommu_setup_intremap_table(void);
-int __init deallocate_intremap_table(void);
+int __init amd_iommu_setup_ioapic_remapping(void);
+void *__init amd_iommu_alloc_intremap_table(void);
+void __init amd_iommu_free_intremap_table(int bdf);
 void invalidate_interrupt_table(struct amd_iommu *iommu, u16 device_id);
 void amd_iommu_ioapic_update_ire(
     unsigned int apic, unsigned int reg, unsigned int value);
