* [PATCH] VT-d: fix RMRR handling
@ 2014-02-10 13:42 Jan Beulich
  2014-02-10 17:38 ` Andrew Cooper
  2014-03-01  7:03 ` Zhang, Xiantao
  0 siblings, 2 replies; 6+ messages in thread
From: Jan Beulich @ 2014-02-10 13:42 UTC (permalink / raw)
  To: xen-devel; +Cc: xiantao.zhang

Removing mapped RMRR tracking structures in dma_pte_clear_one() is
wrong for two reasons: First, these regions may cover more than a
single page. And second, multiple devices (and hence multiple devices
assigned to any particular guest) may share a single RMRR (whether
assigning such devices to distinct guests is a safe thing to do is
another question).

Therefore move the removal of the tracking structures into the
counterpart function to the one doing the insertion -
intel_iommu_remove_device(), and add a reference count to the tracking
structure.

Further, for the handling of the mappings of the respective memory
regions to be correct, RMRRs must not overlap. Add a respective check
to acpi_parse_one_rmrr().

And finally, with all of this being VT-d specific, move the cleanup
of the list as well as the structure type definition where it belongs -
in VT-d specific rather than IOMMU generic code.

Note that this doesn't address yet another issue associated with RMRR
handling: The purpose of the RMRRs as well as the way the respective
IOMMU page table mappings get inserted both suggest that these regions
would need to be marked E820_RESERVED in all (HVM?) guests' memory
maps, yet nothing like this is being done in hvmloader. (For PV guests
this would also seem to be necessary, but may conflict with PV guests
possibly assuming there to be just a single E820 entry representing all
of its RAM.)

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -580,6 +580,16 @@ acpi_parse_one_rmrr(struct acpi_dmar_hea
     if ( (ret = acpi_dmar_check_length(header, sizeof(*rmrr))) != 0 )
         return ret;
 
+    list_for_each_entry(rmrru, &acpi_rmrr_units, list)
+       if ( base_addr <= rmrru->end_address && rmrru->base_address <= end_addr )
+       {
+           printk(XENLOG_ERR VTDPREFIX
+                  "Overlapping RMRRs [%"PRIx64",%"PRIx64"] and [%"PRIx64",%"PRIx64"]\n",
+                  rmrru->base_address, rmrru->end_address,
+                  base_addr, end_addr);
+           return -EEXIST;
+       }
+
     /* This check is here simply to detect when RMRR values are
      * not properly represented in the system memory map and
      * inform the user
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -412,9 +412,8 @@ static int iommu_populate_page_table(str
 void iommu_domain_destroy(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    struct list_head *ioport_list, *rmrr_list, *tmp;
+    struct list_head *ioport_list, *tmp;
     struct g2m_ioport *ioport;
-    struct mapped_rmrr *mrmrr;
 
     if ( !iommu_enabled || !hd->platform_ops )
         return;
@@ -428,13 +427,6 @@ void iommu_domain_destroy(struct domain 
         list_del(&ioport->list);
         xfree(ioport);
     }
-
-    list_for_each_safe ( rmrr_list, tmp, &hd->mapped_rmrrs )
-    {
-        mrmrr = list_entry(rmrr_list, struct mapped_rmrr, list);
-        list_del(&mrmrr->list);
-        xfree(mrmrr);
-    }
 }
 
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -43,6 +43,12 @@
 #include "vtd.h"
 #include "../ats.h"
 
+struct mapped_rmrr {
+    struct list_head list;
+    u64 base, end;
+    unsigned int count;
+};
+
 /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
 bool_t __read_mostly untrusted_msi;
 
@@ -620,7 +626,6 @@ static void dma_pte_clear_one(struct dom
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
-    struct mapped_rmrr *mrmrr;
 
     spin_lock(&hd->mapping_lock);
     /* get last level pte */
@@ -649,21 +654,6 @@ static void dma_pte_clear_one(struct dom
         __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1);
 
     unmap_vtd_domain_page(page);
-
-    /* if the cleared address is between mapped RMRR region,
-     * remove the mapped RMRR
-     */
-    spin_lock(&hd->mapping_lock);
-    list_for_each_entry ( mrmrr, &hd->mapped_rmrrs, list )
-    {
-        if ( addr >= mrmrr->base && addr <= mrmrr->end )
-        {
-            list_del(&mrmrr->list);
-            xfree(mrmrr);
-            break;
-        }
-    }
-    spin_unlock(&hd->mapping_lock);
 }
 
 static void iommu_free_pagetable(u64 pt_maddr, int level)
@@ -1704,10 +1694,17 @@ static int reassign_device_ownership(
 void iommu_domain_teardown(struct domain *d)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct mapped_rmrr *mrmrr, *tmp;
 
     if ( list_empty(&acpi_drhd_units) )
         return;
 
+    list_for_each_entry_safe ( mrmrr, tmp, &hd->mapped_rmrrs, list )
+    {
+        list_del(&mrmrr->list);
+        xfree(mrmrr);
+    }
+
     if ( iommu_use_hap_pt(d) )
         return;
 
@@ -1852,14 +1849,17 @@ static int rmrr_identity_mapping(struct 
     ASSERT(rmrr->base_address < rmrr->end_address);
 
     /*
-     * No need to acquire hd->mapping_lock, as the only theoretical race is
-     * with the insertion below (impossible due to holding pcidevs_lock).
+     * No need to acquire hd->mapping_lock: Both insertion and removal
+     * get done while holding pcidevs_lock.
      */
     list_for_each_entry( mrmrr, &hd->mapped_rmrrs, list )
     {
         if ( mrmrr->base == rmrr->base_address &&
              mrmrr->end == rmrr->end_address )
+        {
+            ++mrmrr->count;
             return 0;
+        }
     }
 
     base = rmrr->base_address & PAGE_MASK_4K;
@@ -1880,9 +1880,8 @@ static int rmrr_identity_mapping(struct 
         return -ENOMEM;
     mrmrr->base = rmrr->base_address;
     mrmrr->end = rmrr->end_address;
-    spin_lock(&hd->mapping_lock);
+    mrmrr->count = 1;
     list_add_tail(&mrmrr->list, &hd->mapped_rmrrs);
-    spin_unlock(&hd->mapping_lock);
 
     return 0;
 }
@@ -1944,17 +1943,50 @@ static int intel_iommu_remove_device(u8 
     if ( !pdev->domain )
         return -EINVAL;
 
-    /* If the device belongs to dom0, and it has RMRR, don't remove it
-     * from dom0, because BIOS may use RMRR at booting time.
-     */
-    if ( pdev->domain->domain_id == 0 )
+    for_each_rmrr_device ( rmrr, bdf, i )
     {
-        for_each_rmrr_device ( rmrr, bdf, i )
+        struct hvm_iommu *hd;
+        struct mapped_rmrr *mrmrr, *tmp;
+
+        if ( rmrr->segment != pdev->seg ||
+             PCI_BUS(bdf) != pdev->bus ||
+             PCI_DEVFN2(bdf) != devfn )
+            continue;
+
+        /*
+         * If the device belongs to dom0, and it has RMRR, don't remove
+         * it from dom0, because BIOS may use RMRR at booting time.
+         */
+        if ( is_hardware_domain(pdev->domain) )
+            return 0;
+
+        hd = domain_hvm_iommu(pdev->domain);
+
+        /*
+         * No need to acquire hd->mapping_lock: Both insertion and removal
+         * get done while holding pcidevs_lock.
+         */
+        ASSERT(spin_is_locked(&pcidevs_lock));
+        list_for_each_entry_safe ( mrmrr, tmp, &hd->mapped_rmrrs, list )
         {
-            if ( rmrr->segment == pdev->seg &&
-                 PCI_BUS(bdf) == pdev->bus &&
-                 PCI_DEVFN2(bdf) == devfn )
-                return 0;
+            unsigned long base_pfn, end_pfn;
+
+            if ( rmrr->base_address != mrmrr->base ||
+                 rmrr->end_address != mrmrr->end ||
+                 --mrmrr->count )
+                continue;
+
+            base_pfn = (mrmrr->base & PAGE_MASK_4K) >> PAGE_SHIFT_4K;
+            end_pfn = PAGE_ALIGN_4K(mrmrr->end) >> PAGE_SHIFT_4K;
+            while ( base_pfn < end_pfn )
+            {
+                if ( intel_iommu_unmap_page(pdev->domain, base_pfn) )
+                    return -ENXIO;
+                base_pfn++;
+            }
+
+            list_del(&mrmrr->list);
+            xfree(mrmrr);
         }
     }
 
--- a/xen/include/xen/hvm/iommu.h
+++ b/xen/include/xen/hvm/iommu.h
@@ -29,12 +29,6 @@ struct g2m_ioport {
     unsigned int np;
 };
 
-struct mapped_rmrr {
-    struct list_head list;
-    u64 base;
-    u64 end;
-};
-
 struct hvm_iommu {
     u64 pgd_maddr;                 /* io page directory machine address */
     spinlock_t mapping_lock;       /* io page table lock */




* Re: [PATCH] VT-d: fix RMRR handling
  2014-02-10 13:42 [PATCH] VT-d: fix RMRR handling Jan Beulich
@ 2014-02-10 17:38 ` Andrew Cooper
  2014-02-11  7:54   ` Jan Beulich
  2014-03-01  7:03 ` Zhang, Xiantao
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew Cooper @ 2014-02-10 17:38 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, xiantao.zhang

On 10/02/14 13:42, Jan Beulich wrote:
> Removing mapped RMRR tracking structures in dma_pte_clear_one() is
> wrong for two reasons: First, these regions may cover more than a
> single page. And second, multiple devices (and hence multiple devices
> assigned to any particular guest) may share a single RMRR (whether
> assigning such devices to distinct guests is a safe thing to do is
> another question).
>
> Therefore move the removal of the tracking structures into the
> counterpart function to the one doing the insertion -
> intel_iommu_remove_device(), and add a reference count to the tracking
> structure.
>
> Further, for the handling of the mappings of the respective memory
> regions to be correct, RMRRs must not overlap. Add a respective check
> to acpi_parse_one_rmrr().
>
> And finally, with all of this being VT-d specific, move the cleanup
> of the list as well as the structure type definition where it belongs -
> in VT-d specific rather than IOMMU generic code.
>
> Note that this doesn't address yet another issue associated with RMRR
> handling: The purpose of the RMRRs as well as the way the respective
> IOMMU page table mappings get inserted both suggest that these regions
> would need to be marked E820_RESERVED in all (HVM?) guests' memory
> maps, yet nothing like this is being done in hvmloader. (For PV guests
> this would also seem to be necessary, but may conflict with PV guests
> possibly assuming there to be just a single E820 entry representing all
> of its RAM.)
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Having read up on RMRRs again, I suspect it is more complicated.  RMRRs
are for legacy devices which need to DMA to the BIOS to function
correctly.  In the case of dom0, they should necessarily be identity
mapped in the IOMMU tables (which does appear to be the case currently).

> For domains with passthrough of a device using an RMRR, the region
should be marked as reserved (and possibly removed from the physmap for
HVM guests?), and the IOMMU mappings again identity mapped, so the
device can talk to the BIOS.  Having the device talk to an
equally-positioned RMRR in the guest address space seems pointless. 
Furthermore, XenServer has had one support escalation which touched upon
this.  It turned out to be unrelated to the problem causing the
escalation, but was identified as something which should be handled better.

One way or another, the security when passing through devices covered by
RMRR would end up being reduced.

~Andrew


* Re: [PATCH] VT-d: fix RMRR handling
  2014-02-10 17:38 ` Andrew Cooper
@ 2014-02-11  7:54   ` Jan Beulich
  0 siblings, 0 replies; 6+ messages in thread
From: Jan Beulich @ 2014-02-11  7:54 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, xiantao.zhang

>>> On 10.02.14 at 18:38, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 10/02/14 13:42, Jan Beulich wrote:
>> Removing mapped RMRR tracking structures in dma_pte_clear_one() is
>> wrong for two reasons: First, these regions may cover more than a
>> single page. And second, multiple devices (and hence multiple devices
>> assigned to any particular guest) may share a single RMRR (whether
>> assigning such devices to distinct guests is a safe thing to do is
>> another question).
>>
>> Therefore move the removal of the tracking structures into the
>> counterpart function to the one doing the insertion -
>> intel_iommu_remove_device(), and add a reference count to the tracking
>> structure.
>>
>> Further, for the handling of the mappings of the respective memory
>> regions to be correct, RMRRs must not overlap. Add a respective check
>> to acpi_parse_one_rmrr().
>>
>> And finally, with all of this being VT-d specific, move the cleanup
>> of the list as well as the structure type definition where it belongs -
>> in VT-d specific rather than IOMMU generic code.
>>
>> Note that this doesn't address yet another issue associated with RMRR
>> handling: The purpose of the RMRRs as well as the way the respective
>> IOMMU page table mappings get inserted both suggest that these regions
>> would need to be marked E820_RESERVED in all (HVM?) guests' memory
>> maps, yet nothing like this is being done in hvmloader. (For PV guests
>> this would also seem to be necessary, but may conflict with PV guests
>> possibly assuming there to be just a single E820 entry representing all
>> of its RAM.)
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Having read up on RMRRs again, I suspect it is more complicated.  RMRRs
> are for legacy devices which need to DMA to the BIOS to function
> correctly.  In the case of dom0, they should necessarily be identity
> mapped in the IOMMU tables (which does appear to be the case currently).
> 
> For domains with passthrough of a device using an RMRR, then the region
> should be marked as reserved (and possibly removed from the physmap for
> HVM guests?), and the IOMMU mappings again identity mapped, so the
> device can talk to the BIOS.  Having the device talk to an
> equally-positioned RMRR in the guest address space seems pointless. 

I at least meant to say exactly that. Except that I implied that _all_
> RMRR-specified regions should be marked reserved (and "holes"
punched _and_ enforced to remain holes in the physmap), since
otherwise you can't hotplug such a device. (Of course we could
consider the alternative of not allowing hotplug of devices listed
underneath some RMRR, or not allowing assignment of such
devices at all.)

> One way or another, the security when passing through devices covered by
> RMRR would end up being reduced.

If multiple devices share a region, and they're being assigned to
different guests (with Dom0 counted as guest) - yes. But if such
a group was assigned collectively, I don't think security would be
reduced.

Jan


* Re: [PATCH] VT-d: fix RMRR handling
  2014-02-10 13:42 [PATCH] VT-d: fix RMRR handling Jan Beulich
  2014-02-10 17:38 ` Andrew Cooper
@ 2014-03-01  7:03 ` Zhang, Xiantao
  2014-03-04  8:04   ` Jan Beulich
  1 sibling, 1 reply; 6+ messages in thread
From: Zhang, Xiantao @ 2014-03-01  7:03 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

Jan, my comments embedded, thanks!

> Removing mapped RMRR tracking structures in dma_pte_clear_one() is wrong
> for two reasons: First, these regions may cover more than a single page. And
> second, multiple devices (and hence multiple devices assigned to any
> particular guest) may share a single RMRR (whether assigning such devices to
> distinct guests is a safe thing to do is another question).

Agree, this is a real issue as you described

> Therefore move the removal of the tracking structures into the counterpart
> function to the one doing the insertion - intel_iommu_remove_device(), and
> add a reference count to the tracking structure.

Adding a reference count is a good idea, but from the logic it seems you are adding a global counter for each RMRR?
In theory, one RMRR may be mapped for multiple devices in multiple domains, so a global counter per RMRR is not enough.
Maybe we need a per-domain counter there?

> Further, for the handling of the mappings of the respective memory regions to
> be correct, RMRRs must not overlap. Add a respective check to
> acpi_parse_one_rmrr().
> 
> And finally, with all of this being VT-d specific, move the cleanup of the list as
> well as the structure type definition where it belongs - in VT-d specific rather
> than IOMMU generic code.
> 
> Note that this doesn't address yet another issue associated with RMRR
> handling: The purpose of the RMRRs as well as the way the respective IOMMU
> page table mappings get inserted both suggest that these regions would need
> to be marked E820_RESERVED in all (HVM?) guests' memory maps, yet nothing
> like this is being done in hvmloader. (For PV guests this would also seem to be
> necessary, but may conflict with PV guests possibly assuming there to be just a
> single E820 entry representing all of its RAM.)

Yes, this really is a pending issue, and we need to find a clean way to handle it.
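
For illustration, a minimal sketch of the kind of hvmloader change being alluded to. This is purely a sketch under assumptions: the helper name is made up, it presumes hvmloader's usual struct e820entry layout (addr/size/type), and it assumes some not-yet-existing channel (xenstore, a new hypercall, ...) by which the host RMRR ranges would be handed to hvmloader in the first place.

    /* Hypothetical helper: append one E820_RESERVED entry covering an RMRR.
     * 'base'/'end' are the inclusive host RMRR bounds, obtained by whatever
     * (not yet existing) means hvmloader would learn about them.
     */
    static unsigned int e820_reserve_rmrr(struct e820entry *e820,
                                          unsigned int nr,
                                          uint64_t base, uint64_t end)
    {
        e820[nr].addr = base & ~0xfffULL;                       /* round down */
        e820[nr].size = ((end | 0xfffULL) + 1) - e820[nr].addr; /* round up */
        e820[nr].type = E820_RESERVED;
        return nr + 1;
    }

Whether the same ranges would additionally need to be punched out of (and kept out of) the guest physmap is the separate question discussed elsewhere in this thread.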



* Re: [PATCH] VT-d: fix RMRR handling
  2014-03-01  7:03 ` Zhang, Xiantao
@ 2014-03-04  8:04   ` Jan Beulich
  2014-03-17 14:48     ` Zhang, Xiantao
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2014-03-04  8:04 UTC (permalink / raw)
  To: Xiantao Zhang; +Cc: xen-devel

>>> On 01.03.14 at 08:03, "Zhang, Xiantao" <xiantao.zhang@intel.com> wrote:
> Jan, my comments embedded, thanks!
> 
>> Removing mapped RMRR tracking structures in dma_pte_clear_one() is wrong
>> for two reasons: First, these regions may cover more than a single page. And
>> second, multiple devices (and hence multiple devices assigned to any
>> particular guest) may share a single RMRR (whether assigning such devices to
>> distinct guests is a safe thing to do is another question).
> 
> Agree, this is a real issue as you described
> 
>> Therefore move the removal of the tracking structures into the counterpart
>> function to the one doing the insertion - intel_iommu_remove_device(), and
>> add a reference count to the tracking structure.
> 
> Adding a reference count is a good idea, but from the logic, seems you are 
> adding a global counter for each rmrr? 

I don't think so: mapped_rmrrs is rooted in struct hvm_iommu, which
is a per-domain thing.

> In theory, one rmrr may be mapped for multiple devices in multiple domains, 
> global counter for once rmrr is not enough. 
> Maybe we need to per-domain counter there ? 

And even if it was, I wouldn't think so, for security reasons: Sharing
an RMRR across domains is insecure afaict, so if anything we ought
to suppress assigning devices sharing an RMRR to different guests
(e.g. via intel_iommu_group_id() or a second, similar mechanism), at
least in "iommu=force" mode.

Jan


* Re: [PATCH] VT-d: fix RMRR handling
  2014-03-04  8:04   ` Jan Beulich
@ 2014-03-17 14:48     ` Zhang, Xiantao
  0 siblings, 0 replies; 6+ messages in thread
From: Zhang, Xiantao @ 2014-03-17 14:48 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

Ok, makes sense, thanks, Jan!
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Tuesday, March 4, 2014 4:05 PM
> To: Zhang, Xiantao
> Cc: xen-devel
> Subject: RE: [PATCH] VT-d: fix RMRR handling
> 
> >>> On 01.03.14 at 08:03, "Zhang, Xiantao" <xiantao.zhang@intel.com> wrote:
> > Jan, my comments embedded, thanks!
> >
> >> Removing mapped RMRR tracking structures in dma_pte_clear_one() is
> >> wrong for two reasons: First, these regions may cover more than a
> >> single page. And second, multiple devices (and hence multiple devices
> >> assigned to any particular guest) may share a single RMRR (whether
> >> assigning such devices to distinct guests is a safe thing to do is another
> question).
> >
> > Agree, this is a real issue as you described
> >
> >> Therefore move the removal of the tracking structures into the
> >> counterpart function to the one doing the insertion -
> >> intel_iommu_remove_device(), and add a reference count to the tracking
> structure.
> >
> > Adding a reference count is a good idea, but from the logic, seems you
> > are adding a global counter for each rmrr?
> 
> I don't think so: mapped_rmrrs is rooted in struct hvm_iommu, which is a
> per-domain thing.
> 
> > In theory, one rmrr may be mapped for multiple devices in multiple
> > domains, global counter for once rmrr is not enough.
> > Maybe we need to per-domain counter there ?
> 
> And even if it was, I wouldn't think so, for security reasons: Sharing an RMRR
> across domains is insecure afaict, so if anything we ought to suppress assigning
> devices sharing an RMRR to different guests (e.g. via intel_iommu_group_id() or
> a second, similar mechanism), at least in "iommu=force" mode.
> 
> Jan

