* [PATCH 0/6] VT-d: domain ID mapping improvements
@ 2021-11-12 9:46 Jan Beulich
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
` (5 more replies)
0 siblings, 6 replies; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:46 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
Two bug fixes, some cleanup, and a simplification for modern hardware.
1: properly reserve DID 0 for caching mode IOMMUs
2: split domid map cleanup check into a function
3: don't leak domid mapping on error path
4: tidy domid map handling
5: introduce helper to convert DID to domid_t
6: avoid allocating domid_{bit,}map[] when possible
This depends on the previously submitted (and already approved, except
for a possible release ack) "VT-d: per-domain IOMMU bitmap needs to
have dynamic size".
Jan
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
@ 2021-11-12 9:47 ` Jan Beulich
2021-11-12 11:23 ` Roger Pau Monné
` (2 more replies)
2021-11-12 9:48 ` [PATCH 2/6] VT-d: split domid map cleanup check into a function Jan Beulich
` (4 subsequent siblings)
5 siblings, 3 replies; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:47 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
still have DID 0 allocated to it, because of the zero-filling of
domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
used.
Fixes: b9c20c78789f ("VT-d: per-iommu domain-id")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1257,16 +1257,19 @@ int __init iommu_alloc(struct acpi_drhd_
if ( !iommu->domid_bitmap )
return -ENOMEM;
+ iommu->domid_map = xzalloc_array(u16, nr_dom);
+ if ( !iommu->domid_map )
+ return -ENOMEM;
+
/*
- * if Caching mode is set, then invalid translations are tagged with
- * domain id 0, Hence reserve bit 0 for it
+ * If Caching mode is set, then invalid translations are tagged with
+ * domain id 0. Hence reserve bit/slot 0.
*/
if ( cap_caching_mode(iommu->cap) )
+ {
+ iommu->domid_map[0] = DOMID_INVALID;
__set_bit(0, iommu->domid_bitmap);
-
- iommu->domid_map = xzalloc_array(u16, nr_dom);
- if ( !iommu->domid_map )
- return -ENOMEM;
+ }
return 0;
}
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 2/6] VT-d: split domid map cleanup check into a function
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
@ 2021-11-12 9:48 ` Jan Beulich
2021-11-12 12:31 ` Roger Pau Monné
2021-11-15 5:16 ` Tian, Kevin
2021-11-12 9:48 ` [PATCH 3/6] VT-d: don't leak domid mapping on error path Jan Beulich
` (3 subsequent siblings)
5 siblings, 2 replies; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:48 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
This logic will want invoking from elsewhere.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -157,6 +157,51 @@ static void cleanup_domid_map(struct dom
}
}
+static bool any_pdev_behind_iommu(const struct domain *d,
+ const struct pci_dev *exclude,
+ const struct vtd_iommu *iommu)
+{
+ const struct pci_dev *pdev;
+
+ for_each_pdev ( d, pdev )
+ {
+ const struct acpi_drhd_unit *drhd;
+
+ if ( pdev == exclude )
+ continue;
+
+ drhd = acpi_find_matched_drhd_unit(pdev);
+ if ( drhd && drhd->iommu == iommu )
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * If no other devices under the same iommu owned by this domain,
+ * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap.
+ */
+static void check_cleanup_domid_map(struct domain *d,
+ const struct pci_dev *exclude,
+ struct vtd_iommu *iommu)
+{
+ bool found = any_pdev_behind_iommu(d, exclude, iommu);
+
+ /*
+ * Hidden devices are associated with DomXEN but usable by the hardware
+ * domain. Hence they need considering here as well.
+ */
+ if ( !found && is_hardware_domain(d) )
+ found = any_pdev_behind_iommu(dom_xen, exclude, iommu);
+
+ if ( !found )
+ {
+ clear_bit(iommu->index, dom_iommu(d)->arch.vtd.iommu_bitmap);
+ cleanup_domid_map(d, iommu);
+ }
+}
+
static void sync_cache(const void *addr, unsigned int size)
{
static unsigned long clflush_size = 0;
@@ -1675,27 +1720,6 @@ int domain_context_unmap_one(
return rc;
}
-static bool any_pdev_behind_iommu(const struct domain *d,
- const struct pci_dev *exclude,
- const struct vtd_iommu *iommu)
-{
- const struct pci_dev *pdev;
-
- for_each_pdev ( d, pdev )
- {
- const struct acpi_drhd_unit *drhd;
-
- if ( pdev == exclude )
- continue;
-
- drhd = acpi_find_matched_drhd_unit(pdev);
- if ( drhd && drhd->iommu == iommu )
- return true;
- }
-
- return false;
-}
-
static int domain_context_unmap(struct domain *domain, u8 devfn,
struct pci_dev *pdev)
{
@@ -1704,7 +1728,6 @@ static int domain_context_unmap(struct d
int ret;
uint16_t seg = pdev->seg;
uint8_t bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
- bool found;
switch ( pdev->type )
{
@@ -1780,28 +1803,10 @@ static int domain_context_unmap(struct d
return -EINVAL;
}
- if ( ret || QUARANTINE_SKIP(domain) || pdev->devfn != devfn )
- return ret;
+ if ( !ret && !QUARANTINE_SKIP(domain) && pdev->devfn == devfn )
+ check_cleanup_domid_map(domain, pdev, iommu);
- /*
- * If no other devices under the same iommu owned by this domain,
- * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap.
- */
- found = any_pdev_behind_iommu(domain, pdev, iommu);
- /*
- * Hidden devices are associated with DomXEN but usable by the hardware
- * domain. Hence they need considering here as well.
- */
- if ( !found && is_hardware_domain(domain) )
- found = any_pdev_behind_iommu(dom_xen, pdev, iommu);
-
- if ( !found )
- {
- clear_bit(iommu->index, dom_iommu(domain)->arch.vtd.iommu_bitmap);
- cleanup_domid_map(domain, iommu);
- }
-
- return 0;
+ return ret;
}
static void iommu_clear_root_pgtable(struct domain *d)
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 3/6] VT-d: don't leak domid mapping on error path
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
2021-11-12 9:48 ` [PATCH 2/6] VT-d: split domid map cleanup check into a function Jan Beulich
@ 2021-11-12 9:48 ` Jan Beulich
2021-11-12 13:42 ` Roger Pau Monné
2021-11-15 5:21 ` Tian, Kevin
2021-11-12 9:49 ` [PATCH 4/6] VT-d: tidy domid map handling Jan Beulich
` (2 subsequent siblings)
5 siblings, 2 replies; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:48 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
While domain_context_mapping() invokes domain_context_unmap() in a sub-
case of handling DEV_TYPE_PCI when encountering an error, thus avoiding
a leak, individual calls to domain_context_mapping_one() aren't
similarly covered. Such a leak might persist until domain destruction.
Leverage that these cases can be recognized by pdev being non-NULL.
Fixes: dec403cc668f ("VT-d: fix iommu_domid for PCI/PCIx devices assignment")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
The Fixes: tag isn't strictly correct, as error handling had more severe
shortcomings at the time. But I wouldn't want to blame a commit
improving error handling to have introduced the leak.
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1518,7 +1518,12 @@ int domain_context_mapping_one(
rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC);
if ( rc )
- domain_context_unmap_one(domain, iommu, bus, devfn);
+ {
+ ret = domain_context_unmap_one(domain, iommu, bus, devfn);
+
+ if ( !ret && pdev && pdev->devfn == devfn )
+ check_cleanup_domid_map(domain, pdev, iommu);
+ }
return rc;
}
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 4/6] VT-d: tidy domid map handling
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
` (2 preceding siblings ...)
2021-11-12 9:48 ` [PATCH 3/6] VT-d: don't leak domid mapping on error path Jan Beulich
@ 2021-11-12 9:49 ` Jan Beulich
2021-11-15 5:51 ` Tian, Kevin
2021-11-12 9:49 ` [PATCH 5/6] VT-d: introduce helper to convert DID to domid_t Jan Beulich
2021-11-12 9:50 ` [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible Jan Beulich
5 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:49 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
- Correct struct field type.
- Use unsigned int when that suffices.
- Eliminate a (badly typed) local variable from
context_set_domain_id().
- Don't use -EFAULT inappropriately.
- Move set_bit() such that it won't be done redundantly.
- Constification.
- Reduce scope of some variables.
- Coding style.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -62,10 +62,10 @@ static struct tasklet vtd_fault_tasklet;
static int setup_hwdom_device(u8 devfn, struct pci_dev *);
static void setup_hwdom_rmrr(struct domain *d);
-static int domain_iommu_domid(struct domain *d,
- struct vtd_iommu *iommu)
+static int domain_iommu_domid(const struct domain *d,
+ const struct vtd_iommu *iommu)
{
- unsigned long nr_dom, i;
+ unsigned int nr_dom, i;
nr_dom = cap_ndoms(iommu->cap);
i = find_first_bit(iommu->domid_bitmap, nr_dom);
@@ -74,7 +74,7 @@ static int domain_iommu_domid(struct dom
if ( iommu->domid_map[i] == d->domain_id )
return i;
- i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
+ i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
}
if ( !d->is_dying )
@@ -88,61 +88,52 @@ static int domain_iommu_domid(struct dom
#define DID_FIELD_WIDTH 16
#define DID_HIGH_OFFSET 8
static int context_set_domain_id(struct context_entry *context,
- struct domain *d,
+ const struct domain *d,
struct vtd_iommu *iommu)
{
- unsigned long nr_dom, i;
- int found = 0;
+ unsigned int nr_dom, i;
ASSERT(spin_is_locked(&iommu->lock));
nr_dom = cap_ndoms(iommu->cap);
i = find_first_bit(iommu->domid_bitmap, nr_dom);
- while ( i < nr_dom )
- {
- if ( iommu->domid_map[i] == d->domain_id )
- {
- found = 1;
- break;
- }
- i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
- }
+ while ( i < nr_dom && iommu->domid_map[i] != d->domain_id )
+ i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
- if ( found == 0 )
+ if ( i >= nr_dom )
{
i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
if ( i >= nr_dom )
{
dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
- return -EFAULT;
+ return -EBUSY;
}
iommu->domid_map[i] = d->domain_id;
+ set_bit(i, iommu->domid_bitmap);
}
- set_bit(i, iommu->domid_bitmap);
context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
return 0;
}
-static int context_get_domain_id(struct context_entry *context,
- struct vtd_iommu *iommu)
+static int context_get_domain_id(const struct context_entry *context,
+ const struct vtd_iommu *iommu)
{
- unsigned long dom_index, nr_dom;
int domid = -1;
- if (iommu && context)
+ if ( iommu && context )
{
- nr_dom = cap_ndoms(iommu->cap);
-
- dom_index = context_domain_id(*context);
+ unsigned int nr_dom = cap_ndoms(iommu->cap);
+ unsigned int dom_index = context_domain_id(*context);
if ( dom_index < nr_dom && iommu->domid_map )
domid = iommu->domid_map[dom_index];
else
dprintk(XENLOG_DEBUG VTDPREFIX,
- "dom_index %lu exceeds nr_dom %lu or iommu has no domid_map\n",
+ "dom_index %u exceeds nr_dom %u or iommu has no domid_map\n",
dom_index, nr_dom);
}
+
return domid;
}
@@ -1302,7 +1293,7 @@ int __init iommu_alloc(struct acpi_drhd_
if ( !iommu->domid_bitmap )
return -ENOMEM;
- iommu->domid_map = xzalloc_array(u16, nr_dom);
+ iommu->domid_map = xzalloc_array(domid_t, nr_dom);
if ( !iommu->domid_map )
return -ENOMEM;
@@ -1477,11 +1468,12 @@ int domain_context_mapping_one(
spin_unlock(&hd->arch.mapping_lock);
}
- if ( context_set_domain_id(context, domain, iommu) )
+ rc = context_set_domain_id(context, domain, iommu);
+ if ( rc )
{
spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
- return -EFAULT;
+ return rc;
}
context_set_address_width(*context, level_to_agaw(iommu->nr_pt_levels));
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -505,7 +505,7 @@ struct vtd_iommu {
struct list_head ats_devices;
unsigned long *domid_bitmap; /* domain id bitmap */
- u16 *domid_map; /* domain id mapping array */
+ domid_t *domid_map; /* domain id mapping array */
uint32_t version;
};
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 5/6] VT-d: introduce helper to convert DID to domid_t
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
` (3 preceding siblings ...)
2021-11-12 9:49 ` [PATCH 4/6] VT-d: tidy domid map handling Jan Beulich
@ 2021-11-12 9:49 ` Jan Beulich
2021-11-15 5:54 ` Tian, Kevin
2021-11-12 9:50 ` [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible Jan Beulich
5 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:49 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
This is in preparation of adding another "translation" method. Take the
combination of the extra validation both previously open-coded have been
doing: Bounds check and bitmap check. But don't propagate the previous
pointless check of whether ->domid_map[] was actually allocated, as
failure there would lead to overall failure of IOMMU initialization
anyway.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -45,6 +45,8 @@ void disable_intremap(struct vtd_iommu *
int iommu_alloc(struct acpi_drhd_unit *drhd);
void iommu_free(struct acpi_drhd_unit *drhd);
+domid_t did_to_domain_id(const struct vtd_iommu *iommu, unsigned int did);
+
int iommu_flush_iec_global(struct vtd_iommu *iommu);
int iommu_flush_iec_index(struct vtd_iommu *iommu, u8 im, u16 iidx);
void clear_fault_bits(struct vtd_iommu *iommu);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -123,15 +123,16 @@ static int context_get_domain_id(const s
if ( iommu && context )
{
- unsigned int nr_dom = cap_ndoms(iommu->cap);
unsigned int dom_index = context_domain_id(*context);
- if ( dom_index < nr_dom && iommu->domid_map )
- domid = iommu->domid_map[dom_index];
- else
+ domid = did_to_domain_id(iommu, dom_index);
+ if ( domid == DOMID_INVALID )
+ {
dprintk(XENLOG_DEBUG VTDPREFIX,
- "dom_index %u exceeds nr_dom %u or iommu has no domid_map\n",
- dom_index, nr_dom);
+ "no domid for did %u (nr_dom %u)\n",
+ dom_index, cap_ndoms(iommu->cap));
+ domid = -1;
+ }
}
return domid;
@@ -193,6 +194,14 @@ static void check_cleanup_domid_map(stru
}
}
+domid_t did_to_domain_id(const struct vtd_iommu *iommu, unsigned int did)
+{
+ if ( did >= cap_ndoms(iommu->cap) || !test_bit(did, iommu->domid_bitmap) )
+ return DOMID_INVALID;
+
+ return iommu->domid_map[did];
+}
+
static void sync_cache(const void *addr, unsigned int size)
{
static unsigned long clflush_size = 0;
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -229,10 +229,7 @@ static int __must_check dev_invalidate_s
rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
if ( rc == -ETIMEDOUT )
{
- struct domain *d = NULL;
-
- if ( test_bit(did, iommu->domid_bitmap) )
- d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+ struct domain *d = rcu_lock_domain_by_id(did_to_domain_id(iommu, did));
/*
* In case the domain has been freed or the IOMMU domid bitmap is
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
` (4 preceding siblings ...)
2021-11-12 9:49 ` [PATCH 5/6] VT-d: introduce helper to convert DID to domid_t Jan Beulich
@ 2021-11-12 9:50 ` Jan Beulich
2021-11-15 6:18 ` Tian, Kevin
5 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 9:50 UTC (permalink / raw)
To: xen-devel; +Cc: Kevin Tian
When an IOMMU implements the full 16 bits worth of DID in context
entries, there's no point going through a memory base translation table.
For IOMMUs not using Caching Mode we can simply use the domain IDs
verbatim, while for Caching Mode we need to avoid DID 0.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
For the case where the memory tables are needed, xvzalloc_array() would
of course be an option to use here as well, despite this being boot time
allocations. Yet the introduction of xvmalloc() et al continues to be
stuck ...
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -62,11 +62,32 @@ static struct tasklet vtd_fault_tasklet;
static int setup_hwdom_device(u8 devfn, struct pci_dev *);
static void setup_hwdom_rmrr(struct domain *d);
+static bool domid_mapping(const struct vtd_iommu *iommu)
+{
+ return (const void *)iommu->domid_bitmap != (const void *)iommu->domid_map;
+}
+
+static domid_t convert_domid(const struct vtd_iommu *iommu, domid_t domid)
+{
+ /*
+ * While we need to avoid DID 0 for caching-mode IOMMUs, maintain
+ * the property of the transformation being the same in either
+ * direction. By clipping to 16 bits we ensure that the resulting
+ * DID will fit in the respective context entry field.
+ */
+ BUILD_BUG_ON(sizeof(domid_t) > sizeof(uint16_t));
+
+ return !cap_caching_mode(iommu->cap) ? domid : ~domid;
+}
+
static int domain_iommu_domid(const struct domain *d,
const struct vtd_iommu *iommu)
{
unsigned int nr_dom, i;
+ if ( !domid_mapping(iommu) )
+ return convert_domid(iommu, d->domain_id);
+
nr_dom = cap_ndoms(iommu->cap);
i = find_first_bit(iommu->domid_bitmap, nr_dom);
while ( i < nr_dom )
@@ -91,26 +112,32 @@ static int context_set_domain_id(struct
const struct domain *d,
struct vtd_iommu *iommu)
{
- unsigned int nr_dom, i;
+ unsigned int i;
ASSERT(spin_is_locked(&iommu->lock));
- nr_dom = cap_ndoms(iommu->cap);
- i = find_first_bit(iommu->domid_bitmap, nr_dom);
- while ( i < nr_dom && iommu->domid_map[i] != d->domain_id )
- i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
-
- if ( i >= nr_dom )
+ if ( domid_mapping(iommu) )
{
- i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
+ unsigned int nr_dom = cap_ndoms(iommu->cap);
+
+ i = find_first_bit(iommu->domid_bitmap, nr_dom);
+ while ( i < nr_dom && iommu->domid_map[i] != d->domain_id )
+ i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
+
if ( i >= nr_dom )
{
- dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
- return -EBUSY;
+ i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
+ if ( i >= nr_dom )
+ {
+ dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain id\n");
+ return -EBUSY;
+ }
+ iommu->domid_map[i] = d->domain_id;
+ set_bit(i, iommu->domid_bitmap);
}
- iommu->domid_map[i] = d->domain_id;
- set_bit(i, iommu->domid_bitmap);
}
+ else
+ i = convert_domid(iommu, d->domain_id);
context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
return 0;
@@ -140,7 +167,12 @@ static int context_get_domain_id(const s
static void cleanup_domid_map(struct domain *domain, struct vtd_iommu *iommu)
{
- int iommu_domid = domain_iommu_domid(domain, iommu);
+ int iommu_domid;
+
+ if ( !domid_mapping(iommu) )
+ return;
+
+ iommu_domid = domain_iommu_domid(domain, iommu);
if ( iommu_domid >= 0 )
{
@@ -196,7 +228,13 @@ static void check_cleanup_domid_map(stru
domid_t did_to_domain_id(const struct vtd_iommu *iommu, unsigned int did)
{
- if ( did >= cap_ndoms(iommu->cap) || !test_bit(did, iommu->domid_bitmap) )
+ if ( did >= min(cap_ndoms(iommu->cap), DOMID_MASK + 1) )
+ return DOMID_INVALID;
+
+ if ( !domid_mapping(iommu) )
+ return convert_domid(iommu, did);
+
+ if ( !test_bit(did, iommu->domid_bitmap) )
return DOMID_INVALID;
return iommu->domid_map[did];
@@ -1296,24 +1334,32 @@ int __init iommu_alloc(struct acpi_drhd_
if ( !ecap_coherent(iommu->ecap) )
vtd_ops.sync_cache = sync_cache;
- /* allocate domain id bitmap */
nr_dom = cap_ndoms(iommu->cap);
- iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
- if ( !iommu->domid_bitmap )
- return -ENOMEM;
- iommu->domid_map = xzalloc_array(domid_t, nr_dom);
- if ( !iommu->domid_map )
- return -ENOMEM;
+ if ( nr_dom <= DOMID_MASK + cap_caching_mode(iommu->cap) )
+ {
+ /* Allocate domain id (bit) maps. */
+ iommu->domid_bitmap = xzalloc_array(unsigned long,
+ BITS_TO_LONGS(nr_dom));
+ iommu->domid_map = xzalloc_array(domid_t, nr_dom);
+ if ( !iommu->domid_bitmap || !iommu->domid_map )
+ return -ENOMEM;
- /*
- * If Caching mode is set, then invalid translations are tagged with
- * domain id 0. Hence reserve bit/slot 0.
- */
- if ( cap_caching_mode(iommu->cap) )
+ /*
+ * If Caching mode is set, then invalid translations are tagged
+ * with domain id 0. Hence reserve bit/slot 0.
+ */
+ if ( cap_caching_mode(iommu->cap) )
+ {
+ iommu->domid_map[0] = DOMID_INVALID;
+ __set_bit(0, iommu->domid_bitmap);
+ }
+ }
+ else
{
- iommu->domid_map[0] = DOMID_INVALID;
- __set_bit(0, iommu->domid_bitmap);
+ /* Don't leave dangling NULL pointers. */
+ iommu->domid_bitmap = ZERO_BLOCK_PTR;
+ iommu->domid_map = ZERO_BLOCK_PTR;
}
return 0;
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -87,7 +87,7 @@
#define cap_plmr(c) (((c) >> 5) & 1)
#define cap_rwbf(c) (((c) >> 4) & 1)
#define cap_afl(c) (((c) >> 3) & 1)
-#define cap_ndoms(c) (1 << (4 + 2 * ((c) & 0x7)))
+#define cap_ndoms(c) (1U << (4 + 2 * ((c) & 0x7)))
/*
* Extended Capability Register
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
@ 2021-11-12 11:23 ` Roger Pau Monné
2021-11-12 12:07 ` Jan Beulich
2021-11-12 12:21 ` Roger Pau Monné
2021-11-15 5:13 ` Tian, Kevin
2 siblings, 1 reply; 24+ messages in thread
From: Roger Pau Monné @ 2021-11-12 11:23 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Kevin Tian
On Fri, Nov 12, 2021 at 10:47:59AM +0100, Jan Beulich wrote:
> Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
> still have DID 0 allocated to it, because of the zero-filling of
> domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
> used.
Shouldn't the whole domid_map be initialized to DOMID_INVALID to
prevent dom0 matching against any unused slot?
Similarly cleanup_domid_map should set the slot to DOMID_INVALID.
Thanks, Roger.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 11:23 ` Roger Pau Monné
@ 2021-11-12 12:07 ` Jan Beulich
2021-11-12 12:19 ` Roger Pau Monné
0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 12:07 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: xen-devel, Kevin Tian
On 12.11.2021 12:23, Roger Pau Monné wrote:
> On Fri, Nov 12, 2021 at 10:47:59AM +0100, Jan Beulich wrote:
>> Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
>> still have DID 0 allocated to it, because of the zero-filling of
>> domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
>> used.
>
> Shouldn't the whole domid_map be initialized to DOMID_INVALID to
> prevent dom0 matching against any unused slot?
>
> Similarly cleanup_domid_map should set the slot to DOMID_INVALID.
I don't think so, that's the purpose of setting the bit in domid_bitmap.
The problem really was only with setting a bit in that bitmap without
invalidating the corresponding slot.
This said, I can still see value in doing as you suggest, but as a
separate change with a different justification. In fact domid_bitmap is
kind of redundant now anyway; aiui it was the thing that existed first.
Then domid_map[] was simply added, rather than fully replacing the
original bitmap.
Jan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 12:07 ` Jan Beulich
@ 2021-11-12 12:19 ` Roger Pau Monné
2021-11-15 5:13 ` Tian, Kevin
0 siblings, 1 reply; 24+ messages in thread
From: Roger Pau Monné @ 2021-11-12 12:19 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Kevin Tian
On Fri, Nov 12, 2021 at 01:07:33PM +0100, Jan Beulich wrote:
> On 12.11.2021 12:23, Roger Pau Monné wrote:
> > On Fri, Nov 12, 2021 at 10:47:59AM +0100, Jan Beulich wrote:
> >> Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
> >> still have DID 0 allocated to it, because of the zero-filling of
> >> domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
> >> used.
> >
> > Shouldn't the whole domid_map be initialized to DOMID_INVALID to
> > prevent dom0 matching against any unused slot?
> >
> > Similarly cleanup_domid_map should set the slot to DOMID_INVALID.
>
> I don't think so, that's the purpose of setting the bit in domid_bitmap.
> The problem really was only with setting a bit in that bitmap without
> invalidating the corresponding slot.
>
> This said, I can still see value in doing as you suggest, but as a
> separate change with a different justification. In fact domid_bitmap is
> kind of redundant now anyway; aiui it was the thing that existed first.
> Then domid_map[] was simply added, rather than fully replacing the
> original bitmap.
I guess using domid_bitmap to find a free slot is faster than scanning
the array of iommu IDs to domids. Not sure how performance critical
that search is, so maybe it's fine to just drop domid_bitmap and rely
exclusively on the array.
Thanks, Roger.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
2021-11-12 11:23 ` Roger Pau Monné
@ 2021-11-12 12:21 ` Roger Pau Monné
2021-11-15 5:13 ` Tian, Kevin
2 siblings, 0 replies; 24+ messages in thread
From: Roger Pau Monné @ 2021-11-12 12:21 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Kevin Tian
On Fri, Nov 12, 2021 at 10:47:59AM +0100, Jan Beulich wrote:
> Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
> still have DID 0 allocated to it, because of the zero-filling of
> domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
> used.
>
> Fixes: b9c20c78789f ("VT-d: per-iommu domain-id")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Thanks, Roger.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/6] VT-d: split domid map cleanup check into a function
2021-11-12 9:48 ` [PATCH 2/6] VT-d: split domid map cleanup check into a function Jan Beulich
@ 2021-11-12 12:31 ` Roger Pau Monné
2021-11-15 5:16 ` Tian, Kevin
1 sibling, 0 replies; 24+ messages in thread
From: Roger Pau Monné @ 2021-11-12 12:31 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Kevin Tian
On Fri, Nov 12, 2021 at 10:48:19AM +0100, Jan Beulich wrote:
> This logic will want invoking from elsewhere.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Might be worth to explicitly note this is a non functional change.
Thanks, Roger.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 3/6] VT-d: don't leak domid mapping on error path
2021-11-12 9:48 ` [PATCH 3/6] VT-d: don't leak domid mapping on error path Jan Beulich
@ 2021-11-12 13:42 ` Roger Pau Monné
2021-11-12 13:45 ` Jan Beulich
2021-11-15 5:21 ` Tian, Kevin
1 sibling, 1 reply; 24+ messages in thread
From: Roger Pau Monné @ 2021-11-12 13:42 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Kevin Tian
On Fri, Nov 12, 2021 at 10:48:43AM +0100, Jan Beulich wrote:
> While domain_context_mapping() invokes domain_context_unmap() in a sub-
> case of handling DEV_TYPE_PCI when encountering an error, thus avoiding
> a leak, individual calls to domain_context_mapping_one() aren't
> similarly covered. Such a leak might persist until domain destruction.
> Leverage that these cases can be recognized by pdev being non-NULL.
Would it help to place the domid cleanup in domain_context_unmap_one,
as that would then cover calls from domain_context_unmap and the
failure path in domain_context_mapping_one.
Thanks, Roger.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 3/6] VT-d: don't leak domid mapping on error path
2021-11-12 13:42 ` Roger Pau Monné
@ 2021-11-12 13:45 ` Jan Beulich
2021-11-12 14:35 ` Roger Pau Monné
0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2021-11-12 13:45 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: xen-devel, Kevin Tian
On 12.11.2021 14:42, Roger Pau Monné wrote:
> On Fri, Nov 12, 2021 at 10:48:43AM +0100, Jan Beulich wrote:
>> While domain_context_mapping() invokes domain_context_unmap() in a sub-
>> case of handling DEV_TYPE_PCI when encountering an error, thus avoiding
>> a leak, individual calls to domain_context_mapping_one() aren't
>> similarly covered. Such a leak might persist until domain destruction.
>> Leverage that these cases can be recognized by pdev being non-NULL.
>
> Would it help to place the domid cleanup in domain_context_unmap_one,
> as that would then cover calls from domain_context_unmap and the
> failure path in domain_context_mapping_one.
I don't think that would work (without further convolution), because of
the up to 3 successive calls in DEV_TYPE_PCI handling. Cleanup may happen
only on the first map's error path or after the last unmap.
Jan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 3/6] VT-d: don't leak domid mapping on error path
2021-11-12 13:45 ` Jan Beulich
@ 2021-11-12 14:35 ` Roger Pau Monné
2021-11-15 9:32 ` Jan Beulich
0 siblings, 1 reply; 24+ messages in thread
From: Roger Pau Monné @ 2021-11-12 14:35 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Kevin Tian
On Fri, Nov 12, 2021 at 02:45:14PM +0100, Jan Beulich wrote:
> On 12.11.2021 14:42, Roger Pau Monné wrote:
> > On Fri, Nov 12, 2021 at 10:48:43AM +0100, Jan Beulich wrote:
> >> While domain_context_mapping() invokes domain_context_unmap() in a sub-
> >> case of handling DEV_TYPE_PCI when encountering an error, thus avoiding
> >> a leak, individual calls to domain_context_mapping_one() aren't
> >> similarly covered. Such a leak might persist until domain destruction.
> >> Leverage that these cases can be recognized by pdev being non-NULL.
> >
> > Would it help to place the domid cleanup in domain_context_unmap_one,
> > as that would then cover calls from domain_context_unmap and the
> > failure path in domain_context_mapping_one.
>
> I don't think that would work (without further convolution), because of
> the up to 3 successive calls in DEV_TYPE_PCI handling. Cleanup may happen
> only on the first map's error path or after the last unmap.
Hm, I see. And AFAICT that's because some devices that get assigned to
a guest iommu context are not actually assigned to the guest (ie:
pdev->domain doesn't get set, neither the device is added to the
per-domain list), which makes them invisible to
any_pdev_behind_iommu.
I dislike that the domid is added in domain_context_mapping_one, while
the cleanup is not done in domain_context_unmap_one, and that some
devices context could be using the domain id without being assigned to
the domain.
Thanks, Roger.
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 12:19 ` Roger Pau Monné
@ 2021-11-15 5:13 ` Tian, Kevin
0 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 5:13 UTC (permalink / raw)
To: Roger Pau Monné, Jan Beulich; +Cc: xen-devel
> From: Roger Pau Monné <roger.pau@citrix.com>
> Sent: Friday, November 12, 2021 8:19 PM
>
> On Fri, Nov 12, 2021 at 01:07:33PM +0100, Jan Beulich wrote:
> > On 12.11.2021 12:23, Roger Pau Monné wrote:
> > > On Fri, Nov 12, 2021 at 10:47:59AM +0100, Jan Beulich wrote:
> > >> Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
> > >> still have DID 0 allocated to it, because of the zero-filling of
> > >> domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
> > >> used.
> > >
> > > Shouldn't the whole domid_map be initialized to DOMID_INVALID to
> > > prevent dom0 matching against any unused slot?
> > >
> > > Similarly cleanup_domid_map should set the slot to DOMID_INVALID.
> >
> > I don't think so, that's the purpose of setting the bit in domid_bitmap.
> > The problem really was only with setting a bit in that bitmap without
> > invalidating the corresponding slot.
> >
> > This said, I can still see value in doing as you suggest, but as a
> > separate change with a different justification. In fact domid_bitmap is
> > kind of redundant now anyway; aiui it was the thing that existed first.
> > Then domid_map[] was simply added, rather than fully replacing the
> > original bitmap.
>
> I guess using domid_bitmap to find a free slot is faster than scanning
> the array of iommu IDs to domids. Not sure how performance critical
> that search is, so maybe it's fine to just drop domid_bitmap and rely
> exclusively on the array.
>
I'm fine to drop domid_bitmap. I don't think domain creation
is in hot path...
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
2021-11-12 11:23 ` Roger Pau Monné
2021-11-12 12:21 ` Roger Pau Monné
@ 2021-11-15 5:13 ` Tian, Kevin
2 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 5:13 UTC (permalink / raw)
To: Jan Beulich, xen-devel
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 12, 2021 5:48 PM
>
> Merely setting bit 0 in the bitmap is insufficient, as then Dom0 will
> still have DID 0 allocated to it, because of the zero-filling of
> domid_map[]. Set slot 0 to DOMID_INVALID to keep DID 0 from getting
> used.
>
> Fixes: b9c20c78789f ("VT-d: per-iommu domain-id")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -1257,16 +1257,19 @@ int __init iommu_alloc(struct acpi_drhd_
> if ( !iommu->domid_bitmap )
> return -ENOMEM;
>
> + iommu->domid_map = xzalloc_array(u16, nr_dom);
> + if ( !iommu->domid_map )
> + return -ENOMEM;
> +
> /*
> - * if Caching mode is set, then invalid translations are tagged with
> - * domain id 0, Hence reserve bit 0 for it
> + * If Caching mode is set, then invalid translations are tagged with
> + * domain id 0. Hence reserve bit/slot 0.
> */
> if ( cap_caching_mode(iommu->cap) )
> + {
> + iommu->domid_map[0] = DOMID_INVALID;
> __set_bit(0, iommu->domid_bitmap);
> -
> - iommu->domid_map = xzalloc_array(u16, nr_dom);
> - if ( !iommu->domid_map )
> - return -ENOMEM;
> + }
>
> return 0;
> }
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 2/6] VT-d: split domid map cleanup check into a function
2021-11-12 9:48 ` [PATCH 2/6] VT-d: split domid map cleanup check into a function Jan Beulich
2021-11-12 12:31 ` Roger Pau Monné
@ 2021-11-15 5:16 ` Tian, Kevin
1 sibling, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 5:16 UTC (permalink / raw)
To: Jan Beulich, xen-devel
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 12, 2021 5:48 PM
>
> This logic will want invoking from elsewhere.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -157,6 +157,51 @@ static void cleanup_domid_map(struct dom
> }
> }
>
> +static bool any_pdev_behind_iommu(const struct domain *d,
> + const struct pci_dev *exclude,
> + const struct vtd_iommu *iommu)
> +{
> + const struct pci_dev *pdev;
> +
> + for_each_pdev ( d, pdev )
> + {
> + const struct acpi_drhd_unit *drhd;
> +
> + if ( pdev == exclude )
> + continue;
> +
> + drhd = acpi_find_matched_drhd_unit(pdev);
> + if ( drhd && drhd->iommu == iommu )
> + return true;
> + }
> +
> + return false;
> +}
> +
> +/*
> + * If no other devices under the same iommu owned by this domain,
> + * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap.
> + */
> +static void check_cleanup_domid_map(struct domain *d,
> + const struct pci_dev *exclude,
> + struct vtd_iommu *iommu)
> +{
> + bool found = any_pdev_behind_iommu(d, exclude, iommu);
> +
> + /*
> + * Hidden devices are associated with DomXEN but usable by the
> hardware
> + * domain. Hence they need considering here as well.
> + */
> + if ( !found && is_hardware_domain(d) )
> + found = any_pdev_behind_iommu(dom_xen, exclude, iommu);
> +
> + if ( !found )
> + {
> + clear_bit(iommu->index, dom_iommu(d)->arch.vtd.iommu_bitmap);
> + cleanup_domid_map(d, iommu);
> + }
> +}
> +
> static void sync_cache(const void *addr, unsigned int size)
> {
> static unsigned long clflush_size = 0;
> @@ -1675,27 +1720,6 @@ int domain_context_unmap_one(
> return rc;
> }
>
> -static bool any_pdev_behind_iommu(const struct domain *d,
> - const struct pci_dev *exclude,
> - const struct vtd_iommu *iommu)
> -{
> - const struct pci_dev *pdev;
> -
> - for_each_pdev ( d, pdev )
> - {
> - const struct acpi_drhd_unit *drhd;
> -
> - if ( pdev == exclude )
> - continue;
> -
> - drhd = acpi_find_matched_drhd_unit(pdev);
> - if ( drhd && drhd->iommu == iommu )
> - return true;
> - }
> -
> - return false;
> -}
> -
> static int domain_context_unmap(struct domain *domain, u8 devfn,
> struct pci_dev *pdev)
> {
> @@ -1704,7 +1728,6 @@ static int domain_context_unmap(struct d
> int ret;
> uint16_t seg = pdev->seg;
> uint8_t bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
> - bool found;
>
> switch ( pdev->type )
> {
> @@ -1780,28 +1803,10 @@ static int domain_context_unmap(struct d
> return -EINVAL;
> }
>
> - if ( ret || QUARANTINE_SKIP(domain) || pdev->devfn != devfn )
> - return ret;
> + if ( !ret && !QUARANTINE_SKIP(domain) && pdev->devfn == devfn )
> + check_cleanup_domid_map(domain, pdev, iommu);
>
> - /*
> - * If no other devices under the same iommu owned by this domain,
> - * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap.
> - */
> - found = any_pdev_behind_iommu(domain, pdev, iommu);
> - /*
> - * Hidden devices are associated with DomXEN but usable by the
> hardware
> - * domain. Hence they need considering here as well.
> - */
> - if ( !found && is_hardware_domain(domain) )
> - found = any_pdev_behind_iommu(dom_xen, pdev, iommu);
> -
> - if ( !found )
> - {
> - clear_bit(iommu->index, dom_iommu(domain)-
> >arch.vtd.iommu_bitmap);
> - cleanup_domid_map(domain, iommu);
> - }
> -
> - return 0;
> + return ret;
> }
>
> static void iommu_clear_root_pgtable(struct domain *d)
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 3/6] VT-d: don't leak domid mapping on error path
2021-11-12 9:48 ` [PATCH 3/6] VT-d: don't leak domid mapping on error path Jan Beulich
2021-11-12 13:42 ` Roger Pau Monné
@ 2021-11-15 5:21 ` Tian, Kevin
1 sibling, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 5:21 UTC (permalink / raw)
To: Jan Beulich, xen-devel
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 12, 2021 5:49 PM
>
> While domain_context_mapping() invokes domain_context_unmap() in a
> sub-
> case of handling DEV_TYPE_PCI when encountering an error, thus avoiding
> a leak, individual calls to domain_context_mapping_one() aren't
> similarly covered. Such a leak might persist until domain destruction.
> Leverage that these cases can be recognized by pdev being non-NULL.
>
> Fixes: dec403cc668f ("VT-d: fix iommu_domid for PCI/PCIx devices
> assignment")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> ---
> The Fixes: tag isn't strictly correct, as error handling had more severe
> shortcomings at the time. But I wouldn't want to blame a commit
> improving error handling to have introduced the leak.
>
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -1518,7 +1518,12 @@ int domain_context_mapping_one(
> rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC);
>
> if ( rc )
> - domain_context_unmap_one(domain, iommu, bus, devfn);
> + {
> + ret = domain_context_unmap_one(domain, iommu, bus, devfn);
> +
> + if ( !ret && pdev && pdev->devfn == devfn )
> + check_cleanup_domid_map(domain, pdev, iommu);
> + }
>
> return rc;
> }
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 4/6] VT-d: tidy domid map handling
2021-11-12 9:49 ` [PATCH 4/6] VT-d: tidy domid map handling Jan Beulich
@ 2021-11-15 5:51 ` Tian, Kevin
0 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 5:51 UTC (permalink / raw)
To: Jan Beulich, xen-devel
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 12, 2021 5:49 PM
>
> - Correct struct field type.
> - Use unsigned int when that suffices.
> - Eliminate a (badly typed) local variable from
> context_set_domain_id().
> - Don't use -EFAULT inappropriately.
> - Move set_bit() such that it won't be done redundantly.
> - Constification.
> - Reduce scope of some variables.
> - Coding style.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -62,10 +62,10 @@ static struct tasklet vtd_fault_tasklet;
> static int setup_hwdom_device(u8 devfn, struct pci_dev *);
> static void setup_hwdom_rmrr(struct domain *d);
>
> -static int domain_iommu_domid(struct domain *d,
> - struct vtd_iommu *iommu)
> +static int domain_iommu_domid(const struct domain *d,
> + const struct vtd_iommu *iommu)
> {
> - unsigned long nr_dom, i;
> + unsigned int nr_dom, i;
>
> nr_dom = cap_ndoms(iommu->cap);
> i = find_first_bit(iommu->domid_bitmap, nr_dom);
> @@ -74,7 +74,7 @@ static int domain_iommu_domid(struct dom
> if ( iommu->domid_map[i] == d->domain_id )
> return i;
>
> - i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
> + i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
> }
>
> if ( !d->is_dying )
> @@ -88,61 +88,52 @@ static int domain_iommu_domid(struct dom
> #define DID_FIELD_WIDTH 16
> #define DID_HIGH_OFFSET 8
> static int context_set_domain_id(struct context_entry *context,
> - struct domain *d,
> + const struct domain *d,
> struct vtd_iommu *iommu)
> {
> - unsigned long nr_dom, i;
> - int found = 0;
> + unsigned int nr_dom, i;
>
> ASSERT(spin_is_locked(&iommu->lock));
>
> nr_dom = cap_ndoms(iommu->cap);
> i = find_first_bit(iommu->domid_bitmap, nr_dom);
> - while ( i < nr_dom )
> - {
> - if ( iommu->domid_map[i] == d->domain_id )
> - {
> - found = 1;
> - break;
> - }
> - i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
> - }
> + while ( i < nr_dom && iommu->domid_map[i] != d->domain_id )
> + i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
>
> - if ( found == 0 )
> + if ( i >= nr_dom )
> {
> i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
> if ( i >= nr_dom )
> {
> dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
> - return -EFAULT;
> + return -EBUSY;
> }
> iommu->domid_map[i] = d->domain_id;
> + set_bit(i, iommu->domid_bitmap);
> }
>
> - set_bit(i, iommu->domid_bitmap);
> context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
> return 0;
> }
>
> -static int context_get_domain_id(struct context_entry *context,
> - struct vtd_iommu *iommu)
> +static int context_get_domain_id(const struct context_entry *context,
> + const struct vtd_iommu *iommu)
> {
> - unsigned long dom_index, nr_dom;
> int domid = -1;
>
> - if (iommu && context)
> + if ( iommu && context )
> {
> - nr_dom = cap_ndoms(iommu->cap);
> -
> - dom_index = context_domain_id(*context);
> + unsigned int nr_dom = cap_ndoms(iommu->cap);
> + unsigned int dom_index = context_domain_id(*context);
>
> if ( dom_index < nr_dom && iommu->domid_map )
> domid = iommu->domid_map[dom_index];
> else
> dprintk(XENLOG_DEBUG VTDPREFIX,
> - "dom_index %lu exceeds nr_dom %lu or iommu has no
> domid_map\n",
> + "dom_index %u exceeds nr_dom %u or iommu has no
> domid_map\n",
> dom_index, nr_dom);
> }
> +
> return domid;
> }
>
> @@ -1302,7 +1293,7 @@ int __init iommu_alloc(struct acpi_drhd_
> if ( !iommu->domid_bitmap )
> return -ENOMEM;
>
> - iommu->domid_map = xzalloc_array(u16, nr_dom);
> + iommu->domid_map = xzalloc_array(domid_t, nr_dom);
> if ( !iommu->domid_map )
> return -ENOMEM;
>
> @@ -1477,11 +1468,12 @@ int domain_context_mapping_one(
> spin_unlock(&hd->arch.mapping_lock);
> }
>
> - if ( context_set_domain_id(context, domain, iommu) )
> + rc = context_set_domain_id(context, domain, iommu);
> + if ( rc )
> {
> spin_unlock(&iommu->lock);
> unmap_vtd_domain_page(context_entries);
> - return -EFAULT;
> + return rc;
> }
>
> context_set_address_width(*context, level_to_agaw(iommu-
> >nr_pt_levels));
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -505,7 +505,7 @@ struct vtd_iommu {
>
> struct list_head ats_devices;
> unsigned long *domid_bitmap; /* domain id bitmap */
> - u16 *domid_map; /* domain id mapping array */
> + domid_t *domid_map; /* domain id mapping array */
> uint32_t version;
> };
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 5/6] VT-d: introduce helper to convert DID to domid_t
2021-11-12 9:49 ` [PATCH 5/6] VT-d: introduce helper to convert DID to domid_t Jan Beulich
@ 2021-11-15 5:54 ` Tian, Kevin
0 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 5:54 UTC (permalink / raw)
To: Jan Beulich, xen-devel
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 12, 2021 5:50 PM
>
> This is in preparation of adding another "translation" method. Take the
> combination of the extra validation both previously open-coded have been
> doing: Bounds check and bitmap check. But don't propagate the previous
> pointless check of whether ->domid_map[] was actually allocated, as
> failure there would lead to overall failure of IOMMU initialization
> anyway.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -45,6 +45,8 @@ void disable_intremap(struct vtd_iommu *
> int iommu_alloc(struct acpi_drhd_unit *drhd);
> void iommu_free(struct acpi_drhd_unit *drhd);
>
> +domid_t did_to_domain_id(const struct vtd_iommu *iommu, unsigned int
> did);
> +
> int iommu_flush_iec_global(struct vtd_iommu *iommu);
> int iommu_flush_iec_index(struct vtd_iommu *iommu, u8 im, u16 iidx);
> void clear_fault_bits(struct vtd_iommu *iommu);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -123,15 +123,16 @@ static int context_get_domain_id(const s
>
> if ( iommu && context )
> {
> - unsigned int nr_dom = cap_ndoms(iommu->cap);
> unsigned int dom_index = context_domain_id(*context);
>
> - if ( dom_index < nr_dom && iommu->domid_map )
> - domid = iommu->domid_map[dom_index];
> - else
> + domid = did_to_domain_id(iommu, dom_index);
> + if ( domid == DOMID_INVALID )
> + {
> dprintk(XENLOG_DEBUG VTDPREFIX,
> - "dom_index %u exceeds nr_dom %u or iommu has no
> domid_map\n",
> - dom_index, nr_dom);
> + "no domid for did %u (nr_dom %u)\n",
> + dom_index, cap_ndoms(iommu->cap));
> + domid = -1;
> + }
> }
>
> return domid;
> @@ -193,6 +194,14 @@ static void check_cleanup_domid_map(stru
> }
> }
>
> +domid_t did_to_domain_id(const struct vtd_iommu *iommu, unsigned int
> did)
> +{
> + if ( did >= cap_ndoms(iommu->cap) || !test_bit(did, iommu-
> >domid_bitmap) )
> + return DOMID_INVALID;
> +
> + return iommu->domid_map[did];
> +}
> +
> static void sync_cache(const void *addr, unsigned int size)
> {
> static unsigned long clflush_size = 0;
> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -229,10 +229,7 @@ static int __must_check dev_invalidate_s
> rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
> if ( rc == -ETIMEDOUT )
> {
> - struct domain *d = NULL;
> -
> - if ( test_bit(did, iommu->domid_bitmap) )
> - d = rcu_lock_domain_by_id(iommu->domid_map[did]);
> + struct domain *d = rcu_lock_domain_by_id(did_to_domain_id(iommu,
> did));
>
> /*
> * In case the domain has been freed or the IOMMU domid bitmap is
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible
2021-11-12 9:50 ` [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible Jan Beulich
@ 2021-11-15 6:18 ` Tian, Kevin
2021-11-15 9:37 ` Jan Beulich
0 siblings, 1 reply; 24+ messages in thread
From: Tian, Kevin @ 2021-11-15 6:18 UTC (permalink / raw)
To: Jan Beulich, xen-devel
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 12, 2021 5:50 PM
>
> When an IOMMU implements the full 16 bits worth of DID in context
> entries, there's no point going through a memory base translation table.
> For IOMMUs not using Caching Mode we can simply use the domain IDs
> verbatim, while for Caching Mode we need to avoid DID 0.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> For the case where the memory tables are needed, xvzalloc_array() would
> of course be an option to use here as well, despite this being boot time
> allocations. Yet the introduction of xvmalloc() et al continues to be
> stuck ...
>
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -62,11 +62,32 @@ static struct tasklet vtd_fault_tasklet;
> static int setup_hwdom_device(u8 devfn, struct pci_dev *);
> static void setup_hwdom_rmrr(struct domain *d);
>
> +static bool domid_mapping(const struct vtd_iommu *iommu)
> +{
> + return (const void *)iommu->domid_bitmap != (const void *)iommu-
> >domid_map;
> +}
> +
> +static domid_t convert_domid(const struct vtd_iommu *iommu, domid_t
> domid)
> +{
> + /*
> + * While we need to avoid DID 0 for caching-mode IOMMUs, maintain
> + * the property of the transformation being the same in either
> + * direction. By clipping to 16 bits we ensure that the resulting
> + * DID will fit in the respective context entry field.
> + */
> + BUILD_BUG_ON(sizeof(domid_t) > sizeof(uint16_t));
> +
> + return !cap_caching_mode(iommu->cap) ? domid : ~domid;
If DOMID_MASK grows to 0xFFFF (though unlikely), then it translates
to '0' when caching mode is true. We need extend BUILD_BUG_ON() to
check DOMID_MASK in this case, since caching mode implies
total_size minus one for available domain IDs
Thanks
Kevin
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 3/6] VT-d: don't leak domid mapping on error path
2021-11-12 14:35 ` Roger Pau Monné
@ 2021-11-15 9:32 ` Jan Beulich
0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2021-11-15 9:32 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: xen-devel, Kevin Tian
On 12.11.2021 15:35, Roger Pau Monné wrote:
> On Fri, Nov 12, 2021 at 02:45:14PM +0100, Jan Beulich wrote:
>> On 12.11.2021 14:42, Roger Pau Monné wrote:
>>> On Fri, Nov 12, 2021 at 10:48:43AM +0100, Jan Beulich wrote:
>>>> While domain_context_mapping() invokes domain_context_unmap() in a sub-
>>>> case of handling DEV_TYPE_PCI when encountering an error, thus avoiding
>>>> a leak, individual calls to domain_context_mapping_one() aren't
>>>> similarly covered. Such a leak might persist until domain destruction.
>>>> Leverage that these cases can be recognized by pdev being non-NULL.
>>>
>>> Would it help to place the domid cleanup in domain_context_unmap_one,
>>> as that would then cover calls from domain_context_unmap and the
>>> failure path in domain_context_mapping_one.
>>
>> I don't think that would work (without further convolution), because of
>> the up to 3 successive calls in DEV_TYPE_PCI handling. Cleanup may happen
>> only on the first map's error path or after the last unmap.
>
> Hm, I see. And AFAICT that's because some devices that get assigned to
> a guest iommu context are not actually assigned to the guest (ie:
> pdev->domain doesn't get set, neither the device is added to the
> per-domain list), which makes them invisible to
> any_pdev_behind_iommu.
>
> I dislike that the domid is added in domain_context_mapping_one, while
> the cleanup is not done in domain_context_unmap_one, and that some
> devices context could be using the domain id without being assigned to
> the domain.
This all isn't really pretty, is it? As Andrew has been saying (I think
more than once), ideally we'd rewrite IOMMU code from scratch. But I
don't see anyone having enough spare time to actually do so ...
Jan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible
2021-11-15 6:18 ` Tian, Kevin
@ 2021-11-15 9:37 ` Jan Beulich
0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2021-11-15 9:37 UTC (permalink / raw)
To: Tian, Kevin; +Cc: xen-devel
On 15.11.2021 07:18, Tian, Kevin wrote:
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Friday, November 12, 2021 5:50 PM
>>
>> When an IOMMU implements the full 16 bits worth of DID in context
>> entries, there's no point going through a memory base translation table.
>> For IOMMUs not using Caching Mode we can simply use the domain IDs
>> verbatim, while for Caching Mode we need to avoid DID 0.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> For the case where the memory tables are needed, xvzalloc_array() would
>> of course be an option to use here as well, despite this being boot time
>> allocations. Yet the introduction of xvmalloc() et al continues to be
>> stuck ...
>>
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -62,11 +62,32 @@ static struct tasklet vtd_fault_tasklet;
>> static int setup_hwdom_device(u8 devfn, struct pci_dev *);
>> static void setup_hwdom_rmrr(struct domain *d);
>>
>> +static bool domid_mapping(const struct vtd_iommu *iommu)
>> +{
>> + return (const void *)iommu->domid_bitmap != (const void *)iommu-
>>> domid_map;
>> +}
>> +
>> +static domid_t convert_domid(const struct vtd_iommu *iommu, domid_t
>> domid)
>> +{
>> + /*
>> + * While we need to avoid DID 0 for caching-mode IOMMUs, maintain
>> + * the property of the transformation being the same in either
>> + * direction. By clipping to 16 bits we ensure that the resulting
>> + * DID will fit in the respective context entry field.
>> + */
>> + BUILD_BUG_ON(sizeof(domid_t) > sizeof(uint16_t));
>> +
>> + return !cap_caching_mode(iommu->cap) ? domid : ~domid;
>
> If DOMID_MASK grows to 0xFFFF (though unlikely),
I did consider this too unlikely to warrant taking care of. Now that
you ask for it anyway, ...
> then it translates
> to '0' when caching mode is true. We need extend BUILD_BUG_ON() to
> check DOMID_MASK in this case, since caching mode implies
> total_size minus one for available domain IDs
... I guess I'd rather replace the BUILD_BUG_ON() than extend it or
add a 2nd one.
Jan
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2021-11-15 9:37 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-12 9:46 [PATCH 0/6] VT-d: domain ID mapping improvements Jan Beulich
2021-11-12 9:47 ` [PATCH 1/6] VT-d: properly reserve DID 0 for caching mode IOMMUs Jan Beulich
2021-11-12 11:23 ` Roger Pau Monné
2021-11-12 12:07 ` Jan Beulich
2021-11-12 12:19 ` Roger Pau Monné
2021-11-15 5:13 ` Tian, Kevin
2021-11-12 12:21 ` Roger Pau Monné
2021-11-15 5:13 ` Tian, Kevin
2021-11-12 9:48 ` [PATCH 2/6] VT-d: split domid map cleanup check into a function Jan Beulich
2021-11-12 12:31 ` Roger Pau Monné
2021-11-15 5:16 ` Tian, Kevin
2021-11-12 9:48 ` [PATCH 3/6] VT-d: don't leak domid mapping on error path Jan Beulich
2021-11-12 13:42 ` Roger Pau Monné
2021-11-12 13:45 ` Jan Beulich
2021-11-12 14:35 ` Roger Pau Monné
2021-11-15 9:32 ` Jan Beulich
2021-11-15 5:21 ` Tian, Kevin
2021-11-12 9:49 ` [PATCH 4/6] VT-d: tidy domid map handling Jan Beulich
2021-11-15 5:51 ` Tian, Kevin
2021-11-12 9:49 ` [PATCH 5/6] VT-d: introduce helper to convert DID to domid_t Jan Beulich
2021-11-15 5:54 ` Tian, Kevin
2021-11-12 9:50 ` [PATCH 6/6] VT-d: avoid allocating domid_{bit,}map[] when possible Jan Beulich
2021-11-15 6:18 ` Tian, Kevin
2021-11-15 9:37 ` Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).