* [PATCH v3 1/5] iommu/s390: Fix duplicate domain attachments
2022-09-29 15:32 [PATCH v3 0/5] iommu/s390: Fixes related to attach and aperture handling Niklas Schnelle
@ 2022-09-29 15:32 ` Niklas Schnelle
2022-09-29 16:10 ` Jason Gunthorpe
2022-09-29 15:32 ` [PATCH v3 2/5] iommu/s390: Get rid of s390_domain_device Niklas Schnelle
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Niklas Schnelle @ 2022-09-29 15:32 UTC (permalink / raw)
To: Matthew Rosato, Pierre Morel, iommu
Cc: linux-s390, borntraeger, hca, gor, gerald.schaefer, agordeev,
svens, joro, will, robin.murphy, jgg, linux-kernel
Since commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
calls") we can end up with duplicates in the list of devices attached to
a domain. This is inefficient and confusing since only one domain can
actually be in control of the IOMMU translations for a device. Fix this
by detaching the device from the previous domain, if any, on attach.
Add a WARN_ON() in case we still have attached devices on freeing the
domain. While here remove the re-attach on failure dance as it was
determined to be unlikely to help and may confuse debug and recovery.
Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
Changes since v2:
- Make __s390_iommu_detach_device() return void (Jason)
- Remove superfluous locking when we're freeing anyway (Jason)
- Remove the re-attach on failure dance as it is unlikely to help
and complicates debug and recovery (Jason)
- Ignore attempts to detach from domain that the device is no longer
attached to.
drivers/iommu/s390-iommu.c | 77 +++++++++++++++++---------------------
1 file changed, 34 insertions(+), 43 deletions(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index c898bcbbce11..6fcb64e4b5e6 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -79,10 +79,36 @@ static void s390_domain_free(struct iommu_domain *domain)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
+ WARN_ON(!list_empty(&s390_domain->devices));
dma_cleanup_tables(s390_domain->dma_table);
kfree(s390_domain);
}
+static void __s390_iommu_detach_device(struct s390_domain *s390_domain,
+ struct zpci_dev *zdev)
+{
+ struct s390_domain_device *domain_device, *tmp;
+ unsigned long flags;
+
+ if (!zdev || zdev->s390_domain != s390_domain)
+ return;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
+ list) {
+ if (domain_device->zdev == zdev) {
+ list_del(&domain_device->list);
+ kfree(domain_device);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+ zpci_unregister_ioat(zdev, 0);
+ zdev->s390_domain = NULL;
+ zdev->dma_table = NULL;
+}
+
static int s390_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -90,7 +116,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
struct zpci_dev *zdev = to_zpci_dev(dev);
struct s390_domain_device *domain_device;
unsigned long flags;
- int cc, rc;
+ int cc, rc = 0;
if (!zdev)
return -ENODEV;
@@ -99,23 +125,17 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
if (!domain_device)
return -ENOMEM;
- if (zdev->dma_table && !zdev->s390_domain) {
- cc = zpci_dma_exit_device(zdev);
- if (cc) {
- rc = -EIO;
- goto out_free;
- }
- }
-
if (zdev->s390_domain)
- zpci_unregister_ioat(zdev, 0);
+ __s390_iommu_detach_device(zdev->s390_domain, zdev);
+ else if (zdev->dma_table)
+ zpci_dma_exit_device(zdev);
zdev->dma_table = s390_domain->dma_table;
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table));
if (cc) {
rc = -EIO;
- goto out_restore;
+ goto out_free;
}
spin_lock_irqsave(&s390_domain->list_lock, flags);
@@ -129,7 +149,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
domain->geometry.aperture_end != zdev->end_dma) {
rc = -EINVAL;
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
- goto out_restore;
+ goto out_free;
}
domain_device->zdev = zdev;
zdev->s390_domain = s390_domain;
@@ -138,14 +158,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return 0;
-out_restore:
- if (!zdev->s390_domain) {
- zpci_dma_init_device(zdev);
- } else {
- zdev->dma_table = zdev->s390_domain->dma_table;
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
- }
out_free:
kfree(domain_device);
@@ -157,30 +169,9 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
- struct s390_domain_device *domain_device, *tmp;
- unsigned long flags;
- int found = 0;
-
- if (!zdev)
- return;
-
- spin_lock_irqsave(&s390_domain->list_lock, flags);
- list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
- list) {
- if (domain_device->zdev == zdev) {
- list_del(&domain_device->list);
- kfree(domain_device);
- found = 1;
- break;
- }
- }
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
- if (found && (zdev->s390_domain == s390_domain)) {
- zdev->s390_domain = NULL;
- zpci_unregister_ioat(zdev, 0);
- zpci_dma_init_device(zdev);
- }
+ __s390_iommu_detach_device(s390_domain, zdev);
+ zpci_dma_init_device(zdev);
}
static struct iommu_device *s390_iommu_probe_device(struct device *dev)
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/5] iommu/s390: Fix duplicate domain attachments
2022-09-29 15:32 ` [PATCH v3 1/5] iommu/s390: Fix duplicate domain attachments Niklas Schnelle
@ 2022-09-29 16:10 ` Jason Gunthorpe
0 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2022-09-29 16:10 UTC (permalink / raw)
To: Niklas Schnelle
Cc: Matthew Rosato, Pierre Morel, iommu, linux-s390, borntraeger,
hca, gor, gerald.schaefer, agordeev, svens, joro, will,
robin.murphy, linux-kernel
On Thu, Sep 29, 2022 at 05:32:58PM +0200, Niklas Schnelle wrote:
> Since commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
> calls") we can end up with duplicates in the list of devices attached to
> a domain. This is inefficient and confusing since only one domain can
> actually be in control of the IOMMU translations for a device. Fix this
> by detaching the device from the previous domain, if any, on attach.
> Add a WARN_ON() in case we still have attached devices on freeing the
> domain. While here remove the re-attach on failure dance as it was
> determined to be unlikely to help and may confuse debug and recovery.
>
> Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> Changes since v2:
> - Make __s390_iommu_detach_device() return void (Jason)
> - Remove superfluous locking when we're freeing anyway (Jason)
> - Remove the re-attach on failure dance as it is unlikely to help
> and complicates debug and recovery (Jason)
> - Ignore attempts to detach from domain that the device is no longer
> attached to.
>
> drivers/iommu/s390-iommu.c | 77 +++++++++++++++++---------------------
> 1 file changed, 34 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index c898bcbbce11..6fcb64e4b5e6 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -79,10 +79,36 @@ static void s390_domain_free(struct iommu_domain *domain)
> {
> struct s390_domain *s390_domain = to_s390_domain(domain);
>
> + WARN_ON(!list_empty(&s390_domain->devices));
> dma_cleanup_tables(s390_domain->dma_table);
> kfree(s390_domain);
> }
>
> +static void __s390_iommu_detach_device(struct s390_domain *s390_domain,
> + struct zpci_dev *zdev)
> +{
> + struct s390_domain_device *domain_device, *tmp;
> + unsigned long flags;
> +
> + if (!zdev || zdev->s390_domain != s390_domain)
Please drop the s390_domain argument from this function; it is pointless.
Calling detach_device with a mismatched domain argument is a WARN_ON
offense; the correct recovery is still to remove the domain.
And zdev can never be NULL here due to the call chain.
Also, s390_iommu_release_device() should call this new function since
we don't want to return back to the platform DMA when releasing.
So, like this:
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 2bad24d6cfef59..e8333a9301ec95 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -75,12 +75,12 @@ static void s390_domain_free(struct iommu_domain *domain)
kfree(s390_domain);
}
-static void __s390_iommu_detach_device(struct s390_domain *s390_domain,
- struct zpci_dev *zdev)
+static void __s390_iommu_detach_device(struct zpci_dev *zdev)
{
+ struct s390_domain *s390_domain = zdev->s390_domain;
unsigned long flags;
- if (!zdev || zdev->s390_domain != s390_domain)
+ if (!s390_domain)
return;
spin_lock_irqsave(&s390_domain->list_lock, flags);
@@ -108,7 +108,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return -EINVAL;
if (zdev->s390_domain)
- __s390_iommu_detach_device(zdev->s390_domain, zdev);
+ __s390_iommu_detach_device(zdev);
else if (zdev->dma_table)
zpci_dma_exit_device(zdev);
@@ -130,10 +130,11 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
static void s390_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
- __s390_iommu_detach_device(s390_domain, zdev);
+ WARN_ON(zdev->s390_domain != to_s390_domain(domain));
+
+ __s390_iommu_detach_device(zdev);
zpci_dma_init_device(zdev);
}
@@ -174,21 +175,11 @@ static void s390_iommu_release_device(struct device *dev)
struct iommu_domain *domain;
/*
- * This is a workaround for a scenario where the IOMMU API common code
- * "forgets" to call the detach_dev callback: After binding a device
- * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
- * the attach_dev), removing the device via
- * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
- * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
- * notifier.
- *
- * So let's call detach_dev from here if it hasn't been called before.
+ * release_device is expected to detach any domain currently attached
+ * to the device, but keep it attached to other devices in the group.
*/
- if (zdev && zdev->s390_domain) {
- domain = iommu_get_domain_for_dev(dev);
- if (domain)
- s390_iommu_detach_device(domain, dev);
- }
+ if (zdev)
+ __s390_iommu_detach_device(zdev);
}
static int s390_iommu_update_trans(struct s390_domain *s390_domain,
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 2/5] iommu/s390: Get rid of s390_domain_device
2022-09-29 15:32 [PATCH v3 0/5] iommu/s390: Fixes related to attach and aperture handling Niklas Schnelle
2022-09-29 15:32 ` [PATCH v3 1/5] iommu/s390: Fix duplicate domain attachments Niklas Schnelle
@ 2022-09-29 15:32 ` Niklas Schnelle
2022-09-29 16:20 ` Jason Gunthorpe
2022-09-29 15:33 ` [PATCH v3 3/5] iommu/s390: Fix potential s390_domain aperture shrinking Niklas Schnelle
` (2 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Niklas Schnelle @ 2022-09-29 15:32 UTC (permalink / raw)
To: Matthew Rosato, Pierre Morel, iommu
Cc: linux-s390, borntraeger, hca, gor, gerald.schaefer, agordeev,
svens, joro, will, robin.murphy, jgg, linux-kernel
The struct s390_domain_device serves the sole purpose as list entry for
the devices list of a struct s390_domain. As it contains no additional
information besides a list_head and a pointer to the struct zpci_dev we
can simplify things and just thread the device list through struct
zpci_dev directly. This removes the need to allocate during domain
attach and gets rid of one level of indirection during mapping
operations.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
v2->v3:
- Remove search in devices list when we have the pointer to the
list item (Jason)
arch/s390/include/asm/pci.h | 1 +
drivers/iommu/s390-iommu.c | 45 ++++++++-----------------------------
2 files changed, 10 insertions(+), 36 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 108e732d7b14..15f8714ca9b7 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -117,6 +117,7 @@ struct zpci_bus {
struct zpci_dev {
struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+ struct list_head iommu_list;
struct kref kref;
struct hotplug_slot hotplug_slot;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 6fcb64e4b5e6..8c4b180b3247 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -29,11 +29,6 @@ struct s390_domain {
spinlock_t list_lock;
};
-struct s390_domain_device {
- struct list_head list;
- struct zpci_dev *zdev;
-};
-
static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
return container_of(dom, struct s390_domain, domain);
@@ -87,21 +82,13 @@ static void s390_domain_free(struct iommu_domain *domain)
static void __s390_iommu_detach_device(struct s390_domain *s390_domain,
struct zpci_dev *zdev)
{
- struct s390_domain_device *domain_device, *tmp;
unsigned long flags;
if (!zdev || zdev->s390_domain != s390_domain)
return;
spin_lock_irqsave(&s390_domain->list_lock, flags);
- list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
- list) {
- if (domain_device->zdev == zdev) {
- list_del(&domain_device->list);
- kfree(domain_device);
- break;
- }
- }
+ list_del_init(&zdev->iommu_list);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
zpci_unregister_ioat(zdev, 0);
@@ -114,17 +101,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
- struct s390_domain_device *domain_device;
unsigned long flags;
- int cc, rc = 0;
+ int cc;
if (!zdev)
return -ENODEV;
- domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
- if (!domain_device)
- return -ENOMEM;
-
if (zdev->s390_domain)
__s390_iommu_detach_device(zdev->s390_domain, zdev);
else if (zdev->dma_table)
@@ -133,10 +115,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
zdev->dma_table = s390_domain->dma_table;
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table));
- if (cc) {
- rc = -EIO;
- goto out_free;
- }
+ if (cc)
+ return -EIO;
spin_lock_irqsave(&s390_domain->list_lock, flags);
/* First device defines the DMA range limits */
@@ -147,21 +127,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
/* Allow only devices with identical DMA range limits */
} else if (domain->geometry.aperture_start != zdev->start_dma ||
domain->geometry.aperture_end != zdev->end_dma) {
- rc = -EINVAL;
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
- goto out_free;
+ return -EINVAL;
}
- domain_device->zdev = zdev;
zdev->s390_domain = s390_domain;
- list_add(&domain_device->list, &s390_domain->devices);
+ list_add(&zdev->iommu_list, &s390_domain->devices);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
return 0;
-
-out_free:
- kfree(domain_device);
-
- return rc;
}
static void s390_iommu_detach_device(struct iommu_domain *domain,
@@ -208,10 +181,10 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
phys_addr_t pa, dma_addr_t dma_addr,
size_t size, int flags)
{
- struct s390_domain_device *domain_device;
phys_addr_t page_addr = pa & PAGE_MASK;
dma_addr_t start_dma_addr = dma_addr;
unsigned long irq_flags, nr_pages, i;
+ struct zpci_dev *zdev;
unsigned long *entry;
int rc = 0;
@@ -236,8 +209,8 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
}
spin_lock(&s390_domain->list_lock);
- list_for_each_entry(domain_device, &s390_domain->devices, list) {
- rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+ list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+ rc = zpci_refresh_trans((u64)zdev->fh << 32,
start_dma_addr, nr_pages * PAGE_SIZE);
if (rc)
break;
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 2/5] iommu/s390: Get rid of s390_domain_device
2022-09-29 15:32 ` [PATCH v3 2/5] iommu/s390: Get rid of s390_domain_device Niklas Schnelle
@ 2022-09-29 16:20 ` Jason Gunthorpe
0 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2022-09-29 16:20 UTC (permalink / raw)
To: Niklas Schnelle
Cc: Matthew Rosato, Pierre Morel, iommu, linux-s390, borntraeger,
hca, gor, gerald.schaefer, agordeev, svens, joro, will,
robin.murphy, linux-kernel
On Thu, Sep 29, 2022 at 05:32:59PM +0200, Niklas Schnelle wrote:
> The struct s390_domain_device serves the sole purpose as list entry for
> the devices list of a struct s390_domain. As it contains no additional
> information besides a list_head and a pointer to the struct zpci_dev we
> can simplify things and just thread the device list through struct
> zpci_dev directly. This removes the need to allocate during domain
> attach and gets rid of one level of indirection during mapping
> operations.
>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> v2->v3:
> - Remove search in devices list when we have the pointer to the
> list item (Jason)
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Jason
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 3/5] iommu/s390: Fix potential s390_domain aperture shrinking
2022-09-29 15:32 [PATCH v3 0/5] iommu/s390: Fixes related to attach and aperture handling Niklas Schnelle
2022-09-29 15:32 ` [PATCH v3 1/5] iommu/s390: Fix duplicate domain attachments Niklas Schnelle
2022-09-29 15:32 ` [PATCH v3 2/5] iommu/s390: Get rid of s390_domain_device Niklas Schnelle
@ 2022-09-29 15:33 ` Niklas Schnelle
2022-09-29 16:24 ` Jason Gunthorpe
2022-09-29 15:33 ` [PATCH v3 4/5] iommu/s390: Fix incorrect aperture check Niklas Schnelle
2022-09-29 15:33 ` [PATCH v3 5/5] iommu/s390: Fix incorrect pgsize_bitmap Niklas Schnelle
4 siblings, 1 reply; 12+ messages in thread
From: Niklas Schnelle @ 2022-09-29 15:33 UTC (permalink / raw)
To: Matthew Rosato, Pierre Morel, iommu
Cc: linux-s390, borntraeger, hca, gor, gerald.schaefer, agordeev,
svens, joro, will, robin.murphy, jgg, linux-kernel
The s390 IOMMU driver currently sets the IOMMU domain's aperture to
match the device specific DMA address range of the device that is first
attached. This is not ideal. For one, if the domain has no device
attached in the meantime, the aperture could be shrunk, allowing
translations outside the aperture to exist in the translation tables.
Also, this is a bit of a misuse of the aperture, which really should
describe what addresses can be translated and not some device-specific
limitations.
Instead of misusing the aperture like this we can instead create
reserved ranges for the ranges inaccessible to the attached devices
allowing devices with overlapping ranges to still share an IOMMU domain.
This also significantly simplifies s390_iommu_attach_device() allowing
us to move the aperture check to the beginning of the function and
removing the need to hold the device list's lock to check the aperture.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
drivers/iommu/s390-iommu.c | 50 +++++++++++++++++++++++++++-----------
1 file changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 8c4b180b3247..ed0e64f478cf 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
kfree(s390_domain);
return NULL;
}
+ s390_domain->domain.geometry.force_aperture = true;
+ s390_domain->domain.geometry.aperture_start = 0;
+ s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
spin_lock_init(&s390_domain->dma_table_lock);
spin_lock_init(&s390_domain->list_lock);
@@ -107,30 +110,24 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
if (!zdev)
return -ENODEV;
+ if (domain->geometry.aperture_start > zdev->end_dma ||
+ domain->geometry.aperture_end < zdev->start_dma)
+ return -EINVAL;
+
if (zdev->s390_domain)
__s390_iommu_detach_device(zdev->s390_domain, zdev);
else if (zdev->dma_table)
zpci_dma_exit_device(zdev);
- zdev->dma_table = s390_domain->dma_table;
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(s390_domain->dma_table));
if (cc)
return -EIO;
- spin_lock_irqsave(&s390_domain->list_lock, flags);
- /* First device defines the DMA range limits */
- if (list_empty(&s390_domain->devices)) {
- domain->geometry.aperture_start = zdev->start_dma;
- domain->geometry.aperture_end = zdev->end_dma;
- domain->geometry.force_aperture = true;
- /* Allow only devices with identical DMA range limits */
- } else if (domain->geometry.aperture_start != zdev->start_dma ||
- domain->geometry.aperture_end != zdev->end_dma) {
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
- return -EINVAL;
- }
+ zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
list_add(&zdev->iommu_list, &s390_domain->devices);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
@@ -147,6 +144,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
zpci_dma_init_device(zdev);
}
+static void s390_iommu_get_resv_regions(struct device *dev,
+ struct list_head *list)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct iommu_resv_region *region;
+
+ if (zdev->start_dma) {
+ region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
+ IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+ list_add_tail(&region->list, list);
+ }
+
+ if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
+ region = iommu_alloc_resv_region(zdev->end_dma + 1,
+ ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
+ 0, IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+ list_add_tail(&region->list, list);
+ }
+}
+
static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -340,6 +361,7 @@ static const struct iommu_ops s390_iommu_ops = {
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+ .get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = s390_iommu_attach_device,
.detach_dev = s390_iommu_detach_device,
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 3/5] iommu/s390: Fix potential s390_domain aperture shrinking
2022-09-29 15:33 ` [PATCH v3 3/5] iommu/s390: Fix potential s390_domain aperture shrinking Niklas Schnelle
@ 2022-09-29 16:24 ` Jason Gunthorpe
0 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2022-09-29 16:24 UTC (permalink / raw)
To: Niklas Schnelle
Cc: Matthew Rosato, Pierre Morel, iommu, linux-s390, borntraeger,
hca, gor, gerald.schaefer, agordeev, svens, joro, will,
robin.murphy, linux-kernel
On Thu, Sep 29, 2022 at 05:33:00PM +0200, Niklas Schnelle wrote:
> The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> match the device specific DMA address range of the device that is first
> attached. This is not ideal. For one if the domain has no device
> attached in the meantime the aperture could be shrunk allowing
> translations outside the aperture to exist in the translation tables.
> Also this is a bit of a misuse of the aperture which really should
> describe what addresses can be translated and not some device specific
> limitations.
>
> Instead of misusing the aperture like this we can instead create
> reserved ranges for the ranges inaccessible to the attached devices
> allowing devices with overlapping ranges to still share an IOMMU domain.
> This also significantly simplifies s390_iommu_attach_device() allowing
> us to move the aperture check to the beginning of the function and
> removing the need to hold the device list's lock to check the aperture.
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> drivers/iommu/s390-iommu.c | 50 +++++++++++++++++++++++++++-----------
> 1 file changed, 36 insertions(+), 14 deletions(-)
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Jason
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 4/5] iommu/s390: Fix incorrect aperture check
2022-09-29 15:32 [PATCH v3 0/5] iommu/s390: Fixes related to attach and aperture handling Niklas Schnelle
` (2 preceding siblings ...)
2022-09-29 15:33 ` [PATCH v3 3/5] iommu/s390: Fix potential s390_domain aperture shrinking Niklas Schnelle
@ 2022-09-29 15:33 ` Niklas Schnelle
2022-09-29 15:58 ` Jason Gunthorpe
2022-09-29 15:33 ` [PATCH v3 5/5] iommu/s390: Fix incorrect pgsize_bitmap Niklas Schnelle
4 siblings, 1 reply; 12+ messages in thread
From: Niklas Schnelle @ 2022-09-29 15:33 UTC (permalink / raw)
To: Matthew Rosato, Pierre Morel, iommu
Cc: linux-s390, borntraeger, hca, gor, gerald.schaefer, agordeev,
svens, joro, will, robin.murphy, jgg, linux-kernel
The domain->geometry.aperture_end specifies the last valid address, so
treat it as such when checking if a DMA address is valid.
Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
drivers/iommu/s390-iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index ed0e64f478cf..6d4a9c7db32c 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -210,7 +210,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
int rc = 0;
if (dma_addr < s390_domain->domain.geometry.aperture_start ||
- dma_addr + size > s390_domain->domain.geometry.aperture_end)
+ dma_addr + size > s390_domain->domain.geometry.aperture_end + 1)
return -EINVAL;
nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 4/5] iommu/s390: Fix incorrect aperture check
2022-09-29 15:33 ` [PATCH v3 4/5] iommu/s390: Fix incorrect aperture check Niklas Schnelle
@ 2022-09-29 15:58 ` Jason Gunthorpe
2022-09-30 8:01 ` Niklas Schnelle
0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2022-09-29 15:58 UTC (permalink / raw)
To: Niklas Schnelle
Cc: Matthew Rosato, Pierre Morel, iommu, linux-s390, borntraeger,
hca, gor, gerald.schaefer, agordeev, svens, joro, will,
robin.murphy, linux-kernel
On Thu, Sep 29, 2022 at 05:33:01PM +0200, Niklas Schnelle wrote:
> The domain->geometry.aperture_end specifies the last valid address treat
> it as such when checking if a DMA address is valid.
>
> Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> drivers/iommu/s390-iommu.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index ed0e64f478cf..6d4a9c7db32c 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -210,7 +210,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
> int rc = 0;
>
> if (dma_addr < s390_domain->domain.geometry.aperture_start ||
> - dma_addr + size > s390_domain->domain.geometry.aperture_end)
> + dma_addr + size > s390_domain->domain.geometry.aperture_end + 1)
The reason the iommu layer uses 'last' (= start + size - 1) not 'end'
is to allow for the very last byte of the range to be used.
Meaning (start + size) == 0 in some cases due to the overflow.
Generally when working with 'last' values I prefer people write code in a
way that doesn't trigger the overflow, because there are some
complicated C rules about integer promotion that can mean the desired
overflow silently doesn't happen in obscure cases - especially if
unsigned long != u64
So, I'd write this as:
(dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end
Jason
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 4/5] iommu/s390: Fix incorrect aperture check
2022-09-29 15:58 ` Jason Gunthorpe
@ 2022-09-30 8:01 ` Niklas Schnelle
0 siblings, 0 replies; 12+ messages in thread
From: Niklas Schnelle @ 2022-09-30 8:01 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Matthew Rosato, Pierre Morel, iommu, linux-s390, borntraeger,
hca, gor, gerald.schaefer, agordeev, svens, joro, will,
robin.murphy, linux-kernel
On Thu, 2022-09-29 at 12:58 -0300, Jason Gunthorpe wrote:
> On Thu, Sep 29, 2022 at 05:33:01PM +0200, Niklas Schnelle wrote:
> > The domain->geometry.aperture_end specifies the last valid address treat
> > it as such when checking if a DMA address is valid.
> >
> > Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
> > Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
> > Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> > ---
> > drivers/iommu/s390-iommu.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> > index ed0e64f478cf..6d4a9c7db32c 100644
> > --- a/drivers/iommu/s390-iommu.c
> > +++ b/drivers/iommu/s390-iommu.c
> > @@ -210,7 +210,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
> > int rc = 0;
> >
> > if (dma_addr < s390_domain->domain.geometry.aperture_start ||
> > - dma_addr + size > s390_domain->domain.geometry.aperture_end)
> > + dma_addr + size > s390_domain->domain.geometry.aperture_end + 1)
>
> The reason the iommu layer uses 'last' (= start + size - 1) not 'end'
> is to allow for the very last byte of the range to be used.
>
> Meaning (start + size) == 0 in some cases due to the overflow.
>
> Generally when working with lasts's I prefer people write code in a
> way that doesn't trigger the overflow, because there are some
> complicated C rules about integer promotion that can mean the desired
> overflow silently doesn't happen in obscure cases - especially if
> unsigned long != u64
>
> So, I'd write this as:
>
> (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end
>
> Jason
Makes sense. Thanks.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 5/5] iommu/s390: Fix incorrect pgsize_bitmap
2022-09-29 15:32 [PATCH v3 0/5] iommu/s390: Fixes related to attach and aperture handling Niklas Schnelle
` (3 preceding siblings ...)
2022-09-29 15:33 ` [PATCH v3 4/5] iommu/s390: Fix incorrect aperture check Niklas Schnelle
@ 2022-09-29 15:33 ` Niklas Schnelle
2022-09-29 15:59 ` Jason Gunthorpe
4 siblings, 1 reply; 12+ messages in thread
From: Niklas Schnelle @ 2022-09-29 15:33 UTC (permalink / raw)
To: Matthew Rosato, Pierre Morel, iommu
Cc: linux-s390, borntraeger, hca, gor, gerald.schaefer, agordeev,
svens, joro, will, robin.murphy, jgg, linux-kernel
The .pgsize_bitmap property of struct iommu_ops is not a page mask but
rather has a bit set for each page size the IOMMU supports. As the
comment correctly pointed out, at this moment the code only supports 4K
pages, so simply use SZ_4K here.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
drivers/iommu/s390-iommu.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 6d4a9c7db32c..2bad24d6cfef 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -12,13 +12,6 @@
#include <linux/sizes.h>
#include <asm/pci_dma.h>
-/*
- * Physically contiguous memory regions can be mapped with 4 KiB alignment,
- * we allow all page sizes that are an order of 4KiB (no special large page
- * support so far).
- */
-#define S390_IOMMU_PGSIZES (~0xFFFUL)
-
static const struct iommu_ops s390_iommu_ops;
struct s390_domain {
@@ -360,7 +353,7 @@ static const struct iommu_ops s390_iommu_ops = {
.probe_device = s390_iommu_probe_device,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
- .pgsize_bitmap = S390_IOMMU_PGSIZES,
+ .pgsize_bitmap = SZ_4K,
.get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = s390_iommu_attach_device,
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 5/5] iommu/s390: Fix incorrect pgsize_bitmap
2022-09-29 15:33 ` [PATCH v3 5/5] iommu/s390: Fix incorrect pgsize_bitmap Niklas Schnelle
@ 2022-09-29 15:59 ` Jason Gunthorpe
0 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2022-09-29 15:59 UTC (permalink / raw)
To: Niklas Schnelle
Cc: Matthew Rosato, Pierre Morel, iommu, linux-s390, borntraeger,
hca, gor, gerald.schaefer, agordeev, svens, joro, will,
robin.murphy, linux-kernel
On Thu, Sep 29, 2022 at 05:33:02PM +0200, Niklas Schnelle wrote:
> The .pgsize_bitmap property of struct iommu_ops is not a page mask but
> rather has a bit set for each size of pages the IOMMU supports. As the
> comment correctly pointed out at this moment the code only support 4K
> pages so simply use SZ_4K here.
>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> drivers/iommu/s390-iommu.c | 9 +--------
> 1 file changed, 1 insertion(+), 8 deletions(-)
Yep!
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Jason
^ permalink raw reply [flat|nested] 12+ messages in thread