linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] iommu/s390: Fix race with release_device ops
@ 2022-08-26 19:47 Matthew Rosato
  2022-08-29  9:24 ` Niklas Schnelle
  2022-08-29 11:03 ` Heiko Carstens
  0 siblings, 2 replies; 4+ messages in thread
From: Matthew Rosato @ 2022-08-26 19:47 UTC (permalink / raw)
  To: iommu
  Cc: linux-s390, schnelle, pmorel, borntraeger, hca, gor,
	gerald.schaefer, agordeev, svens, joro, will, robin.murphy, jgg,
	linux-kernel

With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
calls") s390-iommu is supposed to handle dynamic switching between IOMMU
domains and the DMA API handling.  However, this commit does not
sufficiently handle the case where the device is released via a call
to the release_device op as it may occur at the same time as an opposing
attach_dev or detach_dev since the group mutex is not held over
release_device.  This was observed if the device is deconfigured during a
small window during vfio-pci initialization and can result in WARNs and
potential kernel panics.

Handle this by tracking when the device is probed/released via
dev_iommu_priv_set/get().  Ensure that once the device is released only
release_device handles the re-init of the device DMA.

Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
Changes since v1:
- Use mutex instead of spinlock due to potential sleep (Pierre)
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c         |  1 +
 drivers/iommu/s390-iommu.c  | 67 ++++++++++++++++++++++++++++---------
 3 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 7b4cdadbc023..8f2041eea741 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -157,6 +157,7 @@ struct zpci_dev {
 	/* DMA stuff */
 	unsigned long	*dma_table;
 	spinlock_t	dma_table_lock;
+	struct mutex	dma_domain_lock;
 	int		tlb_refresh;
 
 	spinlock_t	iommu_bitmap_lock;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 73cdc5539384..973edd32ecc9 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -832,6 +832,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 	kref_init(&zdev->kref);
 	mutex_init(&zdev->lock);
 	mutex_init(&zdev->kzdev_lock);
+	mutex_init(&zdev->dma_domain_lock);
 
 	rc = zpci_init_iommu(zdev);
 	if (rc)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index c898bcbbce11..ffb4e3cee7f6 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct s390_domain_device *domain_device;
 	unsigned long flags;
-	int cc, rc;
+	int cc, rc = 0;
 
 	if (!zdev)
 		return -ENODEV;
 
+	/* First check compatibility */
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	/* First device defines the DMA range limits */
+	if (list_empty(&s390_domain->devices)) {
+		domain->geometry.aperture_start = zdev->start_dma;
+		domain->geometry.aperture_end = zdev->end_dma;
+		domain->geometry.force_aperture = true;
+	/* Allow only devices with identical DMA range limits */
+	} else if (domain->geometry.aperture_start != zdev->start_dma ||
+		   domain->geometry.aperture_end != zdev->end_dma) {
+		rc = -EINVAL;
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+	if (rc)
+		return rc;
+
 	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
 	if (!domain_device)
 		return -ENOMEM;
 
+	/* Leave now if the device has already been released */
+	mutex_lock(&zdev->dma_domain_lock);
+	if (!dev_iommu_priv_get(dev)) {
+		mutex_unlock(&zdev->dma_domain_lock);
+		kfree(domain_device);
+		return 0;
+	}
+
 	if (zdev->dma_table && !zdev->s390_domain) {
 		cc = zpci_dma_exit_device(zdev);
 		if (cc) {
@@ -117,22 +141,11 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		rc = -EIO;
 		goto out_restore;
 	}
+	zdev->s390_domain = s390_domain;
+	mutex_unlock(&zdev->dma_domain_lock);
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	/* First device defines the DMA range limits */
-	if (list_empty(&s390_domain->devices)) {
-		domain->geometry.aperture_start = zdev->start_dma;
-		domain->geometry.aperture_end = zdev->end_dma;
-		domain->geometry.force_aperture = true;
-	/* Allow only devices with identical DMA range limits */
-	} else if (domain->geometry.aperture_start != zdev->start_dma ||
-		   domain->geometry.aperture_end != zdev->end_dma) {
-		rc = -EINVAL;
-		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
-		goto out_restore;
-	}
 	domain_device->zdev = zdev;
-	zdev->s390_domain = s390_domain;
 	list_add(&domain_device->list, &s390_domain->devices);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
@@ -147,6 +160,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 				   virt_to_phys(zdev->dma_table));
 	}
 out_free:
+	mutex_unlock(&zdev->dma_domain_lock);
 	kfree(domain_device);
 
 	return rc;
@@ -176,17 +190,22 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 	}
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
-	if (found && (zdev->s390_domain == s390_domain)) {
+	mutex_lock(&zdev->dma_domain_lock);
+	if (found && (zdev->s390_domain == s390_domain) &&
+	    dev_iommu_priv_get(dev)) {
 		zdev->s390_domain = NULL;
 		zpci_unregister_ioat(zdev, 0);
 		zpci_dma_init_device(zdev);
 	}
+	mutex_unlock(&zdev->dma_domain_lock);
 }
 
 static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 
+	dev_iommu_priv_set(dev, zdev);
+
 	return &zdev->iommu_dev;
 }
 
@@ -206,10 +225,26 @@ static void s390_iommu_release_device(struct device *dev)
 	 *
 	 * So let's call detach_dev from here if it hasn't been called before.
 	 */
-	if (zdev && zdev->s390_domain) {
+	if (zdev) {
+		/*
+		 * Clear priv to block further attaches for this device,
+		 * ensure detaches don't init DMA
+		 */
+		mutex_lock(&zdev->dma_domain_lock);
+		dev_iommu_priv_set(dev, NULL);
+		mutex_unlock(&zdev->dma_domain_lock);
+		/* Make sure this device is removed from the domain list */
 		domain = iommu_get_domain_for_dev(dev);
 		if (domain)
 			s390_iommu_detach_device(domain, dev);
+		/* Now ensure DMA is initialized from here */
+		mutex_lock(&zdev->dma_domain_lock);
+		if (zdev->s390_domain) {
+			zdev->s390_domain = NULL;
+			zpci_unregister_ioat(zdev, 0);
+			zpci_dma_init_device(zdev);
+		}
+		mutex_unlock(&zdev->dma_domain_lock);
 	}
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] iommu/s390: Fix race with release_device ops
  2022-08-26 19:47 [PATCH v2] iommu/s390: Fix race with release_device ops Matthew Rosato
@ 2022-08-29  9:24 ` Niklas Schnelle
  2022-08-29 11:03 ` Heiko Carstens
  1 sibling, 0 replies; 4+ messages in thread
From: Niklas Schnelle @ 2022-08-29  9:24 UTC (permalink / raw)
  To: Matthew Rosato, iommu
  Cc: linux-s390, pmorel, borntraeger, hca, gor, gerald.schaefer,
	agordeev, svens, joro, will, robin.murphy, jgg, linux-kernel

On Fri, 2022-08-26 at 15:47 -0400, Matthew Rosato wrote:
> With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
> calls") s390-iommu is supposed to handle dynamic switching between IOMMU
> domains and the DMA API handling.  However, this commit does not
> sufficiently handle the case where the device is released via a call
> to the release_device op as it may occur at the same time as an opposing
> attach_dev or detach_dev since the group mutex is not held over
> release_device.  This was observed if the device is deconfigured during a
> small window during vfio-pci initialization and can result in WARNs and
> potential kernel panics.
> 
> Handle this by tracking when the device is probed/released via
> dev_iommu_priv_set/get().  Ensure that once the device is released only
> release_device handles the re-init of the device DMA.
> 
> Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
> Changes since v1:
> - Use mutex instead of spinlock due to potential sleep (Pierre)
> ---
>  arch/s390/include/asm/pci.h |  1 +
>  arch/s390/pci/pci.c         |  1 +
>  drivers/iommu/s390-iommu.c  | 67 ++++++++++++++++++++++++++++---------
>  3 files changed, 53 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 7b4cdadbc023..8f2041eea741 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -157,6 +157,7 @@ struct zpci_dev {
>  	/* DMA stuff */
>  	unsigned long	*dma_table;
>  	spinlock_t	dma_table_lock;
> +	struct mutex	dma_domain_lock;
>  	int		tlb_refresh;
>  
>  	spinlock_t	iommu_bitmap_lock;
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 73cdc5539384..973edd32ecc9 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -832,6 +832,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
>  	kref_init(&zdev->kref);
>  	mutex_init(&zdev->lock);
>  	mutex_init(&zdev->kzdev_lock);
> +	mutex_init(&zdev->dma_domain_lock);
>  
>  	rc = zpci_init_iommu(zdev);
>  	if (rc)
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index c898bcbbce11..ffb4e3cee7f6 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  	struct zpci_dev *zdev = to_zpci_dev(dev);
>  	struct s390_domain_device *domain_device;
>  	unsigned long flags;
> -	int cc, rc;
> +	int cc, rc = 0;
>  
>  	if (!zdev)
>  		return -ENODEV;
>  
> +	/* First check compatibility */
> +	spin_lock_irqsave(&s390_domain->list_lock, flags);
> +	/* First device defines the DMA range limits */
> +	if (list_empty(&s390_domain->devices)) {
> +		domain->geometry.aperture_start = zdev->start_dma;
> +		domain->geometry.aperture_end = zdev->end_dma;
> +		domain->geometry.force_aperture = true;
> +	/* Allow only devices with identical DMA range limits */
> +	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> +		   domain->geometry.aperture_end != zdev->end_dma) {
> +		rc = -EINVAL;
> +	}
> +	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> +	if (rc)
> +		return rc;
> +
>  	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
>  	if (!domain_device)
>  		return -ENOMEM;
>  
> +	/* Leave now if the device has already been released */
> +	mutex_lock(&zdev->dma_domain_lock);
> +	if (!dev_iommu_priv_get(dev)) {
> +		mutex_unlock(&zdev->dma_domain_lock);
> +		kfree(domain_device);
> +		return 0;
> +	}
> +
>  	if (zdev->dma_table && !zdev->s390_domain) {
>  		cc = zpci_dma_exit_device(zdev);
>  		if (cc) {
> @@ -117,22 +141,11 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  		rc = -EIO;
>  		goto out_restore;
>  	}
> +	zdev->s390_domain = s390_domain;
> +	mutex_unlock(&zdev->dma_domain_lock);
>  
>  	spin_lock_irqsave(&s390_domain->list_lock, flags);
> -	/* First device defines the DMA range limits */
> -	if (list_empty(&s390_domain->devices)) {
> -		domain->geometry.aperture_start = zdev->start_dma;
> -		domain->geometry.aperture_end = zdev->end_dma;
> -		domain->geometry.force_aperture = true;
> -	/* Allow only devices with identical DMA range limits */
> -	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> -		   domain->geometry.aperture_end != zdev->end_dma) {
> -		rc = -EINVAL;
> -		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> -		goto out_restore;
> -	}
>  	domain_device->zdev = zdev;
> -	zdev->s390_domain = s390_domain;
>  	list_add(&domain_device->list, &s390_domain->devices);
>  	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>  
> @@ -147,6 +160,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  				   virt_to_phys(zdev->dma_table));
>  	}
>  out_free:
> +	mutex_unlock(&zdev->dma_domain_lock);
>  	kfree(domain_device);
>  
>  	return rc;
> @@ -176,17 +190,22 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
>  	}
>  	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>  
> -	if (found && (zdev->s390_domain == s390_domain)) {
> +	mutex_lock(&zdev->dma_domain_lock);
> +	if (found && (zdev->s390_domain == s390_domain) &&
> +	    dev_iommu_priv_get(dev)) {
>  		zdev->s390_domain = NULL;
>  		zpci_unregister_ioat(zdev, 0);
>  		zpci_dma_init_device(zdev);
>  	}
> +	mutex_unlock(&zdev->dma_domain_lock);
>  }
>  
>  static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>  {
>  	struct zpci_dev *zdev = to_zpci_dev(dev);
>  
> +	dev_iommu_priv_set(dev, zdev);
> +
>  	return &zdev->iommu_dev;
>  }
>  
> @@ -206,10 +225,26 @@ static void s390_iommu_release_device(struct device *dev)
>  	 *
>  	 * So let's call detach_dev from here if it hasn't been called before.
>  	 */
> -	if (zdev && zdev->s390_domain) {
> +	if (zdev) {
> +		/*
> +		 * Clear priv to block further attaches for this device,
> +		 * ensure detaches don't init DMA
> +		 */
> +		mutex_lock(&zdev->dma_domain_lock);
> +		dev_iommu_priv_set(dev, NULL);
> +		mutex_unlock(&zdev->dma_domain_lock);
> +		/* Make sure this device is removed from the domain list */
>  		domain = iommu_get_domain_for_dev(dev);
>  		if (domain)
>  			s390_iommu_detach_device(domain, dev);
> +		/* Now ensure DMA is initialized from here */
> +		mutex_lock(&zdev->dma_domain_lock);
> +		if (zdev->s390_domain) {
> +			zdev->s390_domain = NULL;
> +			zpci_unregister_ioat(zdev, 0);
> +			zpci_dma_init_device(zdev);
> +		}
> +		mutex_unlock(&zdev->dma_domain_lock);
>  	}
>  }
>  

Looks good to me now. Thanks for tackling this! Feel free to add my:

Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>




^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] iommu/s390: Fix race with release_device ops
  2022-08-26 19:47 [PATCH v2] iommu/s390: Fix race with release_device ops Matthew Rosato
  2022-08-29  9:24 ` Niklas Schnelle
@ 2022-08-29 11:03 ` Heiko Carstens
  2022-08-29 13:38   ` Matthew Rosato
  1 sibling, 1 reply; 4+ messages in thread
From: Heiko Carstens @ 2022-08-29 11:03 UTC (permalink / raw)
  To: Matthew Rosato
  Cc: iommu, linux-s390, schnelle, pmorel, borntraeger, gor,
	gerald.schaefer, agordeev, svens, joro, will, robin.murphy, jgg,
	linux-kernel

On Fri, Aug 26, 2022 at 03:47:21PM -0400, Matthew Rosato wrote:
> With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
> calls") s390-iommu is supposed to handle dynamic switching between IOMMU
> domains and the DMA API handling.  However, this commit does not
> sufficiently handle the case where the device is released via a call
> to the release_device op as it may occur at the same time as an opposing
> attach_dev or detach_dev since the group mutex is not held over
> release_device.  This was observed if the device is deconfigured during a
> small window during vfio-pci initialization and can result in WARNs and
> potential kernel panics.
> 
> Handle this by tracking when the device is probed/released via
> dev_iommu_priv_set/get().  Ensure that once the device is released only
> release_device handles the re-init of the device DMA.
> 
> Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
...
> +	/* First check compatibility */
> +	spin_lock_irqsave(&s390_domain->list_lock, flags);
> +	/* First device defines the DMA range limits */
> +	if (list_empty(&s390_domain->devices)) {
> +		domain->geometry.aperture_start = zdev->start_dma;
> +		domain->geometry.aperture_end = zdev->end_dma;
> +		domain->geometry.force_aperture = true;
> +	/* Allow only devices with identical DMA range limits */
> +	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> +		   domain->geometry.aperture_end != zdev->end_dma) {
> +		rc = -EINVAL;
> +	}
> +	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
...
>  	spin_lock_irqsave(&s390_domain->list_lock, flags);
> -	/* First device defines the DMA range limits */
> -	if (list_empty(&s390_domain->devices)) {
> -		domain->geometry.aperture_start = zdev->start_dma;
> -		domain->geometry.aperture_end = zdev->end_dma;
> -		domain->geometry.force_aperture = true;
> -	/* Allow only devices with identical DMA range limits */
> -	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> -		   domain->geometry.aperture_end != zdev->end_dma) {
> -		rc = -EINVAL;
> -		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> -		goto out_restore;
> -	}
>  	domain_device->zdev = zdev;
> -	zdev->s390_domain = s390_domain;
>  	list_add(&domain_device->list, &s390_domain->devices);
>  	spin_unlock_irqrestore(&s390_domain->list_lock, flags);

Stupid question: but how is this not racy when the spinlock is
released between doing something that depends on an empty list and
actually adding to the list later, after the lock had been released?

> +		mutex_lock(&zdev->dma_domain_lock);
> +		dev_iommu_priv_set(dev, NULL);
> +		mutex_unlock(&zdev->dma_domain_lock);
> +		/* Make sure this device is removed from the domain list */
>  		domain = iommu_get_domain_for_dev(dev);
>  		if (domain)
>  			s390_iommu_detach_device(domain, dev);
> +		/* Now ensure DMA is initialized from here */
> +		mutex_lock(&zdev->dma_domain_lock);
> +		if (zdev->s390_domain) {
> +			zdev->s390_domain = NULL;
> +			zpci_unregister_ioat(zdev, 0);
> +			zpci_dma_init_device(zdev);
> +		}
> +		mutex_unlock(&zdev->dma_domain_lock);

Looking at the patch and this code it is also anything but obvious
which _data_ is actually protected by the mutex. Anyway.. just some
stupid comments while briefly looking at the patch :)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] iommu/s390: Fix race with release_device ops
  2022-08-29 11:03 ` Heiko Carstens
@ 2022-08-29 13:38   ` Matthew Rosato
  0 siblings, 0 replies; 4+ messages in thread
From: Matthew Rosato @ 2022-08-29 13:38 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: iommu, linux-s390, schnelle, pmorel, borntraeger, gor,
	gerald.schaefer, agordeev, svens, joro, will, robin.murphy, jgg,
	linux-kernel

On 8/29/22 7:03 AM, Heiko Carstens wrote:
> On Fri, Aug 26, 2022 at 03:47:21PM -0400, Matthew Rosato wrote:
>> With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
>> calls") s390-iommu is supposed to handle dynamic switching between IOMMU
>> domains and the DMA API handling.  However, this commit does not
>> sufficiently handle the case where the device is released via a call
>> to the release_device op as it may occur at the same time as an opposing
>> attach_dev or detach_dev since the group mutex is not held over
>> release_device.  This was observed if the device is deconfigured during a
>> small window during vfio-pci initialization and can result in WARNs and
>> potential kernel panics.
>>
>> Handle this by tracking when the device is probed/released via
>> dev_iommu_priv_set/get().  Ensure that once the device is released only
>> release_device handles the re-init of the device DMA.
>>
>> Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
>> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ...
>> +	/* First check compatibility */
>> +	spin_lock_irqsave(&s390_domain->list_lock, flags);
>> +	/* First device defines the DMA range limits */
>> +	if (list_empty(&s390_domain->devices)) {
>> +		domain->geometry.aperture_start = zdev->start_dma;
>> +		domain->geometry.aperture_end = zdev->end_dma;
>> +		domain->geometry.force_aperture = true;
>> +	/* Allow only devices with identical DMA range limits */
>> +	} else if (domain->geometry.aperture_start != zdev->start_dma ||
>> +		   domain->geometry.aperture_end != zdev->end_dma) {
>> +		rc = -EINVAL;
>> +	}
>> +	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> ...
>>  	spin_lock_irqsave(&s390_domain->list_lock, flags);
>> -	/* First device defines the DMA range limits */
>> -	if (list_empty(&s390_domain->devices)) {
>> -		domain->geometry.aperture_start = zdev->start_dma;
>> -		domain->geometry.aperture_end = zdev->end_dma;
>> -		domain->geometry.force_aperture = true;
>> -	/* Allow only devices with identical DMA range limits */
>> -	} else if (domain->geometry.aperture_start != zdev->start_dma ||
>> -		   domain->geometry.aperture_end != zdev->end_dma) {
>> -		rc = -EINVAL;
>> -		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>> -		goto out_restore;
>> -	}
>>  	domain_device->zdev = zdev;
>> -	zdev->s390_domain = s390_domain;
>>  	list_add(&domain_device->list, &s390_domain->devices);
>>  	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> 
> Stupid question: but how is this not racy when the spinlock is
> released between doing something that depends on an empty list and
> actually adding to the list later, after the lock had been released?
> 

Oh, that's a good point...  This was re-arranged to simplify error backout cases, but theoretically yeah there could be 2 competing threads with different apertures that would both list_add here when really first-in should win and the 2nd should be rejected.  That's no good.  In practice this wouldn't happen today because of the device<->domain relationship we have in s390, but we still need to avoid introducing such an issue.

We should probably just go back to doing the aperture check at the later point at the same time we list_add and live with the more complicated backout path -- I'll have a closer look but either way there will be a v3 that changes this, thanks.

>> +		mutex_lock(&zdev->dma_domain_lock);
>> +		dev_iommu_priv_set(dev, NULL);
>> +		mutex_unlock(&zdev->dma_domain_lock);
>> +		/* Make sure this device is removed from the domain list */
>>  		domain = iommu_get_domain_for_dev(dev);
>>  		if (domain)
>>  			s390_iommu_detach_device(domain, dev);
>> +		/* Now ensure DMA is initialized from here */
>> +		mutex_lock(&zdev->dma_domain_lock);
>> +		if (zdev->s390_domain) {
>> +			zdev->s390_domain = NULL;
>> +			zpci_unregister_ioat(zdev, 0);
>> +			zpci_dma_init_device(zdev);
>> +		}
>> +		mutex_unlock(&zdev->dma_domain_lock);
> 
> Looking at the patch and this code it is also anything but obvious
> which _data_ is actually protected by the mutex. Anyway.. just some
> stupid comments while briefly looking at the patch :)

Fair; largely, it's purpose is to protect the setting of zdev->s390_domain (which indicates e.g. whether s390-iommu or s390 pci-dma is being used to access the device).  However we also use it to protect the updating/checking of the priv value (e.g. probe state of the device) as this is relevant to what change we make to s390_domain during an attach/detach and we want to make that decision based on a consistent 'probed' state.  Since there will already be a v3 I will try to add a comment to its definition in pci.h and/or maybe here in release_device.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-08-29 13:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-26 19:47 [PATCH v2] iommu/s390: Fix race with release_device ops Matthew Rosato
2022-08-29  9:24 ` Niklas Schnelle
2022-08-29 11:03 ` Heiko Carstens
2022-08-29 13:38   ` Matthew Rosato

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).