All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
@ 2020-04-08 14:04 Jean-Philippe Brucker
  2020-04-08 14:04 ` [PATCH 1/2] uacce: Remove mm_exit() op Jean-Philippe Brucker
                   ` (3 more replies)
  0 siblings, 4 replies; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-08 14:04 UTC (permalink / raw)
  To: iommu, linux-accelerators
  Cc: Jean-Philippe Brucker, arnd, gregkh, jgg, zhangfei.gao

The IOMMU SVA API currently requires device drivers to implement an
mm_exit() callback, which stops device jobs that do DMA. This function
is called in the release() MMU notifier, when an address space that is
shared with a device exits.

It has been noted several times during discussions about SVA that
cancelling DMA jobs can be slow and complex, and doing it in the
release() notifier might cause synchronization issues (patch 2 has more
background). Device drivers must in any case call unbind() to remove
their bond, after stopping DMA from a more favorable context (release of
a file descriptor).

So after mm exits, rather than notifying device drivers, we can hold on
to the PASID until unbind(), ask IOMMU drivers to silently abort DMA and
Page Requests in the meantime. This change should relieve the mmput()
path.

Patch 1 removes the mm_exit() callback from the uacce module, and patch
2 removes it from the IOMMU API.

Jean-Philippe Brucker (2):
  uacce: Remove mm_exit() op
  iommu: Remove iommu_sva_ops::mm_exit()

 include/linux/iommu.h      |  30 -------
 include/linux/uacce.h      |  34 ++------
 drivers/iommu/iommu.c      |  11 ---
 drivers/misc/uacce/uacce.c | 171 +++++++++----------------------------
 4 files changed, 50 insertions(+), 196 deletions(-)

-- 
2.26.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/2] uacce: Remove mm_exit() op
  2020-04-08 14:04 [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
@ 2020-04-08 14:04 ` Jean-Philippe Brucker
  2020-04-09  9:07   ` Zhangfei Gao
  2020-04-08 14:04 ` [PATCH 2/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-08 14:04 UTC (permalink / raw)
  To: iommu, linux-accelerators
  Cc: Jean-Philippe Brucker, arnd, gregkh, jgg, zhangfei.gao

The mm_exit() op will be removed from the SVA API. When a process dies
and its mm goes away, the IOMMU driver won't notify device drivers
anymore. Drivers should expect to handle a lot more aborted DMA. On the
upside, it does greatly simplify the queue management.

The uacce_mm struct, that tracks all queues bound to an mm, was only
used by the mm_exit() callback. Remove it.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 include/linux/uacce.h      |  34 ++------
 drivers/misc/uacce/uacce.c | 171 +++++++++----------------------------
 2 files changed, 50 insertions(+), 155 deletions(-)

diff --git a/include/linux/uacce.h b/include/linux/uacce.h
index 0e215e6d0534a..454c2f6672d79 100644
--- a/include/linux/uacce.h
+++ b/include/linux/uacce.h
@@ -68,19 +68,21 @@ enum uacce_q_state {
  * @uacce: pointer to uacce
  * @priv: private pointer
  * @wait: wait queue head
- * @list: index into uacce_mm
- * @uacce_mm: the corresponding mm
+ * @list: index into uacce queues list
  * @qfrs: pointer of qfr regions
  * @state: queue state machine
+ * @pasid: pasid associated to the mm
+ * @handle: iommu_sva handle returned by iommu_sva_bind_device()
  */
 struct uacce_queue {
 	struct uacce_device *uacce;
 	void *priv;
 	wait_queue_head_t wait;
 	struct list_head list;
-	struct uacce_mm *uacce_mm;
 	struct uacce_qfile_region *qfrs[UACCE_MAX_REGION];
 	enum uacce_q_state state;
+	int pasid;
+	struct iommu_sva *handle;
 };
 
 /**
@@ -96,8 +98,8 @@ struct uacce_queue {
  * @cdev: cdev of the uacce
  * @dev: dev of the uacce
  * @priv: private pointer of the uacce
- * @mm_list: list head of uacce_mm->list
- * @mm_lock: lock for mm_list
+ * @queues: list of queues
+ * @queues_lock: lock for queues list
  * @inode: core vfs
  */
 struct uacce_device {
@@ -112,27 +114,9 @@ struct uacce_device {
 	struct cdev *cdev;
 	struct device dev;
 	void *priv;
-	struct list_head mm_list;
-	struct mutex mm_lock;
-	struct inode *inode;
-};
-
-/**
- * struct uacce_mm - keep track of queues bound to a process
- * @list: index into uacce_device
- * @queues: list of queues
- * @mm: the mm struct
- * @lock: protects the list of queues
- * @pasid: pasid of the uacce_mm
- * @handle: iommu_sva handle return from iommu_sva_bind_device
- */
-struct uacce_mm {
-	struct list_head list;
 	struct list_head queues;
-	struct mm_struct *mm;
-	struct mutex lock;
-	int pasid;
-	struct iommu_sva *handle;
+	struct mutex queues_lock;
+	struct inode *inode;
 };
 
 #if IS_ENABLED(CONFIG_UACCE)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index d39307f060bd6..0598dbf1499ad 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -90,109 +90,38 @@ static long uacce_fops_compat_ioctl(struct file *filep,
 }
 #endif
 
-static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle,
-			  void *data)
+static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
 {
-	struct uacce_mm *uacce_mm = data;
-	struct uacce_queue *q;
-
-	/*
-	 * No new queue can be added concurrently because no caller can have a
-	 * reference to this mm. But there may be concurrent calls to
-	 * uacce_mm_put(), so we need the lock.
-	 */
-	mutex_lock(&uacce_mm->lock);
-	list_for_each_entry(q, &uacce_mm->queues, list)
-		uacce_put_queue(q);
-	uacce_mm->mm = NULL;
-	mutex_unlock(&uacce_mm->lock);
+	int pasid;
+	struct iommu_sva *handle;
 
-	return 0;
-}
-
-static struct iommu_sva_ops uacce_sva_ops = {
-	.mm_exit = uacce_sva_exit,
-};
-
-static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce,
-				     struct uacce_queue *q,
-				     struct mm_struct *mm)
-{
-	struct uacce_mm *uacce_mm = NULL;
-	struct iommu_sva *handle = NULL;
-	int ret;
-
-	lockdep_assert_held(&uacce->mm_lock);
-
-	list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
-		if (uacce_mm->mm == mm) {
-			mutex_lock(&uacce_mm->lock);
-			list_add(&q->list, &uacce_mm->queues);
-			mutex_unlock(&uacce_mm->lock);
-			return uacce_mm;
-		}
-	}
-
-	uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL);
-	if (!uacce_mm)
-		return NULL;
+	if (!(uacce->flags & UACCE_DEV_SVA))
+		return 0;
 
-	if (uacce->flags & UACCE_DEV_SVA) {
-		/*
-		 * Safe to pass an incomplete uacce_mm, since mm_exit cannot
-		 * fire while we hold a reference to the mm.
-		 */
-		handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm);
-		if (IS_ERR(handle))
-			goto err_free;
+	handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
 
-		ret = iommu_sva_set_ops(handle, &uacce_sva_ops);
-		if (ret)
-			goto err_unbind;
-
-		uacce_mm->pasid = iommu_sva_get_pasid(handle);
-		if (uacce_mm->pasid == IOMMU_PASID_INVALID)
-			goto err_unbind;
+	pasid = iommu_sva_get_pasid(handle);
+	if (pasid == IOMMU_PASID_INVALID) {
+		iommu_sva_unbind_device(handle);
+		return -ENODEV;
 	}
 
-	uacce_mm->mm = mm;
-	uacce_mm->handle = handle;
-	INIT_LIST_HEAD(&uacce_mm->queues);
-	mutex_init(&uacce_mm->lock);
-	list_add(&q->list, &uacce_mm->queues);
-	list_add(&uacce_mm->list, &uacce->mm_list);
-
-	return uacce_mm;
-
-err_unbind:
-	if (handle)
-		iommu_sva_unbind_device(handle);
-err_free:
-	kfree(uacce_mm);
-	return NULL;
+	q->handle = handle;
+	q->pasid = pasid;
+	return 0;
 }
 
-static void uacce_mm_put(struct uacce_queue *q)
+static void uacce_unbind_queue(struct uacce_queue *q)
 {
-	struct uacce_mm *uacce_mm = q->uacce_mm;
-
-	lockdep_assert_held(&q->uacce->mm_lock);
-
-	mutex_lock(&uacce_mm->lock);
-	list_del(&q->list);
-	mutex_unlock(&uacce_mm->lock);
-
-	if (list_empty(&uacce_mm->queues)) {
-		if (uacce_mm->handle)
-			iommu_sva_unbind_device(uacce_mm->handle);
-		list_del(&uacce_mm->list);
-		kfree(uacce_mm);
-	}
+	if (!q->handle)
+		return;
+	iommu_sva_unbind_device(q->handle);
 }
 
 static int uacce_fops_open(struct inode *inode, struct file *filep)
 {
-	struct uacce_mm *uacce_mm = NULL;
 	struct uacce_device *uacce;
 	struct uacce_queue *q;
 	int ret = 0;
@@ -205,21 +134,16 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
 	if (!q)
 		return -ENOMEM;
 
-	mutex_lock(&uacce->mm_lock);
-	uacce_mm = uacce_mm_get(uacce, q, current->mm);
-	mutex_unlock(&uacce->mm_lock);
-	if (!uacce_mm) {
-		ret = -ENOMEM;
+	ret = uacce_bind_queue(uacce, q);
+	if (ret)
 		goto out_with_mem;
-	}
 
 	q->uacce = uacce;
-	q->uacce_mm = uacce_mm;
 
 	if (uacce->ops->get_queue) {
-		ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q);
+		ret = uacce->ops->get_queue(uacce, q->pasid, q);
 		if (ret < 0)
-			goto out_with_mm;
+			goto out_with_bond;
 	}
 
 	init_waitqueue_head(&q->wait);
@@ -227,12 +151,14 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
 	uacce->inode = inode;
 	q->state = UACCE_Q_INIT;
 
+	mutex_lock(&uacce->queues_lock);
+	list_add(&q->list, &uacce->queues);
+	mutex_unlock(&uacce->queues_lock);
+
 	return 0;
 
-out_with_mm:
-	mutex_lock(&uacce->mm_lock);
-	uacce_mm_put(q);
-	mutex_unlock(&uacce->mm_lock);
+out_with_bond:
+	uacce_unbind_queue(q);
 out_with_mem:
 	kfree(q);
 	return ret;
@@ -241,14 +167,12 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
 static int uacce_fops_release(struct inode *inode, struct file *filep)
 {
 	struct uacce_queue *q = filep->private_data;
-	struct uacce_device *uacce = q->uacce;
 
+	mutex_lock(&q->uacce->queues_lock);
+	list_del(&q->list);
+	mutex_unlock(&q->uacce->queues_lock);
 	uacce_put_queue(q);
-
-	mutex_lock(&uacce->mm_lock);
-	uacce_mm_put(q);
-	mutex_unlock(&uacce->mm_lock);
-
+	uacce_unbind_queue(q);
 	kfree(q);
 
 	return 0;
@@ -513,8 +437,8 @@ struct uacce_device *uacce_alloc(struct device *parent,
 	if (ret < 0)
 		goto err_with_uacce;
 
-	INIT_LIST_HEAD(&uacce->mm_list);
-	mutex_init(&uacce->mm_lock);
+	INIT_LIST_HEAD(&uacce->queues);
+	mutex_init(&uacce->queues_lock);
 	device_initialize(&uacce->dev);
 	uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id);
 	uacce->dev.class = uacce_class;
@@ -561,8 +485,7 @@ EXPORT_SYMBOL_GPL(uacce_register);
  */
 void uacce_remove(struct uacce_device *uacce)
 {
-	struct uacce_mm *uacce_mm;
-	struct uacce_queue *q;
+	struct uacce_queue *q, *next_q;
 
 	if (!uacce)
 		return;
@@ -574,24 +497,12 @@ void uacce_remove(struct uacce_device *uacce)
 		unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
 
 	/* ensure no open queue remains */
-	mutex_lock(&uacce->mm_lock);
-	list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
-		/*
-		 * We don't take the uacce_mm->lock here. Since we hold the
-		 * device's mm_lock, no queue can be added to or removed from
-		 * this uacce_mm. We may run concurrently with mm_exit, but
-		 * uacce_put_queue() is serialized and iommu_sva_unbind_device()
-		 * waits for the lock that mm_exit is holding.
-		 */
-		list_for_each_entry(q, &uacce_mm->queues, list)
-			uacce_put_queue(q);
-
-		if (uacce->flags & UACCE_DEV_SVA) {
-			iommu_sva_unbind_device(uacce_mm->handle);
-			uacce_mm->handle = NULL;
-		}
+	mutex_lock(&uacce->queues_lock);
+	list_for_each_entry_safe(q, next_q, &uacce->queues, list) {
+		uacce_put_queue(q);
+		uacce_unbind_queue(q);
 	}
-	mutex_unlock(&uacce->mm_lock);
+	mutex_unlock(&uacce->queues_lock);
 
 	/* disable sva now since no opened queues */
 	if (uacce->flags & UACCE_DEV_SVA)
-- 
2.26.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 14:04 [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
  2020-04-08 14:04 ` [PATCH 1/2] uacce: Remove mm_exit() op Jean-Philippe Brucker
@ 2020-04-08 14:04 ` Jean-Philippe Brucker
  2020-04-08 18:35 ` [PATCH 0/2] " Jacob Pan
  2020-04-08 19:04 ` Jason Gunthorpe
  3 siblings, 0 replies; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-08 14:04 UTC (permalink / raw)
  To: iommu, linux-accelerators
  Cc: Jean-Philippe Brucker, arnd, gregkh, jgg, zhangfei.gao

After binding a device to an mm, device drivers currently need to
register a mm_exit handler. This function is called when the mm exits,
to gracefully stop DMA targeting the address space and flush page faults
to the IOMMU.

This is deemed too complex for the MMU release() notifier, which may be
triggered by any mmput() invocation, from about 120 callsites [1]. The
upcoming SVA module has an example of such complexity: the I/O Page
Fault handler would need to call mmput_async() instead of mmput() after
handling an IOPF, to avoid triggering the release() notifier which would
in turn drain the IOPF queue and lock up.

Another concern is the DMA stop function taking too long, up to several
minutes [2]. For some mmput() callers this may disturb other users. For
example, if the OOM killer picks the mm bound to a device as the victim
and that mm's memory is locked, if the release() takes too long, it
might choose additional innocent victims to kill.

To simplify the MMU release notifier, don't forward the notification to
device drivers. They don't need to stop DMA and drain page faults
anymore in the release() path. IOMMU drivers still remove the pgd and
invalidate IOTLBs, but they don't need to drain the IOPF queues anymore.
The PASID isn't freed as soon as the mm exits, it is held until the
device driver stops DMA and calls unbind.

Similarly to accessing invalid mappings:
* Incoming DMA transactions are aborted but not reported.
* ATS Translation Requests return Successful Translation Completions
  with R=W=0.
* PRI Page Requests return with Invalid Request.

For now remove iommu_sva_ops entirely. We might need to re-introduce
them at some point, for example to notify device drivers of unhandled
IOPF.

[1] https://lore.kernel.org/linux-iommu/20200306174239.GM31668@ziepe.ca/
[2] https://lore.kernel.org/linux-iommu/4d68da96-0ad5-b412-5987-2f7a6aa796c3@amd.com/

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 include/linux/iommu.h | 30 ------------------------------
 drivers/iommu/iommu.c | 11 -----------
 2 files changed, 41 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ef8b0bda6951..bd330d6343b78 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -53,8 +53,6 @@ struct iommu_fault_event;
 
 typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
 			struct device *, unsigned long, int, void *);
-typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
-				       void *);
 typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
 
 struct iommu_domain_geometry {
@@ -171,25 +169,6 @@ enum iommu_dev_features {
 
 #define IOMMU_PASID_INVALID	(-1U)
 
-/**
- * struct iommu_sva_ops - device driver callbacks for an SVA context
- *
- * @mm_exit: called when the mm is about to be torn down by exit_mmap. After
- *           @mm_exit returns, the device must not issue any more transaction
- *           with the PASID given as argument.
- *
- *           The @mm_exit handler is allowed to sleep. Be careful about the
- *           locks taken in @mm_exit, because they might lead to deadlocks if
- *           they are also held when dropping references to the mm. Consider the
- *           following call chain:
- *           mutex_lock(A); mmput(mm) -> exit_mm() -> @mm_exit() -> mutex_lock(A)
- *           Using mmput_async() prevents this scenario.
- *
- */
-struct iommu_sva_ops {
-	iommu_mm_exit_handler_t mm_exit;
-};
-
 #ifdef CONFIG_IOMMU_API
 
 /**
@@ -605,7 +584,6 @@ struct iommu_fwspec {
  */
 struct iommu_sva {
 	struct device			*dev;
-	const struct iommu_sva_ops	*ops;
 };
 
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -653,8 +631,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 					struct mm_struct *mm,
 					void *drvdata);
 void iommu_sva_unbind_device(struct iommu_sva *handle);
-int iommu_sva_set_ops(struct iommu_sva *handle,
-		      const struct iommu_sva_ops *ops);
 int iommu_sva_get_pasid(struct iommu_sva *handle);
 
 #else /* CONFIG_IOMMU_API */
@@ -1058,12 +1034,6 @@ static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
 {
 }
 
-static inline int iommu_sva_set_ops(struct iommu_sva *handle,
-				    const struct iommu_sva_ops *ops)
-{
-	return -EINVAL;
-}
-
 static inline int iommu_sva_get_pasid(struct iommu_sva *handle)
 {
 	return IOMMU_PASID_INVALID;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2b471419e26c3..dfed12328c032 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2637,17 +2637,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
 }
 EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
 
-int iommu_sva_set_ops(struct iommu_sva *handle,
-		      const struct iommu_sva_ops *sva_ops)
-{
-	if (handle->ops && handle->ops != sva_ops)
-		return -EEXIST;
-
-	handle->ops = sva_ops;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
-
 int iommu_sva_get_pasid(struct iommu_sva *handle)
 {
 	const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
-- 
2.26.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 14:04 [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
  2020-04-08 14:04 ` [PATCH 1/2] uacce: Remove mm_exit() op Jean-Philippe Brucker
  2020-04-08 14:04 ` [PATCH 2/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
@ 2020-04-08 18:35 ` Jacob Pan
  2020-04-08 19:02   ` Jason Gunthorpe
  2020-04-08 19:04 ` Jason Gunthorpe
  3 siblings, 1 reply; 26+ messages in thread
From: Jacob Pan @ 2020-04-08 18:35 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: arnd, gregkh, iommu, jgg, zhangfei.gao, linux-accelerators

Hi Jean,

On Wed,  8 Apr 2020 16:04:25 +0200
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> The IOMMU SVA API currently requires device drivers to implement an
> mm_exit() callback, which stops device jobs that do DMA. This function
> is called in the release() MMU notifier, when an address space that is
> shared with a device exits.
> 
> It has been noted several time during discussions about SVA that
> cancelling DMA jobs can be slow and complex, and doing it in the
> release() notifier might cause synchronization issues (patch 2 has
> more background). Device drivers must in any case call unbind() to
> remove their bond, after stopping DMA from a more favorable context
> (release of a file descriptor).
> 
> So after mm exits, rather than notifying device drivers, we can hold
> on to the PASID until unbind(), ask IOMMU drivers to silently abort
> DMA and Page Requests in the meantime. This change should relieve the
> mmput() path.
> 
I assume mm is destroyed after all the FDs are closed, so if uacce
tied unbind to FD release then unbind already happened before mm is
destroyed. What is there to worry about?

In VT-d, because of enqcmd and lazy PASID free we plan to hold on to the
PASID until mmdrop.
https://lore.kernel.org/patchwork/patch/1217762/

> Patch 1 removes the mm_exit() callback from the uacce module, and
> patch 2 removes it from the IOMMU API.
> 
> Jean-Philippe Brucker (2):
>   uacce: Remove mm_exit() op
>   iommu: Remove iommu_sva_ops::mm_exit()
> 
>  include/linux/iommu.h      |  30 -------
>  include/linux/uacce.h      |  34 ++------
>  drivers/iommu/iommu.c      |  11 ---
>  drivers/misc/uacce/uacce.c | 171
> +++++++++---------------------------- 4 files changed, 50
> insertions(+), 196 deletions(-)
> 

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 18:35 ` [PATCH 0/2] " Jacob Pan
@ 2020-04-08 19:02   ` Jason Gunthorpe
  2020-04-08 21:35     ` Jacob Pan
  0 siblings, 1 reply; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-08 19:02 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Jean-Philippe Brucker, arnd, gregkh, iommu, zhangfei.gao,
	linux-accelerators

On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:
> Hi Jean,
> 
> On Wed,  8 Apr 2020 16:04:25 +0200
> Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> 
> > The IOMMU SVA API currently requires device drivers to implement an
> > mm_exit() callback, which stops device jobs that do DMA. This function
> > is called in the release() MMU notifier, when an address space that is
> > shared with a device exits.
> > 
> > It has been noted several time during discussions about SVA that
> > cancelling DMA jobs can be slow and complex, and doing it in the
> > release() notifier might cause synchronization issues (patch 2 has
> > more background). Device drivers must in any case call unbind() to
> > remove their bond, after stopping DMA from a more favorable context
> > (release of a file descriptor).
> > 
> > So after mm exits, rather than notifying device drivers, we can hold
> > on to the PASID until unbind(), ask IOMMU drivers to silently abort
> > DMA and Page Requests in the meantime. This change should relieve the
> > mmput() path.
>
> I assume mm is destroyed after all the FDs are closed

FDs do not hold a mmget(), but they may hold a mmgrab(), ie anything
using mmu_notifiers has to hold a grab until the notifier is
destroyed, which is often triggered by FD close.

So the exit_mmap() -> release() may happen before the FDs are
destroyed, but the final mmdrop() will happen during some FD release.

But, in all the drivers I've looked at the PASID and the mmu_notifier
must have identical lifetimes.

> In VT-d, because of enqcmd and lazy PASID free we plan to hold on to the
> PASID until mmdrop.
> https://lore.kernel.org/patchwork/patch/1217762/

Why? The bind already gets a mmu_notifier which has refcounts and the
right lifetime for PASID.. This code could already be simplified by
using the mmu_notifier_get()/put() stuff.

A reason to store the PASID in the mm_struct would be if some code
needs fast access to it, but then I'm not sure how that works with
SVM_FLAG_PRIVATE_PASID ..

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 14:04 [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
                   ` (2 preceding siblings ...)
  2020-04-08 18:35 ` [PATCH 0/2] " Jacob Pan
@ 2020-04-08 19:04 ` Jason Gunthorpe
  3 siblings, 0 replies; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-08 19:04 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: arnd, gregkh, iommu, zhangfei.gao, linux-accelerators

On Wed, Apr 08, 2020 at 04:04:25PM +0200, Jean-Philippe Brucker wrote:
> The IOMMU SVA API currently requires device drivers to implement an
> mm_exit() callback, which stops device jobs that do DMA. This function
> is called in the release() MMU notifier, when an address space that is
> shared with a device exits.
> 
> It has been noted several time during discussions about SVA that
> cancelling DMA jobs can be slow and complex, and doing it in the
> release() notifier might cause synchronization issues (patch 2 has more
> background). Device drivers must in any case call unbind() to remove
> their bond, after stopping DMA from a more favorable context (release of
> a file descriptor).
> 
> So after mm exits, rather than notifying device drivers, we can hold on
> to the PASID until unbind(), ask IOMMU drivers to silently abort DMA and
> Page Requests in the meantime. This change should relieve the mmput()
> path.

At least all the patch comments look like they are on the right track
to me, thanks for doing this.

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 19:02   ` Jason Gunthorpe
@ 2020-04-08 21:35     ` Jacob Pan
  2020-04-08 22:32       ` Jason Gunthorpe
  0 siblings, 1 reply; 26+ messages in thread
From: Jacob Pan @ 2020-04-08 21:35 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

Hi Jason,

Thanks for the explanation, more comments/questions inline.

On Wed, 8 Apr 2020 16:02:26 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:
> > Hi Jean,
> > 
> > On Wed,  8 Apr 2020 16:04:25 +0200
> > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> >   
> > > The IOMMU SVA API currently requires device drivers to implement
> > > an mm_exit() callback, which stops device jobs that do DMA. This
> > > function is called in the release() MMU notifier, when an address
> > > space that is shared with a device exits.
> > > 
> > > It has been noted several time during discussions about SVA that
> > > cancelling DMA jobs can be slow and complex, and doing it in the
> > > release() notifier might cause synchronization issues (patch 2 has
> > > more background). Device drivers must in any case call unbind() to
> > > remove their bond, after stopping DMA from a more favorable
> > > context (release of a file descriptor).
> > > 
> > > So after mm exits, rather than notifying device drivers, we can
> > > hold on to the PASID until unbind(), ask IOMMU drivers to
> > > silently abort DMA and Page Requests in the meantime. This change
> > > should relieve the mmput() path.  
> >
> > I assume mm is destroyed after all the FDs are closed  
> 
> FDs do not hold a mmget(), but they may hold a mmgrab(), ie anything
> using mmu_notifiers has to hold a grab until the notifier is
> destroyed, which is often triggered by FD close.
> 
Sorry, I don't get this. Are you saying we have to hold a mmgrab()
between svm_bind/mmu_notifier_register and
svm_unbind/mmu_notifier_unregister?
Isn't the idea of mmu_notifier to avoid holding the mm reference and
rely on the notifier to tell us when mm is going away?
It seems both Intel and AMD iommu drivers don't hold mmgrab after
mmu_notifier_register.

> So the exit_mmap() -> release() may happen before the FDs are
> destroyed, but the final mmdrop() will be during some FD release when
> the final mmdrop() happens.
> 
Do you mean mmdrop() is after FD release? If so, unbind is called in FD
release should take care of everything, i.e. stops DMA, clear PASID
context on IOMMU, flush PRS queue etc.

Enforcing unbind upon FD close might be a precarious path, perhaps that
is why we have to deal with out of order situation?

> But, in all the drivers I've looked at the PASID and the mmu_notifier
> must have identical lifetimes.
> 
> > In VT-d, because of enqcmd and lazy PASID free we plan to hold on
> > to the PASID until mmdrop.
> > https://lore.kernel.org/patchwork/patch/1217762/  
> 
> Why? The bind already gets a mmu_notifier which has refcounts and the
> right lifetime for PASID.. This code could already be simplified by
> using the mmu_notifier_get()/put() stuff.
> 
Yes, I guess mmu_notifier_get()/put() is new :)
+Fenghua

> A reason to store the PASID in the mm_struct would be if some code
> needs fast access to it, but then I'm not sure how that works with
> SVM_FLAG_PRIVATE_PASID ..
> 
We plan to remove this flag.

> Jason

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 21:35     ` Jacob Pan
@ 2020-04-08 22:32       ` Jason Gunthorpe
  2020-04-08 23:48         ` Jacob Pan
  2020-04-08 23:49         ` Fenghua Yu
  0 siblings, 2 replies; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-08 22:32 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:
> > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:
> > > Hi Jean,
> > > 
> > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > >   
> > > > The IOMMU SVA API currently requires device drivers to implement
> > > > an mm_exit() callback, which stops device jobs that do DMA. This
> > > > function is called in the release() MMU notifier, when an address
> > > > space that is shared with a device exits.
> > > > 
> > > > It has been noted several time during discussions about SVA that
> > > > cancelling DMA jobs can be slow and complex, and doing it in the
> > > > release() notifier might cause synchronization issues (patch 2 has
> > > > more background). Device drivers must in any case call unbind() to
> > > > remove their bond, after stopping DMA from a more favorable
> > > > context (release of a file descriptor).
> > > > 
> > > > So after mm exits, rather than notifying device drivers, we can
> > > > hold on to the PASID until unbind(), ask IOMMU drivers to
> > > > silently abort DMA and Page Requests in the meantime. This change
> > > > should relieve the mmput() path.  
> > >
> > > I assume mm is destroyed after all the FDs are closed  
> > 
> > FDs do not hold a mmget(), but they may hold a mmgrab(), ie anything
> > using mmu_notifiers has to hold a grab until the notifier is
> > destroyed, which is often triggered by FD close.
> > 
> Sorry, I don't get this. Are you saying we have to hold a mmgrab()
> between svm_bind/mmu_notifier_register and
> svm_unbind/mmu_notifier_unregister?

Yes. This is done automatically for the caller inside the mmu_notifier
implementation. We now even store the mm_struct pointer inside the
notifier.

Once a notifier is registered the mm_struct remains valid memory until
the notifier is unregistered.

> Isn't the idea of mmu_notifier is to avoid holding the mm reference and
> rely on the notifier to tell us when mm is going away?

The notifier only holds a mmgrab(), not a mmget() - this allows
exit_mmap to proceed, but the mm_struct memory remains.

This is also probably why it is a bad idea to tie the lifetime of
something like a pasid to the mmdrop as an evil user could cause a
something like a pasid to the mmdrop as a evil user could cause a
large number of mm structs to be released but not freed, probably
defeating cgroup limits and so forth (not sure)

> It seems both Intel and AMD iommu drivers don't hold mmgrab after
> mmu_notifier_register.

It is done internally to the implementation.

> > So the exit_mmap() -> release() may happen before the FDs are
> > destroyed, but the final mmdrop() will be during some FD release when
> > the final mmdrop() happens.
> 
> Do you mean mmdrop() is after FD release? 

Yes, it will be done by the mmu_notifier_unregister(), which should be
called during FD release if the iommu lifetime is linked to some FD.

> If so, unbind is called in FD release should take care of
> everything, i.e. stops DMA, clear PASID context on IOMMU, flush PRS
> queue etc.

Yes, this is the proper way, when the DMA is stopped and no use of the
PASID remains then you can drop the mmu notifier and release the PASID
entirely. If that is linked to the lifetime of the FD then forget
completely about the mm_struct lifetime, it doesn't matter..

> Enforcing unbind upon FD close might be a precarious path, perhaps
> that is why we have to deal with out of order situation?

How so? You just put it in the FD release function :)

> > > In VT-d, because of enqcmd and lazy PASID free we plan to hold on
> > > to the PASID until mmdrop.
> > > https://lore.kernel.org/patchwork/patch/1217762/  
> > 
> > Why? The bind already gets a mmu_notifier which has refcounts and the
> > right lifetime for PASID.. This code could already be simplified by
> > using the mmu_notifier_get()/put() stuff.
> > 
> Yes, I guess mmu_notifier_get()/put() is new :)
> +Fenghua

I was going to convert the intel code when I did many other drivers,
but it was a bit too complex..

But the approach is straightforward. Get rid of the mm search list and
use mmu_notifier_get(). This returns a singleton notifier for the
mm_struct and handles refcounting/etc

Use mmu_notifier_put() during an unbind, it will call back to
free_notifier() to do the final frees (ie this is where the pasid
should go away)

For the SVM_FLAG_PRIVATE_PASID continue to use mmu_notifier_register,
however this can now be mixed with mmu_notifier_put() so the cleanup
is the same. A separate ops static struct is needed to create a unique
key though

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 22:32       ` Jason Gunthorpe
@ 2020-04-08 23:48         ` Jacob Pan
  2020-04-09  6:39           ` Jean-Philippe Brucker
  2020-04-09 12:08           ` Jason Gunthorpe
  2020-04-08 23:49         ` Fenghua Yu
  1 sibling, 2 replies; 26+ messages in thread
From: Jacob Pan @ 2020-04-08 23:48 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Wed, 8 Apr 2020 19:32:18 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:
> > > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:  
> > > > Hi Jean,
> > > > 
> > > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > > >     
> > > > > The IOMMU SVA API currently requires device drivers to
> > > > > implement an mm_exit() callback, which stops device jobs that
> > > > > do DMA. This function is called in the release() MMU
> > > > > notifier, when an address space that is shared with a device
> > > > > exits.
> > > > > 
> > > > > It has been noted several time during discussions about SVA
> > > > > that cancelling DMA jobs can be slow and complex, and doing
> > > > > it in the release() notifier might cause synchronization
> > > > > issues (patch 2 has more background). Device drivers must in
> > > > > any case call unbind() to remove their bond, after stopping
> > > > > DMA from a more favorable context (release of a file
> > > > > descriptor).
> > > > > 
> > > > > So after mm exits, rather than notifying device drivers, we
> > > > > can hold on to the PASID until unbind(), ask IOMMU drivers to
> > > > > silently abort DMA and Page Requests in the meantime. This
> > > > > change should relieve the mmput() path.    
> > > >
> > > > I assume mm is destroyed after all the FDs are closed    
> > > 
> > > FDs do not hold a mmget(), but they may hold a mmgrab(), ie
> > > anything using mmu_notifiers has to hold a grab until the
> > > notifier is destroyed, which is often triggered by FD close.
> > >   
> > Sorry, I don't get this. Are you saying we have to hold a mmgrab()
> > between svm_bind/mmu_notifier_register and
> > svm_unbind/mmu_notifier_unregister?  
> 
> Yes. This is done automatically for the caller inside the mmu_notifier
> implementation. We now even store the mm_struct pointer inside the
> notifier.
> 
> Once a notifier is registered the mm_struct remains valid memory until
> the notifier is unregistered.
> 
> > Isn't the idea of mmu_notifier is to avoid holding the mm reference
> > and rely on the notifier to tell us when mm is going away?  
> 
> The notifier only holds a mmgrab(), not a mmget() - this allows
> exit_mmap to proceed, but the mm_struct memory remains.
> 
> This is also probably why it is a bad idea to tie the lifetime of
> something like a pasid to the mmdrop as a evil user could cause a
> large number of mm structs to be released but not freed, probably
> defeating cgroup limits and so forth (not sure)
> 
> > It seems both Intel and AMD iommu drivers don't hold mmgrab after
> > mmu_notifier_register.  
> 
> It is done internally to the implementation.
> 
> > > So the exit_mmap() -> release() may happen before the FDs are
> > > destroyed, but the final mmdrop() will be during some FD release
> > > when the final mmdrop() happens.  
> > 
> > Do you mean mmdrop() is after FD release?   
> 
> Yes, it will be done by the mmu_notifier_unregister(), which should be
> called during FD release if the iommu lifetime is linked to some FD.
> 
> > If so, unbind is called in FD release should take care of
> > everything, i.e. stops DMA, clear PASID context on IOMMU, flush PRS
> > queue etc.  
> 
> Yes, this is the proper way, when the DMA is stopped and no use of the
> PASID remains then you can drop the mmu notifier and release the PASID
> entirely. If that is linked to the lifetime of the FD then forget
> completely about the mm_struct lifetime, it doesn't matter..
> 
Got everything above, thanks a lot.

If everything is in order with the FD close, why do we need to
"ask IOMMU drivers to silently abort DMA and Page Requests in the
meantime." in the mm_exit notifier? This will be done orderly in unbind
anyway.

> > Enforcing unbind upon FD close might be a precarious path, perhaps
> > that is why we have to deal with out of order situation?  
> 
> How so? You just put it in the FD release function :)
> 
I was thinking some driver may choose to defer unbind in some workqueue
etc.

> > > > In VT-d, because of enqcmd and lazy PASID free we plan to hold
> > > > on to the PASID until mmdrop.
> > > > https://lore.kernel.org/patchwork/patch/1217762/    
> > > 
> > > Why? The bind already gets a mmu_notifier which has refcounts and
> > > the right lifetime for PASID.. This code could already be
> > > simplified by using the mmu_notifier_get()/put() stuff.
> > >   
> > Yes, I guess mmu_notifier_get()/put() is new :)
> > +Fenghua  
> 
> I was going to convert the intel code when I did many other drivers,
> but it was a bit too complex..
> 
> But the approach is straightforward. Get rid of the mm search list and
> use mmu_notifier_get(). This returns a singlton notifier for the
> mm_struct and handles refcounting/etc
> 
> Use mmu_notifier_put() during a unbind, it will callback to
> free_notifier() to do the final frees (ie this is where the pasid
> should go away)
> 
> For the SVM_FLAG_PRIVATE_PASID continue to use mmu_notifier_register,
> however this can now be mixed with mmu_notifier_put() so the cleanup
> is the same. A separate ops static struct is needed to create a unique
> key though
> 
> Jason

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 22:32       ` Jason Gunthorpe
  2020-04-08 23:48         ` Jacob Pan
@ 2020-04-08 23:49         ` Fenghua Yu
  2020-04-09 12:12           ` Jason Gunthorpe
  1 sibling, 1 reply; 26+ messages in thread
From: Fenghua Yu @ 2020-04-08 23:49 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Jean-Philippe Brucker, arnd, gregkh, iommu, zhangfei.gao,
	linux-accelerators

On Wed, Apr 08, 2020 at 07:32:18PM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:
> > > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:
> > > > Hi Jean,
> > > > 
> > > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > > >   
> > > > > The IOMMU SVA API currently requires device drivers to implement
> > > > > an mm_exit() callback, which stops device jobs that do DMA. This
> > > > > function is called in the release() MMU notifier, when an address
> > > > > space that is shared with a device exits.
> > > > > 
> > > > > It has been noted several time during discussions about SVA that
> > > > > cancelling DMA jobs can be slow and complex, and doing it in the
> > > > > release() notifier might cause synchronization issues (patch 2 has
> > > > > more background). Device drivers must in any case call unbind() to
> > > > > remove their bond, after stopping DMA from a more favorable
> > > > > context (release of a file descriptor).
> > > > > 
> > > > > So after mm exits, rather than notifying device drivers, we can
> > > > > hold on to the PASID until unbind(), ask IOMMU drivers to
> > > > > silently abort DMA and Page Requests in the meantime. This change
> > > > > should relieve the mmput() path.  
> > > >
> > > > I assume mm is destroyed after all the FDs are closed  
> > > 
> > > FDs do not hold a mmget(), but they may hold a mmgrab(), ie anything
> > > using mmu_notifiers has to hold a grab until the notifier is
> > > destroyed, which is often triggered by FD close.
> > > 
> > Sorry, I don't get this. Are you saying we have to hold a mmgrab()
> > between svm_bind/mmu_notifier_register and
> > svm_unbind/mmu_notifier_unregister?
> 
> Yes. This is done automatically for the caller inside the mmu_notifier
> implementation. We now even store the mm_struct pointer inside the
> notifier.
> 
> Once a notifier is registered the mm_struct remains valid memory until
> the notifier is unregistered.
> 
> > Isn't the idea of mmu_notifier is to avoid holding the mm reference and
> > rely on the notifier to tell us when mm is going away?
> 
> The notifier only holds a mmgrab(), not a mmget() - this allows
> exit_mmap to proceed, but the mm_struct memory remains.
> 
> This is also probably why it is a bad idea to tie the lifetime of
> something like a pasid to the mmdrop as a evil user could cause a
> large number of mm structs to be released but not freed, probably
> defeating cgroup limits and so forth (not sure)

The max number of processes can be limited for a user. PASID is per
address space, so the max number of PASIDs can be limited for the user.
So the user cannot exhaust PASIDs so easily, right?

Intel ENQCMD instruction uses PASID MSR to store the PASID. Each software
thread can store the PASID in its own MSR/fpu state.

If we free the PASID in unbind_mm(), the threads' PASID MSRs need to be
cleared as well: tracking which thread has the MSR set up, searching the
threads, sending IPIs to the threads to clear the MSR, locking, etc. It's
doable but complex, with low performance.

Binding the PASID to the mm and freeing the PASID in __mmdrop() can get
rid of the complexity.

Thanks.

-Fenghua
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 23:48         ` Jacob Pan
@ 2020-04-09  6:39           ` Jean-Philippe Brucker
  2020-04-09 14:14             ` Jacob Pan
  2020-04-09 12:08           ` Jason Gunthorpe
  1 sibling, 1 reply; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-09  6:39 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Wed, Apr 08, 2020 at 04:48:02PM -0700, Jacob Pan wrote:
> On Wed, 8 Apr 2020 19:32:18 -0300
> Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> > On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:
> > > > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:  
> > > > > Hi Jean,
> > > > > 
> > > > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > > > >     
> > > > > > The IOMMU SVA API currently requires device drivers to
> > > > > > implement an mm_exit() callback, which stops device jobs that
> > > > > > do DMA. This function is called in the release() MMU
> > > > > > notifier, when an address space that is shared with a device
> > > > > > exits.
> > > > > > 
> > > > > > It has been noted several time during discussions about SVA
> > > > > > that cancelling DMA jobs can be slow and complex, and doing
> > > > > > it in the release() notifier might cause synchronization
> > > > > > issues (patch 2 has more background). Device drivers must in
> > > > > > any case call unbind() to remove their bond, after stopping
> > > > > > DMA from a more favorable context (release of a file
> > > > > > descriptor).
> > > > > > 
> > > > > > So after mm exits, rather than notifying device drivers, we
> > > > > > can hold on to the PASID until unbind(), ask IOMMU drivers to
> > > > > > silently abort DMA and Page Requests in the meantime. This
> > > > > > change should relieve the mmput() path.    
> > > > >
> > > > > I assume mm is destroyed after all the FDs are closed    
> > > > 
> > > > FDs do not hold a mmget(), but they may hold a mmgrab(), ie
> > > > anything using mmu_notifiers has to hold a grab until the
> > > > notifier is destroyed, which is often triggered by FD close.
> > > >   
> > > Sorry, I don't get this. Are you saying we have to hold a mmgrab()
> > > between svm_bind/mmu_notifier_register and
> > > svm_unbind/mmu_notifier_unregister?  
> > 
> > Yes. This is done automatically for the caller inside the mmu_notifier
> > implementation. We now even store the mm_struct pointer inside the
> > notifier.
> > 
> > Once a notifier is registered the mm_struct remains valid memory until
> > the notifier is unregistered.
> > 
> > > Isn't the idea of mmu_notifier is to avoid holding the mm reference
> > > and rely on the notifier to tell us when mm is going away?  
> > 
> > The notifier only holds a mmgrab(), not a mmget() - this allows
> > exit_mmap to proceed, but the mm_struct memory remains.
> > 
> > This is also probably why it is a bad idea to tie the lifetime of
> > something like a pasid to the mmdrop as a evil user could cause a
> > large number of mm structs to be released but not freed, probably
> > defeating cgroup limits and so forth (not sure)
> > 
> > > It seems both Intel and AMD iommu drivers don't hold mmgrab after
> > > mmu_notifier_register.  
> > 
> > It is done internally to the implementation.
> > 
> > > > So the exit_mmap() -> release() may happen before the FDs are
> > > > destroyed, but the final mmdrop() will be during some FD release
> > > > when the final mmdrop() happens.  
> > > 
> > > Do you mean mmdrop() is after FD release?   
> > 
> > Yes, it will be done by the mmu_notifier_unregister(), which should be
> > called during FD release if the iommu lifetime is linked to some FD.
> > 
> > > If so, unbind is called in FD release should take care of
> > > everything, i.e. stops DMA, clear PASID context on IOMMU, flush PRS
> > > queue etc.  
> > 
> > Yes, this is the proper way, when the DMA is stopped and no use of the
> > PASID remains then you can drop the mmu notifier and release the PASID
> > entirely. If that is linked to the lifetime of the FD then forget
> > completely about the mm_struct lifetime, it doesn't matter..
> > 
> Got everything above, thanks a lot.
> 
> If everything is in order with the FD close. Why do we need to 
> "ask IOMMU drivers to silently abort DMA and Page Requests in the
> meantime." in mm_exit notifier? This will be done orderly in unbind
> anyway.

When the process is killed, mm release can happen before fds are released.
If you look at do_exit() in kernel/exit.c:

	exit_mm()
	  mmput()
	   -> mmu release notifier
	...
	exit_files()
	  close_files()
	    fput()
	exit_task_work()
	  __fput()
	   -> unbind()

Thanks,
Jean

> 
> > > Enforcing unbind upon FD close might be a precarious path, perhaps
> > > that is why we have to deal with out of order situation?  
> > 
> > How so? You just put it in the FD release function :)
> > 
> I was thinking some driver may choose to defer unbind in some workqueue
> etc.
> 
> > > > > In VT-d, because of enqcmd and lazy PASID free we plan to hold
> > > > > on to the PASID until mmdrop.
> > > > > https://lore.kernel.org/patchwork/patch/1217762/    
> > > > 
> > > > Why? The bind already gets a mmu_notifier which has refcounts and
> > > > the right lifetime for PASID.. This code could already be
> > > > simplified by using the mmu_notifier_get()/put() stuff.
> > > >   
> > > Yes, I guess mmu_notifier_get()/put() is new :)
> > > +Fenghua  
> > 
> > I was going to convert the intel code when I did many other drivers,
> > but it was a bit too complex..
> > 
> > But the approach is straightforward. Get rid of the mm search list and
> > use mmu_notifier_get(). This returns a singlton notifier for the
> > mm_struct and handles refcounting/etc
> > 
> > Use mmu_notifier_put() during a unbind, it will callback to
> > free_notifier() to do the final frees (ie this is where the pasid
> > should go away)
> > 
> > For the SVM_FLAG_PRIVATE_PASID continue to use mmu_notifier_register,
> > however this can now be mixed with mmu_notifier_put() so the cleanup
> > is the same. A separate ops static struct is needed to create a unique
> > key though
> > 
> > Jason
> 
> [Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] uacce: Remove mm_exit() op
  2020-04-08 14:04 ` [PATCH 1/2] uacce: Remove mm_exit() op Jean-Philippe Brucker
@ 2020-04-09  9:07   ` Zhangfei Gao
  2020-04-09  9:44     ` Jean-Philippe Brucker
  0 siblings, 1 reply; 26+ messages in thread
From: Zhangfei Gao @ 2020-04-09  9:07 UTC (permalink / raw)
  To: Jean-Philippe Brucker, iommu, linux-accelerators; +Cc: arnd, gregkh, jgg



On 2020/4/8 下午10:04, Jean-Philippe Brucker wrote:
> The mm_exit() op will be removed from the SVA API. When a process dies
> and its mm goes away, the IOMMU driver won't notify device drivers
> anymore. Drivers should expect to handle a lot more aborted DMA. On the
> upside, it does greatly simplify the queue management.
>
> The uacce_mm struct, that tracks all queues bound to an mm, was only
> used by the mm_exit() callback. Remove it.
>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Thanks Jean for doing this.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>

Except one line.
> -static void uacce_mm_put(struct uacce_queue *q)
> +static void uacce_unbind_queue(struct uacce_queue *q)
>   {
> -	struct uacce_mm *uacce_mm = q->uacce_mm;
> -
> -	lockdep_assert_held(&q->uacce->mm_lock);
> -
> -	mutex_lock(&uacce_mm->lock);
> -	list_del(&q->list);
> -	mutex_unlock(&uacce_mm->lock);
> -
> -	if (list_empty(&uacce_mm->queues)) {
> -		if (uacce_mm->handle)
> -			iommu_sva_unbind_device(uacce_mm->handle);
> -		list_del(&uacce_mm->list);
> -		kfree(uacce_mm);
> -	}
> +	if (!q->handle)
> +		return;
> +	iommu_sva_unbind_device(q->handle);
+ q->handle = 0;

Otherwise iommu_sva_unbind_device may be called twice.
Since uacce_unbind_queue can be called by uacce_remove and uacce_fops_release.

Thanks

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] uacce: Remove mm_exit() op
  2020-04-09  9:07   ` Zhangfei Gao
@ 2020-04-09  9:44     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-09  9:44 UTC (permalink / raw)
  To: Zhangfei Gao; +Cc: gregkh, iommu, arnd, linux-accelerators, jgg

On Thu, Apr 09, 2020 at 05:07:34PM +0800, Zhangfei Gao wrote:
> 
> 
> On 2020/4/8 下午10:04, Jean-Philippe Brucker wrote:
> > The mm_exit() op will be removed from the SVA API. When a process dies
> > and its mm goes away, the IOMMU driver won't notify device drivers
> > anymore. Drivers should expect to handle a lot more aborted DMA. On the
> > upside, it does greatly simplify the queue management.
> > 
> > The uacce_mm struct, that tracks all queues bound to an mm, was only
> > used by the mm_exit() callback. Remove it.
> > 
> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> Thanks Jean for doing this.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> 
> Except one line.
> > -static void uacce_mm_put(struct uacce_queue *q)
> > +static void uacce_unbind_queue(struct uacce_queue *q)
> >   {
> > -	struct uacce_mm *uacce_mm = q->uacce_mm;
> > -
> > -	lockdep_assert_held(&q->uacce->mm_lock);
> > -
> > -	mutex_lock(&uacce_mm->lock);
> > -	list_del(&q->list);
> > -	mutex_unlock(&uacce_mm->lock);
> > -
> > -	if (list_empty(&uacce_mm->queues)) {
> > -		if (uacce_mm->handle)
> > -			iommu_sva_unbind_device(uacce_mm->handle);
> > -		list_del(&uacce_mm->list);
> > -		kfree(uacce_mm);
> > -	}
> > +	if (!q->handle)
> > +		return;
> > +	iommu_sva_unbind_device(q->handle);
> + q->handle = 0;
> 
> Otherwise iommu_sva_unbind_device maybe called twice.
> Since uacce_unbind_queue can be called by uacce_remove and uacce_fops_release.

Thanks, I'll add it in v2

Jean
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 23:48         ` Jacob Pan
  2020-04-09  6:39           ` Jean-Philippe Brucker
@ 2020-04-09 12:08           ` Jason Gunthorpe
  2020-04-09 16:31             ` Jacob Pan
  1 sibling, 1 reply; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-09 12:08 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Wed, Apr 08, 2020 at 04:48:02PM -0700, Jacob Pan wrote:
> > Yes, this is the proper way, when the DMA is stopped and no use of the
> > PASID remains then you can drop the mmu notifier and release the PASID
> > entirely. If that is linked to the lifetime of the FD then forget
> > completely about the mm_struct lifetime, it doesn't matter..
> > 
> Got everything above, thanks a lot.
> 
> If everything is in order with the FD close. Why do we need to 
> "ask IOMMU drivers to silently abort DMA and Page Requests in the
> meantime." in mm_exit notifier? This will be done orderly in unbind
> anyway.

I thought this is exactly what Jean-Phillippe is removing here, it is
a bad idea for the reasons he explained.

> > > Enforcing unbind upon FD close might be a precarious path, perhaps
> > > that is why we have to deal with out of order situation?  
> > 
> > How so? You just put it in the FD release function :)
> 
> I was thinking some driver may choose to defer unbind in some workqueue
> etc.

Doesn't really change anything, the lifetime of the PASID would be
the lifetime of the notifier and the bind, and DMA can't continue
without the notifier registered.

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-08 23:49         ` Fenghua Yu
@ 2020-04-09 12:12           ` Jason Gunthorpe
  0 siblings, 0 replies; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-09 12:12 UTC (permalink / raw)
  To: Fenghua Yu
  Cc: Jean-Philippe Brucker, arnd, gregkh, iommu, zhangfei.gao,
	linux-accelerators

On Wed, Apr 08, 2020 at 04:49:01PM -0700, Fenghua Yu wrote:

> > > Isn't the idea of mmu_notifier is to avoid holding the mm reference and
> > > rely on the notifier to tell us when mm is going away?
> > 
> > The notifier only holds a mmgrab(), not a mmget() - this allows
> > exit_mmap to proceed, but the mm_struct memory remains.
> > 
> > This is also probably why it is a bad idea to tie the lifetime of
> > something like a pasid to the mmdrop as a evil user could cause a
> > large number of mm structs to be released but not freed, probably
> > defeating cgroup limits and so forth (not sure)
> 
> The max number of processes can be limited for a user. PASID is per
> address space so the max number of PASID can be limited for the user.
> So the user cannot exhaust PASID so easily, right?

With the patch Jacob pointed to, the PASID lifetime is tied to mmdrop,
and I think (but did not check) that the cgroup accounting happens
before mmdrop.

> Binding the PASID to the mm and freeing the PASID in __mmdrop() can get
> ride of the complexity.

This is a much more reasonable explanation and should be in the patch
commit instead of what is there.

However, it still seems unnecessary to reach for arch code - the
singleton notifier can be arranged to live until exit_mmap or fd
release, whichever is longer, by putting a mmu_notifier_put() in the
release() method

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09  6:39           ` Jean-Philippe Brucker
@ 2020-04-09 14:14             ` Jacob Pan
  2020-04-09 14:25               ` Jason Gunthorpe
  2020-04-09 14:50               ` Jean-Philippe Brucker
  0 siblings, 2 replies; 26+ messages in thread
From: Jacob Pan @ 2020-04-09 14:14 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Thu, 9 Apr 2020 08:39:05 +0200
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> On Wed, Apr 08, 2020 at 04:48:02PM -0700, Jacob Pan wrote:
> > On Wed, 8 Apr 2020 19:32:18 -0300
> > Jason Gunthorpe <jgg@ziepe.ca> wrote:
> >   
> > > On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:  
> > > > > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:    
> > > > > > Hi Jean,
> > > > > > 
> > > > > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > > > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > > > > >       
> > > > > > > The IOMMU SVA API currently requires device drivers to
> > > > > > > implement an mm_exit() callback, which stops device jobs
> > > > > > > that do DMA. This function is called in the release() MMU
> > > > > > > notifier, when an address space that is shared with a
> > > > > > > device exits.
> > > > > > > 
> > > > > > > It has been noted several time during discussions about
> > > > > > > SVA that cancelling DMA jobs can be slow and complex, and
> > > > > > > doing it in the release() notifier might cause
> > > > > > > synchronization issues (patch 2 has more background).
> > > > > > > Device drivers must in any case call unbind() to remove
> > > > > > > their bond, after stopping DMA from a more favorable
> > > > > > > context (release of a file descriptor).
> > > > > > > 
> > > > > > > So after mm exits, rather than notifying device drivers,
> > > > > > > we can hold on to the PASID until unbind(), ask IOMMU
> > > > > > > drivers to silently abort DMA and Page Requests in the
> > > > > > > meantime. This change should relieve the mmput()
> > > > > > > path.      
> > > > > >
> > > > > > I assume mm is destroyed after all the FDs are closed      
> > > > > 
> > > > > FDs do not hold a mmget(), but they may hold a mmgrab(), ie
> > > > > anything using mmu_notifiers has to hold a grab until the
> > > > > notifier is destroyed, which is often triggered by FD close.
> > > > >     
> > > > Sorry, I don't get this. Are you saying we have to hold a
> > > > mmgrab() between svm_bind/mmu_notifier_register and
> > > > svm_unbind/mmu_notifier_unregister?    
> > > 
> > > Yes. This is done automatically for the caller inside the
> > > mmu_notifier implementation. We now even store the mm_struct
> > > pointer inside the notifier.
> > > 
> > > Once a notifier is registered the mm_struct remains valid memory
> > > until the notifier is unregistered.
> > >   
> > > > Isn't the idea of mmu_notifier is to avoid holding the mm
> > > > reference and rely on the notifier to tell us when mm is going
> > > > away?    
> > > 
> > > The notifier only holds a mmgrab(), not a mmget() - this allows
> > > exit_mmap to proceed, but the mm_struct memory remains.
> > > 
> > > This is also probably why it is a bad idea to tie the lifetime of
> > > something like a pasid to the mmdrop as a evil user could cause a
> > > large number of mm structs to be released but not freed, probably
> > > defeating cgroup limits and so forth (not sure)
> > >   
> > > > It seems both Intel and AMD iommu drivers don't hold mmgrab
> > > > after mmu_notifier_register.    
> > > 
> > > It is done internally to the implementation.
> > >   
> > > > > So the exit_mmap() -> release() may happen before the FDs are
> > > > > destroyed, but the final mmdrop() will be during some FD
> > > > > release when the final mmdrop() happens.    
> > > > 
> > > > Do you mean mmdrop() is after FD release?     
> > > 
> > > Yes, it will be done by the mmu_notifier_unregister(), which
> > > should be called during FD release if the iommu lifetime is
> > > linked to some FD. 
> > > > If so, unbind is called in FD release should take care of
> > > > everything, i.e. stops DMA, clear PASID context on IOMMU, flush
> > > > PRS queue etc.    
> > > 
> > > Yes, this is the proper way, when the DMA is stopped and no use
> > > of the PASID remains then you can drop the mmu notifier and
> > > release the PASID entirely. If that is linked to the lifetime of
> > > the FD then forget completely about the mm_struct lifetime, it
> > > doesn't matter.. 
> > Got everything above, thanks a lot.
> > 
> > If everything is in order with the FD close. Why do we need to 
> > "ask IOMMU drivers to silently abort DMA and Page Requests in the
> > meantime." in mm_exit notifier? This will be done orderly in unbind
> > anyway.  
> 
> When the process is killed, mm release can happen before fds are
> released. If you look at do_exit() in kernel/exit.c:
> 
> 	exit_mm()
> 	  mmput()
> 	   -> mmu release notifier  
> 	...
> 	exit_files()
> 	  close_files()
> 	    fput()
> 	exit_task_work()
> 	  __fput()
> 	   -> unbind()  
> 
So unbind is coming anyway, the difference in handling in mmu release
notifier is whether we silently drop DMA fault vs. reporting fault?
If a process crashes during unbind, something has already gone seriously
wrong, and a DMA fault is expected.
I think having some error indication is useful, compared to "silently
drop"

Thanks,

Jacob

> Thanks,
> Jean
> 
> >   
> > > > Enforcing unbind upon FD close might be a precarious path,
> > > > perhaps that is why we have to deal with out of order
> > > > situation?    
> > > 
> > > How so? You just put it in the FD release function :)
> > >   
> > I was thinking some driver may choose to defer unbind in some
> > workqueue etc.
> >   
> > > > > > In VT-d, because of enqcmd and lazy PASID free we plan to
> > > > > > hold on to the PASID until mmdrop.
> > > > > > https://lore.kernel.org/patchwork/patch/1217762/      
> > > > > 
> > > > > Why? The bind already gets a mmu_notifier which has refcounts
> > > > > and the right lifetime for PASID.. This code could already be
> > > > > simplified by using the mmu_notifier_get()/put() stuff.
> > > > >     
> > > > Yes, I guess mmu_notifier_get()/put() is new :)
> > > > +Fenghua    
> > > 
> > > I was going to convert the intel code when I did many other
> > > drivers, but it was a bit too complex..
> > > 
> > > But the approach is straightforward. Get rid of the mm search
> > > list and use mmu_notifier_get(). This returns a singlton notifier
> > > for the mm_struct and handles refcounting/etc
> > > 
> > > Use mmu_notifier_put() during a unbind, it will callback to
> > > free_notifier() to do the final frees (ie this is where the pasid
> > > should go away)
> > > 
> > > For the SVM_FLAG_PRIVATE_PASID continue to use
> > > mmu_notifier_register, however this can now be mixed with
> > > mmu_notifier_put() so the cleanup is the same. A separate ops
> > > static struct is needed to create a unique key though
> > > 
> > > Jason  
> > 
> > [Jacob Pan]  

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 14:14             ` Jacob Pan
@ 2020-04-09 14:25               ` Jason Gunthorpe
  2020-04-09 16:21                 ` Jacob Pan
  2020-04-09 14:50               ` Jean-Philippe Brucker
  1 sibling, 1 reply; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-09 14:25 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Thu, Apr 09, 2020 at 07:14:24AM -0700, Jacob Pan wrote:
> > When the process is killed, mm release can happen before fds are
> > released. If you look at do_exit() in kernel/exit.c:
> > 
> > 	exit_mm()
> > 	  mmput()
> > 	   -> mmu release notifier  
> > 	...
> > 	exit_files()
> > 	  close_files()
> > 	    fput()
> > 	exit_task_work()
> > 	  __fput()
> > 	   -> unbind()  
> > 
> So unbind is coming anyway, the difference in handling in mmu release
> notifier is whether we silently drop DMA fault vs. reporting fault?

Userspace can significantly delay the final fput triggering the
unbind, the above is only for the trivial case where the process
owning the mm_struct is the only process holding the fd.

The destruction of a mm_struct should be treated the same as unmapping
every vma in the process. The observable effect should be no different
than munmap.

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 14:14             ` Jacob Pan
  2020-04-09 14:25               ` Jason Gunthorpe
@ 2020-04-09 14:50               ` Jean-Philippe Brucker
  2020-04-09 16:27                 ` Jacob Pan
  2020-04-10 15:52                 ` Jacob Pan
  1 sibling, 2 replies; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-09 14:50 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Thu, Apr 09, 2020 at 07:14:24AM -0700, Jacob Pan wrote:
> On Thu, 9 Apr 2020 08:39:05 +0200
> Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> 
> > On Wed, Apr 08, 2020 at 04:48:02PM -0700, Jacob Pan wrote:
> > > On Wed, 8 Apr 2020 19:32:18 -0300
> > > Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > >   
> > > > On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:  
> > > > > > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan wrote:    
> > > > > > > Hi Jean,
> > > > > > > 
> > > > > > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > > > > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > > > > > >       
> > > > > > > > The IOMMU SVA API currently requires device drivers to
> > > > > > > > implement an mm_exit() callback, which stops device jobs
> > > > > > > > that do DMA. This function is called in the release() MMU
> > > > > > > > notifier, when an address space that is shared with a
> > > > > > > > device exits.
> > > > > > > > 
> > > > > > > > It has been noted several times during discussions about
> > > > > > > > SVA that cancelling DMA jobs can be slow and complex, and
> > > > > > > > doing it in the release() notifier might cause
> > > > > > > > synchronization issues (patch 2 has more background).
> > > > > > > > Device drivers must in any case call unbind() to remove
> > > > > > > > their bond, after stopping DMA from a more favorable
> > > > > > > > context (release of a file descriptor).
> > > > > > > > 
> > > > > > > > So after mm exits, rather than notifying device drivers,
> > > > > > > > we can hold on to the PASID until unbind(), ask IOMMU
> > > > > > > > drivers to silently abort DMA and Page Requests in the
> > > > > > > > meantime. This change should relieve the mmput()
> > > > > > > > path.      
> > > > > > >
> > > > > > > I assume mm is destroyed after all the FDs are closed      
> > > > > > 
> > > > > > FDs do not hold a mmget(), but they may hold a mmgrab(), ie
> > > > > > anything using mmu_notifiers has to hold a grab until the
> > > > > > notifier is destroyed, which is often triggered by FD close.
> > > > > >     
> > > > > Sorry, I don't get this. Are you saying we have to hold a
> > > > > mmgrab() between svm_bind/mmu_notifier_register and
> > > > > svm_unbind/mmu_notifier_unregister?    
> > > > 
> > > > Yes. This is done automatically for the caller inside the
> > > > mmu_notifier implementation. We now even store the mm_struct
> > > > pointer inside the notifier.
> > > > 
> > > > Once a notifier is registered the mm_struct remains valid memory
> > > > until the notifier is unregistered.
> > > >   
> > > > > Isn't the idea of mmu_notifier is to avoid holding the mm
> > > > > reference and rely on the notifier to tell us when mm is going
> > > > > away?    
> > > > 
> > > > The notifier only holds a mmgrab(), not a mmget() - this allows
> > > > exit_mmap to proceed, but the mm_struct memory remains.
> > > > 
> > > > This is also probably why it is a bad idea to tie the lifetime of
> > > > something like a pasid to the mmdrop as an evil user could cause a
> > > > large number of mm structs to be released but not freed, probably
> > > > defeating cgroup limits and so forth (not sure)
> > > >   
> > > > > It seems both Intel and AMD iommu drivers don't hold mmgrab
> > > > > after mmu_notifier_register.    
> > > > 
> > > > It is done internally to the implementation.
> > > >   
> > > > > > So the exit_mmap() -> release() may happen before the FDs are
> > > > > > destroyed, but the final mmdrop() will be during some FD
> > > > > > release when the final mmdrop() happens.    
> > > > > 
> > > > > Do you mean mmdrop() is after FD release?     
> > > > 
> > > > Yes, it will be done by the mmu_notifier_unregister(), which
> > > > should be called during FD release if the iommu lifetime is
> > > > linked to some FD. 
> > > > > If so, unbind is called in FD release should take care of
> > > > > everything, i.e. stops DMA, clear PASID context on IOMMU, flush
> > > > > PRS queue etc.    
> > > > 
> > > > Yes, this is the proper way, when the DMA is stopped and no use
> > > > of the PASID remains then you can drop the mmu notifier and
> > > > release the PASID entirely. If that is linked to the lifetime of
> > > > the FD then forget completely about the mm_struct lifetime, it
> > > > doesn't matter.. 
> > > Got everything above, thanks a lot.
> > > 
> > > If everything is in order with the FD close. Why do we need to 
> > > "ask IOMMU drivers to silently abort DMA and Page Requests in the
> > > meantime." in mm_exit notifier? This will be done orderly in unbind
> > > anyway.  
> > 
> > When the process is killed, mm release can happen before fds are
> > released. If you look at do_exit() in kernel/exit.c:
> > 
> > 	exit_mm()
> > 	  mmput()
> > 	   -> mmu release notifier  
> > 	...
> > 	exit_files()
> > 	  close_files()
> > 	    fput()
> > 	exit_task_work()
> > 	  __fput()
> > 	   -> unbind()  
> > 
> So unbind is coming anyway, the difference in handling in mmu release
> notifier is whether we silently drop DMA fault vs. reporting fault?

What I meant is, between mmu release notifier and unbind(), we can't print
any error from DMA fault on dmesg, because an mm exit is easily triggered
by userspace. Look at the lifetime of the bond:

bind()
 |
 : Here any DMA fault is handled by mm, and on error we don't print
 : anything to dmesg. Userspace can easily trigger faults by issuing DMA
 : on unmapped buffers.
 |
mm exit -> clear pgd, invalidate IOTLBs
 |
 : Here the PASID descriptor doesn't have the pgd anymore, but we don't
 : print out any error to dmesg either. DMA is likely still running but
 : any fault has to be ignored.
 :
 : We also can't free the PASID yet, since transactions are still coming
 : in with this PASID.
 |
unbind() -> clear context descriptor, release PASID and mmu notifier
 |
 : Here the PASID descriptor is clear. If DMA is still running the device
 : driver really messed up and we have to print out any fault.

For that middle state I had to introduce a new pasid descriptor state in
the SMMU driver, to avoid reporting errors between mm exit and unbind().

Thanks,
Jean

> If a process crash during unbind, something already went seriously
> wrong, DMA fault is expected.
> I think having some error indication is useful, compared to "silently
> drop"
> 
> Thanks,
> 
> Jacob
> 
> > Thanks,
> > Jean
> > 
> > >   
> > > > > Enforcing unbind upon FD close might be a precarious path,
> > > > > perhaps that is why we have to deal with out of order
> > > > > situation?    
> > > > 
> > > > How so? You just put it in the FD release function :)
> > > >   
> > > I was thinking some driver may choose to defer unbind in some
> > > workqueue etc.
> > >   
> > > > > > > In VT-d, because of enqcmd and lazy PASID free we plan to
> > > > > > > hold on to the PASID until mmdrop.
> > > > > > > https://lore.kernel.org/patchwork/patch/1217762/      
> > > > > > 
> > > > > > Why? The bind already gets a mmu_notifier which has refcounts
> > > > > > and the right lifetime for PASID.. This code could already be
> > > > > > simplified by using the mmu_notifier_get()/put() stuff.
> > > > > >     
> > > > > Yes, I guess mmu_notifier_get()/put() is new :)
> > > > > +Fenghua    
> > > > 
> > > > I was going to convert the intel code when I did many other
> > > > drivers, but it was a bit too complex..
> > > > 
> > > > But the approach is straightforward. Get rid of the mm search
> > > > list and use mmu_notifier_get(). This returns a singleton notifier
> > > > for the mm_struct and handles refcounting/etc
> > > > 
> > > > Use mmu_notifier_put() during a unbind, it will callback to
> > > > free_notifier() to do the final frees (ie this is where the pasid
> > > > should go away)
> > > > 
> > > > For the SVM_FLAG_PRIVATE_PASID continue to use
> > > > mmu_notifier_register, however this can now be mixed with
> > > > mmu_notifier_put() so the cleanup is the same. A separate ops
> > > > static struct is needed to create a unique key though
> > > > 
> > > > Jason  
> > > 
> > > [Jacob Pan]  
> 
> [Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 14:25               ` Jason Gunthorpe
@ 2020-04-09 16:21                 ` Jacob Pan
  2020-04-09 16:58                   ` Jason Gunthorpe
  0 siblings, 1 reply; 26+ messages in thread
From: Jacob Pan @ 2020-04-09 16:21 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Thu, 9 Apr 2020 11:25:19 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Thu, Apr 09, 2020 at 07:14:24AM -0700, Jacob Pan wrote:
> > > When the process is killed, mm release can happen before fds are
> > > released. If you look at do_exit() in kernel/exit.c:
> > > 
> > > 	exit_mm()
> > > 	  mmput()  
> > > 	   -> mmu release notifier    
> > > 	...
> > > 	exit_files()
> > > 	  close_files()
> > > 	    fput()
> > > 	exit_task_work()
> > > 	  __fput()  
> > > 	   -> unbind()    
> > >   
> > So unbind is coming anyway, the difference in handling in mmu
> > release notifier is whether we silently drop DMA fault vs.
> > reporting fault?  
> 
> Userspace can significantly delay the final fput triggering the
> unbind, the above is only for the trivial case where the process
> owning the mm_struct is the only process holding the fd.
> 
Are you talking about FDs owned by children after fork() or FDs sent
over to another process. I think, in either case SVA is not supported.

> The destruction of a mm_struct should be treated the same as unmapping
> every vma in the process. The observable effect should be no different
> than munmap.
> 
Good point. I agree, we should suppress the error in the window before
unbind.

> Jason

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 14:50               ` Jean-Philippe Brucker
@ 2020-04-09 16:27                 ` Jacob Pan
  2020-04-10 15:52                 ` Jacob Pan
  1 sibling, 0 replies; 26+ messages in thread
From: Jacob Pan @ 2020-04-09 16:27 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Thu, 9 Apr 2020 16:50:58 +0200
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> On Thu, Apr 09, 2020 at 07:14:24AM -0700, Jacob Pan wrote:
> > On Thu, 9 Apr 2020 08:39:05 +0200
> > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> >   
> > > On Wed, Apr 08, 2020 at 04:48:02PM -0700, Jacob Pan wrote:  
> > > > On Wed, 8 Apr 2020 19:32:18 -0300
> > > > Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > > >     
> > > > > On Wed, Apr 08, 2020 at 02:35:52PM -0700, Jacob Pan wrote:    
> > > > > > > On Wed, Apr 08, 2020 at 11:35:52AM -0700, Jacob Pan
> > > > > > > wrote:      
> > > > > > > > Hi Jean,
> > > > > > > > 
> > > > > > > > On Wed,  8 Apr 2020 16:04:25 +0200
> > > > > > > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > > > > > > >         
> > > > > > > > > The IOMMU SVA API currently requires device drivers to
> > > > > > > > > implement an mm_exit() callback, which stops device
> > > > > > > > > jobs that do DMA. This function is called in the
> > > > > > > > > release() MMU notifier, when an address space that is
> > > > > > > > > shared with a device exits.
> > > > > > > > > 
> > > > > > > > > It has been noted several time during discussions
> > > > > > > > > about SVA that cancelling DMA jobs can be slow and
> > > > > > > > > complex, and doing it in the release() notifier might
> > > > > > > > > cause synchronization issues (patch 2 has more
> > > > > > > > > background). Device drivers must in any case call
> > > > > > > > > unbind() to remove their bond, after stopping DMA
> > > > > > > > > from a more favorable context (release of a file
> > > > > > > > > descriptor).
> > > > > > > > > 
> > > > > > > > > So after mm exits, rather than notifying device
> > > > > > > > > drivers, we can hold on to the PASID until unbind(),
> > > > > > > > > ask IOMMU drivers to silently abort DMA and Page
> > > > > > > > > Requests in the meantime. This change should relieve
> > > > > > > > > the mmput() path.        
> > > > > > > >
> > > > > > > > I assume mm is destroyed after all the FDs are
> > > > > > > > closed        
> > > > > > > 
> > > > > > > FDs do not hold a mmget(), but they may hold a mmgrab(),
> > > > > > > ie anything using mmu_notifiers has to hold a grab until
> > > > > > > the notifier is destroyed, which is often triggered by FD
> > > > > > > close. 
> > > > > > Sorry, I don't get this. Are you saying we have to hold a
> > > > > > mmgrab() between svm_bind/mmu_notifier_register and
> > > > > > svm_unbind/mmu_notifier_unregister?      
> > > > > 
> > > > > Yes. This is done automatically for the caller inside the
> > > > > mmu_notifier implementation. We now even store the mm_struct
> > > > > pointer inside the notifier.
> > > > > 
> > > > > Once a notifier is registered the mm_struct remains valid
> > > > > memory until the notifier is unregistered.
> > > > >     
> > > > > > Isn't the idea of mmu_notifier is to avoid holding the mm
> > > > > > reference and rely on the notifier to tell us when mm is
> > > > > > going away?      
> > > > > 
> > > > > The notifier only holds a mmgrab(), not a mmget() - this
> > > > > allows exit_mmap to proceed, but the mm_struct memory remains.
> > > > > 
> > > > > This is also probably why it is a bad idea to tie the
> > > > > lifetime of something like a pasid to the mmdrop as a evil
> > > > > user could cause a large number of mm structs to be released
> > > > > but not freed, probably defeating cgroup limits and so forth
> > > > > (not sure) 
> > > > > > It seems both Intel and AMD iommu drivers don't hold mmgrab
> > > > > > after mmu_notifier_register.      
> > > > > 
> > > > > It is done internally to the implementation.
> > > > >     
> > > > > > > So the exit_mmap() -> release() may happen before the FDs
> > > > > > > are destroyed, but the final mmdrop() will be during some
> > > > > > > FD release when the final mmdrop() happens.      
> > > > > > 
> > > > > > Do you mean mmdrop() is after FD release?       
> > > > > 
> > > > > Yes, it will be done by the mmu_notifier_unregister(), which
> > > > > should be called during FD release if the iommu lifetime is
> > > > > linked to some FD.   
> > > > > > If so, unbind is called in FD release should take care of
> > > > > > everything, i.e. stops DMA, clear PASID context on IOMMU,
> > > > > > flush PRS queue etc.      
> > > > > 
> > > > > Yes, this is the proper way, when the DMA is stopped and no
> > > > > use of the PASID remains then you can drop the mmu notifier
> > > > > and release the PASID entirely. If that is linked to the
> > > > > lifetime of the FD then forget completely about the mm_struct
> > > > > lifetime, it doesn't matter..   
> > > > Got everything above, thanks a lot.
> > > > 
> > > > If everything is in order with the FD close. Why do we need to 
> > > > "ask IOMMU drivers to silently abort DMA and Page Requests in
> > > > the meantime." in mm_exit notifier? This will be done orderly
> > > > in unbind anyway.    
> > > 
> > > When the process is killed, mm release can happen before fds are
> > > released. If you look at do_exit() in kernel/exit.c:
> > > 
> > > 	exit_mm()
> > > 	  mmput()  
> > > 	   -> mmu release notifier    
> > > 	...
> > > 	exit_files()
> > > 	  close_files()
> > > 	    fput()
> > > 	exit_task_work()
> > > 	  __fput()  
> > > 	   -> unbind()    
> > >   
> > So unbind is coming anyway, the difference in handling in mmu
> > release notifier is whether we silently drop DMA fault vs.
> > reporting fault?  
> 
> What I meant is, between mmu release notifier and unbind(), we can't
> print any error from DMA fault on dmesg, because an mm exit is easily
> triggered by userspace. Look at the lifetime of the bond:
> 
> bind()
>  |
>  : Here any DMA fault is handled by mm, and on error we don't print
>  : anything to dmesg. Userspace can easily trigger faults by issuing
> DMA : on unmapped buffers.
>  |
> mm exit -> clear pgd, invalidate IOTLBs
>  |
>  : Here the PASID descriptor doesn't have the pgd anymore, but we
> don't : print out any error to dmesg either. DMA is likely still
> running but : any fault has to be ignored.
>  :
>  : We also can't free the PASID yet, since transactions are still
> coming : in with this PASID.
>  |
> unbind() -> clear context descriptor, release PASID and mmu notifier
>  |
>  : Here the PASID descriptor is clear. If DMA is still running the
> device : driver really messed up and we have to print out any fault.
> 
> For that middle state I had to introduce a new pasid descriptor state
> in the SMMU driver, to avoid reporting errors between mm exit and
> unbind().
> 
I agree. Silent error in this window is the right way to go. Also, per
Jason's point, mm destroy should have similar behavior to vm unmap.

Thanks,

Jacob

> Thanks,
> Jean
> 
> > If a process crash during unbind, something already went seriously
> > wrong, DMA fault is expected.
> > I think having some error indication is useful, compared to
> > "silently drop"
> > 
> > Thanks,
> > 
> > Jacob
> >   
> > > Thanks,
> > > Jean
> > >   
> > > >     
> > > > > > Enforcing unbind upon FD close might be a precarious path,
> > > > > > perhaps that is why we have to deal with out of order
> > > > > > situation?      
> > > > > 
> > > > > How so? You just put it in the FD release function :)
> > > > >     
> > > > I was thinking some driver may choose to defer unbind in some
> > > > workqueue etc.
> > > >     
> > > > > > > > In VT-d, because of enqcmd and lazy PASID free we plan
> > > > > > > > to hold on to the PASID until mmdrop.
> > > > > > > > https://lore.kernel.org/patchwork/patch/1217762/        
> > > > > > > 
> > > > > > > Why? The bind already gets a mmu_notifier which has
> > > > > > > refcounts and the right lifetime for PASID.. This code
> > > > > > > could already be simplified by using the
> > > > > > > mmu_notifier_get()/put() stuff. 
> > > > > > Yes, I guess mmu_notifier_get()/put() is new :)
> > > > > > +Fenghua      
> > > > > 
> > > > > I was going to convert the intel code when I did many other
> > > > > drivers, but it was a bit too complex..
> > > > > 
> > > > > But the approach is straightforward. Get rid of the mm search
> > > > > list and use mmu_notifier_get(). This returns a singleton
> > > > > notifier for the mm_struct and handles refcounting/etc
> > > > > 
> > > > > Use mmu_notifier_put() during a unbind, it will callback to
> > > > > free_notifier() to do the final frees (ie this is where the
> > > > > pasid should go away)
> > > > > 
> > > > > For the SVM_FLAG_PRIVATE_PASID continue to use
> > > > > mmu_notifier_register, however this can now be mixed with
> > > > > mmu_notifier_put() so the cleanup is the same. A separate ops
> > > > > static struct is needed to create a unique key though
> > > > > 
> > > > > Jason    
> > > > 
> > > > [Jacob Pan]    
> > 
> > [Jacob Pan]  

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 12:08           ` Jason Gunthorpe
@ 2020-04-09 16:31             ` Jacob Pan
  0 siblings, 0 replies; 26+ messages in thread
From: Jacob Pan @ 2020-04-09 16:31 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Thu, 9 Apr 2020 09:08:21 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Wed, Apr 08, 2020 at 04:48:02PM -0700, Jacob Pan wrote:
> > > Yes, this is the proper way, when the DMA is stopped and no use
> > > of the PASID remains then you can drop the mmu notifier and
> > > release the PASID entirely. If that is linked to the lifetime of
> > > the FD then forget completely about the mm_struct lifetime, it
> > > doesn't matter.. 
> > Got everything above, thanks a lot.
> > 
> > If everything is in order with the FD close. Why do we need to 
> > "ask IOMMU drivers to silently abort DMA and Page Requests in the
> > meantime." in mm_exit notifier? This will be done orderly in unbind
> > anyway.  
> 
> I thought this is exactly what Jean-Phillippe is removing here, it is
> a bad idea for the reasons he explained.
> 
I think this patch only removed driver side callbacks, i.e. to stop
DMA. But not removing IOMMU side of stop DMA, PRS.

Before uacce, (universal accelerator framework), sva bind/unbind is not
guaranteed at open/close FD time. Therefore, mmu notifier is needed if
mmexit comes without unbind.

> > > > Enforcing unbind upon FD close might be a precarious path,
> > > > perhaps that is why we have to deal with out of order
> > > > situation?    
> > > 
> > > How so? You just put it in the FD release function :)  
> > 
> > I was thinking some driver may choose to defer unbind in some
> > workqueue etc.  
> 
> Doesn't really change anything, the lifetime of the PASID wouuld be
> the lifetime of the notifier and the bind, and DMA can't continue
> without the notifier registered.
> 
True, it is just better not to defer. Otherwise, the window of
suppressing error gets longer.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 16:21                 ` Jacob Pan
@ 2020-04-09 16:58                   ` Jason Gunthorpe
  0 siblings, 0 replies; 26+ messages in thread
From: Jason Gunthorpe @ 2020-04-09 16:58 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Jean-Philippe Brucker, arnd, Yu, Fenghua, gregkh, iommu,
	zhangfei.gao, linux-accelerators

On Thu, Apr 09, 2020 at 09:21:34AM -0700, Jacob Pan wrote:
> On Thu, 9 Apr 2020 11:25:19 -0300
> Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> > On Thu, Apr 09, 2020 at 07:14:24AM -0700, Jacob Pan wrote:
> > > > When the process is killed, mm release can happen before fds are
> > > > released. If you look at do_exit() in kernel/exit.c:
> > > > 
> > > > 	exit_mm()
> > > > 	  mmput()  
> > > > 	   -> mmu release notifier    
> > > > 	...
> > > > 	exit_files()
> > > > 	  close_files()
> > > > 	    fput()
> > > > 	exit_task_work()
> > > > 	  __fput()  
> > > > 	   -> unbind()    
> > > >   
> > > So unbind is coming anyway, the difference in handling in mmu
> > > release notifier is whether we silently drop DMA fault vs.
> > > reporting fault?  
> > 
> > Userspace can significantly delay the final fput triggering the
> > unbind, the above is only for the trivial case where the process
> > owning the mm_struct is the only process holding the fd.
> > 
> Are you talking about FDs owned by children after fork() or FDs sent
> over to another process. I think, in either case SVA is not supported.

Supported or not a hostile user space can trigger these conditions and
it should not cause misbehavior from the kernel (eg log spamming)

Jason
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-09 14:50               ` Jean-Philippe Brucker
  2020-04-09 16:27                 ` Jacob Pan
@ 2020-04-10 15:52                 ` Jacob Pan
  2020-04-15  7:47                   ` Jean-Philippe Brucker
  1 sibling, 1 reply; 26+ messages in thread
From: Jacob Pan @ 2020-04-10 15:52 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Thu, 9 Apr 2020 16:50:58 +0200
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> > So unbind is coming anyway, the difference in handling in mmu
> > release notifier is whether we silently drop DMA fault vs.
> > reporting fault?  
> 
> What I meant is, between mmu release notifier and unbind(), we can't
> print any error from DMA fault on dmesg, because an mm exit is easily
> triggered by userspace. Look at the lifetime of the bond:
> 
> bind()
>  |
>  : Here any DMA fault is handled by mm, and on error we don't print
>  : anything to dmesg. Userspace can easily trigger faults by issuing
> DMA : on unmapped buffers.
>  |
> mm exit -> clear pgd, invalidate IOTLBs
>  |
>  : Here the PASID descriptor doesn't have the pgd anymore, but we
> don't : print out any error to dmesg either. DMA is likely still
> running but : any fault has to be ignored.
>  :
>  : We also can't free the PASID yet, since transactions are still
> coming : in with this PASID.
>  |
> unbind() -> clear context descriptor, release PASID and mmu notifier
>  |
>  : Here the PASID descriptor is clear. If DMA is still running the
> device : driver really messed up and we have to print out any fault.
> 
> For that middle state I had to introduce a new pasid descriptor state
> in the SMMU driver, to avoid reporting errors between mm exit and
> unbind().
I must have missed something, but why bother with a state when you can
always check if the mm is dead by mmget_not_zero()? You would not
handle IOPF if the mm is dead anyway, similarly for other DMA errors.

Also, since you are not freeing ioasid in mmu_notifier release anymore,
does it mean the IOASID notifier chain can be non-atomic?

Thanks,

Jacob
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-10 15:52                 ` Jacob Pan
@ 2020-04-15  7:47                   ` Jean-Philippe Brucker
  2020-04-16 20:58                     ` Jacob Pan
  0 siblings, 1 reply; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-15  7:47 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Fri, Apr 10, 2020 at 08:52:49AM -0700, Jacob Pan wrote:
> On Thu, 9 Apr 2020 16:50:58 +0200
> Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> 
> > > So unbind is coming anyway, the difference in handling in mmu
> > > release notifier is whether we silently drop DMA fault vs.
> > > reporting fault?  
> > 
> > What I meant is, between mmu release notifier and unbind(), we can't
> > print any error from DMA fault on dmesg, because an mm exit is easily
> > triggered by userspace. Look at the lifetime of the bond:
> > 
> > bind()
> >  |
> >  : Here any DMA fault is handled by mm, and on error we don't print
> >  : anything to dmesg. Userspace can easily trigger faults by issuing
> > DMA : on unmapped buffers.
> >  |
> > mm exit -> clear pgd, invalidate IOTLBs
> >  |
> >  : Here the PASID descriptor doesn't have the pgd anymore, but we
> > don't : print out any error to dmesg either. DMA is likely still
> > running but : any fault has to be ignored.
> >  :
> >  : We also can't free the PASID yet, since transactions are still
> > coming : in with this PASID.
> >  |
> > unbind() -> clear context descriptor, release PASID and mmu notifier
> >  |
> >  : Here the PASID descriptor is clear. If DMA is still running the
> > device : driver really messed up and we have to print out any fault.
> > 
> > For that middle state I had to introduce a new pasid descriptor state
> > in the SMMU driver, to avoid reporting errors between mm exit and
> > unbind().
> I must have missed something, but why bother with a state when you can
> always check if the mm is dead by mmget_not_zero()? You would not
> handle IOPF if the mm is dead anyway, similarly for other DMA errors.

In the SMMU a cleared PASID descriptor results in unrecoverable faults,
which do not go through the I/O page fault handler. I've been thinking
about injecting everything to the IOPF handler, recoverable or not, but
filtering down the stream is complicated. Most of the time outside this
small window, we really need to print out those messages because they
would indicate serious bugs.

> Also, since you are not freeing ioasid in mmu_notifier release anymore,
> does it mean the IOASID notifier chain can be non-atomic?

Unfortunately not, ioasid_free() is called from
mmu_notifier_ops::free_notifier() in the RCU callback that results from
mmu_notifier_put(). 

Thanks,
Jean
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-15  7:47                   ` Jean-Philippe Brucker
@ 2020-04-16 20:58                     ` Jacob Pan
  2020-04-20  8:02                       ` Jean-Philippe Brucker
  0 siblings, 1 reply; 26+ messages in thread
From: Jacob Pan @ 2020-04-16 20:58 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Wed, 15 Apr 2020 09:47:36 +0200
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> On Fri, Apr 10, 2020 at 08:52:49AM -0700, Jacob Pan wrote:
> > On Thu, 9 Apr 2020 16:50:58 +0200
> > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> >   
> > > > So unbind is coming anyway, the difference in handling in mmu
> > > > release notifier is whether we silently drop DMA fault vs.
> > > > reporting fault?    
> > > 
> > > What I meant is, between mmu release notifier and unbind(), we
> > > can't print any error from DMA fault on dmesg, because an mm exit
> > > is easily triggered by userspace. Look at the lifetime of the
> > > bond:
> > > 
> > > bind()
> > >  |
> > >  : Here any DMA fault is handled by mm, and on error we don't
> > > print : anything to dmesg. Userspace can easily trigger faults by
> > > issuing DMA : on unmapped buffers.
> > >  |
> > > mm exit -> clear pgd, invalidate IOTLBs
> > >  |
> > >  : Here the PASID descriptor doesn't have the pgd anymore, but we
> > > don't : print out any error to dmesg either. DMA is likely still
> > > running but : any fault has to be ignored.
> > >  :
> > >  : We also can't free the PASID yet, since transactions are still
> > > coming : in with this PASID.
> > >  |
> > > unbind() -> clear context descriptor, release PASID and mmu
> > > notifier |
> > >  : Here the PASID descriptor is clear. If DMA is still running the
> > > device : driver really messed up and we have to print out any
> > > fault.
> > > 
> > > For that middle state I had to introduce a new pasid descriptor
> > > state in the SMMU driver, to avoid reporting errors between mm
> > > exit and unbind().  
> > I must have missed something, but why bother with a state when you
> > can always check if the mm is dead by mmget_not_zero()? You would
> > not handle IOPF if the mm is dead anyway, similarly for other DMA
> > errors.  
> 
> In the SMMU a cleared PASID descriptor results in unrecoverable
> faults, which do not go through the I/O page fault handler. I've been
> thinking about injecting everything to the IOPF handler, recoverable
> or not, but filtering down the stream is complicated. Most of the
> time outside this small window, we really need to print out those
> messages because they would indicate serious bugs.
> 
VT-d also results in an unrecoverable fault for a cleared PASID. I am
assuming that in the fault record, the SMMU can also identify the PASID
and source ID, so you should be able to find the matching mm.
Then you can check whether the mm is defunct?

> > Also, since you are not freeing ioasid in mmu_notifier release
> > anymore, does it mean the IOASID notifier chain can be non-atomic?  
> 
> Unfortunately not, ioasid_free() is called from
> mmu_notifier_ops::free_notifier() in the RCU callback that results
> from mmu_notifier_put(). 
> 
I agree. I looked at the code, it is much more clean with the
mmu_notifier_get/put.

I am thinking of perhaps adding a reclaim mechanism such that an IOASID
not directly freed can stay on an in_active list (while waiting for its
states to get cleared) until it can be reclaimed. Do you see this as
useful for SMMU?

This is useful for VT-d, since we have more consumers for a given PASID,
i.e. VMCS, VDCM, and IOMMU. Each consumer has its own PASID context to
clean up.

Thanks for the explanation!
> Thanks,
> Jean

[Jacob Pan]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit()
  2020-04-16 20:58                     ` Jacob Pan
@ 2020-04-20  8:02                       ` Jean-Philippe Brucker
  0 siblings, 0 replies; 26+ messages in thread
From: Jean-Philippe Brucker @ 2020-04-20  8:02 UTC (permalink / raw)
  To: Jacob Pan
  Cc: Yu, Fenghua, arnd, gregkh, iommu, Jason Gunthorpe, zhangfei.gao,
	linux-accelerators

On Thu, Apr 16, 2020 at 01:58:29PM -0700, Jacob Pan wrote:
> On Wed, 15 Apr 2020 09:47:36 +0200
> Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> 
> > On Fri, Apr 10, 2020 at 08:52:49AM -0700, Jacob Pan wrote:
> > > On Thu, 9 Apr 2020 16:50:58 +0200
> > > Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:
> > >   
> > > > > So unbind is coming anyway, the difference in handling in mmu
> > > > > release notifier is whether we silently drop DMA fault vs.
> > > > > reporting fault?    
> > > > 
> > > > What I meant is, between mmu release notifier and unbind(), we
> > > > can't print any error from DMA fault on dmesg, because an mm exit
> > > > is easily triggered by userspace. Look at the lifetime of the
> > > > bond:
> > > > 
> > > > bind()
> > > >  |
> > > >  : Here any DMA fault is handled by mm, and on error we don't
> > > > print : anything to dmesg. Userspace can easily trigger faults by
> > > > issuing DMA : on unmapped buffers.
> > > >  |
> > > > mm exit -> clear pgd, invalidate IOTLBs
> > > >  |
> > > >  : Here the PASID descriptor doesn't have the pgd anymore, but we
> > > > don't : print out any error to dmesg either. DMA is likely still
> > > > running but : any fault has to be ignored.
> > > >  :
> > > >  : We also can't free the PASID yet, since transactions are still
> > > > coming : in with this PASID.
> > > >  |
> > > > unbind() -> clear context descriptor, release PASID and mmu
> > > > notifier |
> > > >  : Here the PASID descriptor is clear. If DMA is still running the
> > > > device : driver really messed up and we have to print out any
> > > > fault.
> > > > 
> > > > For that middle state I had to introduce a new pasid descriptor
> > > > state in the SMMU driver, to avoid reporting errors between mm
> > > > exit and unbind().  
> > > I must have missed something, but why bother with a state when you
> > > can always check if the mm is dead by mmget_not_zero()? You would
> > > not handle IOPF if the mm is dead anyway, similarly for other DMA
> > > errors.  
> > 
> > In the SMMU a cleared PASID descriptor results in unrecoverable
> > faults, which do not go through the I/O page fault handler. I've been
> > thinking about injecting everything to the IOPF handler, recoverable
> > or not, but filtering down the stream is complicated. Most of the
> > time outside this small window, we really need to print out those
> > messages because they would indicate serious bugs.
> > 
> VT-d also results in an unrecoverable fault for a cleared PASID. I am
> assuming that in the fault record, the SMMU can also identify the PASID
> and source ID, so you should be able to find the matching mm.
> Then you can check whether the mm is defunct?

I'd rather not add a PASID lookup in the SMMU event thread, as that
function needs to be fast and there already is a PASID lookup in the IOPF
worker.

> > > Also, since you are not freeing ioasid in mmu_notifier release
> > > anymore, does it mean the IOASID notifier chain can be non-atomic?  
> > 
> > Unfortunately not, ioasid_free() is called from
> > mmu_notifier_ops::free_notifier() in the RCU callback that results
> > from mmu_notifier_put(). 
> > 
> I agree. I looked at the code, it is much more clean with the
> mmu_notifier_get/put.
> 
> I am thinking of perhaps adding a reclaim mechanism such that an IOASID
> not directly freed can stay on an in_active list (while waiting for its
> states to get cleared) until it can be reclaimed. Do you see this as
> useful for SMMU?

Probably not useful for SMMU, but I don't see any issue with adding it. 

> This is useful for VT-d, since we have more consumers for a given PASID,
> i.e. VMCS, VDCM, and IOMMU. Each consumer has its own PASID context to
> clean up.

Right, I'm still having trouble understanding the whole picture; I will
need to read more about this. The main point is not adding too much work
in the mm exit path.

Thanks,
Jean
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2020-04-20  8:02 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-08 14:04 [PATCH 0/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
2020-04-08 14:04 ` [PATCH 1/2] uacce: Remove mm_exit() op Jean-Philippe Brucker
2020-04-09  9:07   ` Zhangfei Gao
2020-04-09  9:44     ` Jean-Philippe Brucker
2020-04-08 14:04 ` [PATCH 2/2] iommu: Remove iommu_sva_ops::mm_exit() Jean-Philippe Brucker
2020-04-08 18:35 ` [PATCH 0/2] " Jacob Pan
2020-04-08 19:02   ` Jason Gunthorpe
2020-04-08 21:35     ` Jacob Pan
2020-04-08 22:32       ` Jason Gunthorpe
2020-04-08 23:48         ` Jacob Pan
2020-04-09  6:39           ` Jean-Philippe Brucker
2020-04-09 14:14             ` Jacob Pan
2020-04-09 14:25               ` Jason Gunthorpe
2020-04-09 16:21                 ` Jacob Pan
2020-04-09 16:58                   ` Jason Gunthorpe
2020-04-09 14:50               ` Jean-Philippe Brucker
2020-04-09 16:27                 ` Jacob Pan
2020-04-10 15:52                 ` Jacob Pan
2020-04-15  7:47                   ` Jean-Philippe Brucker
2020-04-16 20:58                     ` Jacob Pan
2020-04-20  8:02                       ` Jean-Philippe Brucker
2020-04-09 12:08           ` Jason Gunthorpe
2020-04-09 16:31             ` Jacob Pan
2020-04-08 23:49         ` Fenghua Yu
2020-04-09 12:12           ` Jason Gunthorpe
2020-04-08 19:04 ` Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.