All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: iommu@lists.linux.dev, Kevin Tian <kevin.tian@intel.com>,
	linux-kselftest@vger.kernel.org
Cc: kvm@vger.kernel.org, Nicolin Chen <nicolinc@nvidia.com>,
	Yi Liu <yi.l.liu@intel.com>
Subject: [PATCH 09/14] iommufd: Add iommufd_device_replace()
Date: Fri, 24 Feb 2023 20:27:54 -0400	[thread overview]
Message-ID: <9-v1-7612f88c19f5+2f21-iommufd_alloc_jgg@nvidia.com> (raw)
In-Reply-To: <0-v1-7612f88c19f5+2f21-iommufd_alloc_jgg@nvidia.com>

Replace allows all the devices in a group to move in one step to a new
HWPT. Further, the HWPT move is done without going through a blocking
domain so that the IOMMU driver can implement some level of
non-disruption to ongoing DMA if that has meaning for it (e.g. for future
special driver domains).

Replace uses a lot of the same logic as normal attach, except the actual
domain changeover has different restrictions, and we are careful to
sequence things so that a failure leaves everything the way it was,
rather than leaving the group trapped in a blocking domain if, for
example, something fails with ENOMEM.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c | 216 ++++++++++++++++++++++++++-------
 drivers/iommu/iommufd/main.c   |   1 +
 2 files changed, 175 insertions(+), 42 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 7f95876d142d7f..913de911361115 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,6 +4,7 @@
 #include <linux/iommufd.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include "../iommu-priv.h"
 
 #include "io_pagetable.h"
 #include "iommufd_private.h"
@@ -338,27 +339,101 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
 	return hwpt;
 }
 
-static int iommufd_device_do_attach(struct iommufd_device *idev,
-				    struct iommufd_hw_pagetable *hwpt)
+static struct iommufd_hw_pagetable *
+iommufd_device_do_attach(struct iommufd_device *idev,
+			 struct iommufd_hw_pagetable *hwpt)
 {
 	int rc;
 
 	mutex_lock(&idev->igroup->lock);
 	rc = iommufd_hw_pagetable_attach(hwpt, idev);
 	mutex_unlock(&idev->igroup->lock);
-	return rc;
+	if (rc)
+		return ERR_PTR(rc);
+	return NULL;
+}
+
+static struct iommufd_hw_pagetable *
+iommufd_device_do_replace_locked(struct iommufd_device *idev,
+				 struct iommufd_hw_pagetable *hwpt)
+{
+	struct iommufd_hw_pagetable *old_hwpt;
+	int rc;
+
+	lockdep_assert_held(&idev->igroup->lock);
+
+	/* Try to upgrade the domain we have */
+	if (idev->enforce_cache_coherency) {
+		rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+		if (rc)
+			return ERR_PTR(-EINVAL);
+	}
+
+	rc = iommufd_device_setup_msi(idev, hwpt);
+	if (rc)
+		return ERR_PTR(rc);
+
+	old_hwpt = idev->igroup->hwpt;
+	if (hwpt->ioas != old_hwpt->ioas) {
+		rc = iopt_table_enforce_group_resv_regions(
+			&hwpt->ioas->iopt, idev->igroup->group, NULL);
+		if (rc)
+			return ERR_PTR(rc);
+	}
+
+	rc = iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
+	if (rc)
+		goto err_unresv;
+
+	if (hwpt->ioas != old_hwpt->ioas)
+		iopt_remove_reserved_iova(&old_hwpt->ioas->iopt,
+					  idev->igroup->group);
+
+	idev->igroup->hwpt = hwpt;
+	refcount_inc(&hwpt->obj.users);
+	return old_hwpt;
+err_unresv:
+	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->igroup->group);
+	return ERR_PTR(rc);
+}
+
+static struct iommufd_hw_pagetable *
+iommufd_device_do_replace(struct iommufd_device *idev,
+			  struct iommufd_hw_pagetable *hwpt)
+{
+	struct iommufd_hw_pagetable *destroy_hwpt = NULL;
+	int rc;
+
+	mutex_lock(&idev->igroup->lock);
+	destroy_hwpt = iommufd_device_do_replace_locked(idev, hwpt);
+	if (IS_ERR(destroy_hwpt)) {
+		rc = PTR_ERR(destroy_hwpt);
+		goto out_unlock;
+	}
+	mutex_unlock(&idev->igroup->lock);
+	return destroy_hwpt;
+
+out_unlock:
+	mutex_unlock(&idev->igroup->lock);
+	return ERR_PTR(rc);
 }
 
+typedef struct iommufd_hw_pagetable *(*attach_fn)(
+	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
+
 /*
  * When automatically managing the domains we search for a compatible domain in
  * the iopt and if one is found use it, otherwise create a new domain.
  * Automatic domain selection will never pick a manually created domain.
  */
-static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
-					  struct iommufd_ioas *ioas, u32 *pt_id)
+static struct iommufd_hw_pagetable *
+iommufd_device_auto_get_domain(struct iommufd_device *idev,
+			       struct iommufd_ioas *ioas, u32 *pt_id,
+			       attach_fn do_attach)
 {
+	bool immediate_attach = do_attach == iommufd_device_do_attach;
+	struct iommufd_hw_pagetable *destroy_hwpt;
 	struct iommufd_hw_pagetable *hwpt;
-	int rc;
 
 	/*
 	 * There is no differentiation when domains are allocated, so any domain
@@ -372,52 +447,57 @@ static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
 
 		if (!iommufd_lock_obj(&hwpt->obj))
 			continue;
-		rc = iommufd_device_do_attach(idev, hwpt);
-		iommufd_put_object(&hwpt->obj);
-
-		/*
-		 * -EINVAL means the domain is incompatible with the device.
-		 * Other error codes should propagate to userspace as failure.
-		 * Success means the domain is attached.
-		 */
-		if (rc == -EINVAL)
-			continue;
+		destroy_hwpt = (*do_attach)(idev, hwpt);
 		*pt_id = hwpt->obj.id;
+		iommufd_put_object(&hwpt->obj);
+		if (IS_ERR(destroy_hwpt)) {
+			/*
+			 * -EINVAL means the domain is incompatible with the
+			 * device. Other error codes should propagate to
+			 * userspace as failure. Success means the domain is
+			 * attached.
+			 */
+			if (PTR_ERR(destroy_hwpt) == -EINVAL)
+				continue;
+			goto out_unlock;
+		}
 		goto out_unlock;
 	}
 
-	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true);
+	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev,
+					  immediate_attach);
 	if (IS_ERR(hwpt)) {
-		rc = PTR_ERR(hwpt);
+		destroy_hwpt = ERR_CAST(hwpt);
 		goto out_unlock;
 	}
+
+	if (!immediate_attach) {
+		destroy_hwpt = (*do_attach)(idev, hwpt);
+		if (IS_ERR(destroy_hwpt))
+			goto out_abort;
+	} else {
+		destroy_hwpt = NULL;
+	}
+
 	hwpt->auto_domain = true;
 	*pt_id = hwpt->obj.id;
 
 	mutex_unlock(&ioas->mutex);
 	iommufd_object_finalize(idev->ictx, &hwpt->obj);
-	return 0;
+	return destroy_hwpt;
+
+out_abort:
+	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
 out_unlock:
 	mutex_unlock(&ioas->mutex);
-	return rc;
+	return destroy_hwpt;
 }
 
-/**
- * iommufd_device_attach - Connect a device from an iommu_domain
- * @idev: device to attach
- * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
- *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
- *
- * This connects the device to an iommu_domain, either automatically or manually
- * selected. Once this completes the device could do DMA.
- *
- * The caller should return the resulting pt_id back to userspace.
- * This function is undone by calling iommufd_device_detach().
- */
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
+				    attach_fn do_attach)
 {
+	struct iommufd_hw_pagetable *destroy_hwpt;
 	struct iommufd_object *pt_obj;
-	int rc;
 
 	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
 	if (IS_ERR(pt_obj))
@@ -428,8 +508,8 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 		struct iommufd_hw_pagetable *hwpt =
 			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
 
-		rc = iommufd_device_do_attach(idev, hwpt);
-		if (rc)
+		destroy_hwpt = (*do_attach)(idev, hwpt);
+		if (IS_ERR(destroy_hwpt))
 			goto out_put_pt_obj;
 		break;
 	}
@@ -437,25 +517,77 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 		struct iommufd_ioas *ioas =
 			container_of(pt_obj, struct iommufd_ioas, obj);
 
-		rc = iommufd_device_auto_get_domain(idev, ioas, pt_id);
-		if (rc)
+		destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
+							      do_attach);
+		if (IS_ERR(destroy_hwpt))
 			goto out_put_pt_obj;
 		break;
 	}
 	default:
-		rc = -EINVAL;
+		destroy_hwpt = ERR_PTR(-EINVAL);
 		goto out_put_pt_obj;
 	}
+	iommufd_put_object(pt_obj);
 
-	refcount_inc(&idev->obj.users);
-	rc = 0;
+	/* This destruction has to be after we unlock everything */
+	if (destroy_hwpt)
+		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
+	return 0;
 
 out_put_pt_obj:
 	iommufd_put_object(pt_obj);
-	return rc;
+	return PTR_ERR(destroy_hwpt);
+}
+
+/**
+ * iommufd_device_attach - Connect a device to an iommu_domain
+ * @idev: device to attach
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This connects the device to an iommu_domain, either automatically or manually
+ * selected. Once this completes the device could do DMA.
+ *
+ * The caller should return the resulting pt_id back to userspace.
+ * This function is undone by calling iommufd_device_detach().
+ */
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+{
+	int rc;
+
+	rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
+	if (rc)
+		return rc;
+
+	/*
+	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
+	 * to destroy a device with an attachment.
+	 */
+	refcount_inc(&idev->obj.users);
+	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
 
+/**
+ * iommufd_device_replace - Change the device's iommu_domain
+ * @idev: device to change
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This is the same as:
+ *   iommufd_device_detach();
+ *   iommufd_device_attach()
+ * If it fails then no change is made to the attachment. The iommu driver may
+ * implement this so there is no disruption in translation. This can only be
+ * called if iommufd_device_attach() has already succeeded.
+ */
+int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+{
+	return iommufd_device_change_pt(idev, pt_id,
+					&iommufd_device_do_replace);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
+
 /**
  * iommufd_device_detach - Disconnect a device to an iommu_domain
  * @idev: device to detach
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index e5ed5dfa91a0b5..8597f2fb88da3a 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -461,5 +461,6 @@ module_exit(iommufd_exit);
 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
 MODULE_ALIAS("devname:vfio/vfio");
 #endif
+MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
 MODULE_LICENSE("GPL");
-- 
2.39.1


  parent reply	other threads:[~2023-02-25  0:28 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-25  0:27 [PATCH 00/14] Add iommufd physical device operations for replace and alloc hwpt Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 01/14] iommufd: Move isolated msi enforcement to iommufd_device_bind() Jason Gunthorpe
2023-03-02  7:45   ` Tian, Kevin
2023-02-25  0:27 ` [PATCH 02/14] iommufd: Add iommufd_group Jason Gunthorpe
2023-03-02  7:55   ` Tian, Kevin
2023-03-02 12:51     ` Jason Gunthorpe
2023-03-03  2:13       ` Tian, Kevin
2023-03-06 19:16         ` Jason Gunthorpe
2023-03-07  2:32           ` Tian, Kevin
2023-02-25  0:27 ` [PATCH 03/14] iommufd: Replace the hwpt->devices list with iommufd_group Jason Gunthorpe
2023-03-02  8:01   ` Tian, Kevin
2023-03-06 20:22     ` Jason Gunthorpe
2023-03-07  2:38       ` Tian, Kevin
2023-03-07 13:53         ` Jason Gunthorpe
2023-03-08  7:29           ` Tian, Kevin
2023-03-08 19:00             ` Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 04/14] iommufd: Use the iommufd_group to avoid duplicate reserved groups and msi setup Jason Gunthorpe
2023-03-02  8:06   ` Tian, Kevin
2023-03-02 12:55     ` Jason Gunthorpe
2023-03-03  2:16       ` Tian, Kevin
2023-02-25  0:27 ` [PATCH 05/14] iommufd: Make sw_msi_start a group global Jason Gunthorpe
2023-03-02  8:09   ` Tian, Kevin
2023-03-06 20:27     ` Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 06/14] iommufd: Move putting a hwpt to a helper function Jason Gunthorpe
2023-03-02  8:12   ` Tian, Kevin
2023-03-06 20:29     ` Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 07/14] iommufd: Add enforced_cache_coherency to iommufd_hw_pagetable_alloc() Jason Gunthorpe
2023-03-02  8:14   ` Tian, Kevin
2023-02-25  0:27 ` [PATCH 08/14] iommu: Introduce a new iommu_group_replace_domain() API Jason Gunthorpe
2023-03-02  8:16   ` Tian, Kevin
2023-02-25  0:27 ` Jason Gunthorpe [this message]
2023-02-26  3:01   ` [PATCH 09/14] iommufd: Add iommufd_device_replace() Baolu Lu
2023-02-27 13:58     ` Jason Gunthorpe
2023-02-28  1:50       ` Baolu Lu
2023-02-28 13:51         ` Jason Gunthorpe
2023-03-01  1:55           ` Baolu Lu
2023-02-26  3:13   ` Baolu Lu
2023-02-27 14:00     ` Jason Gunthorpe
2023-02-28  2:10       ` Baolu Lu
2023-02-28 13:52         ` Jason Gunthorpe
2023-03-01  2:23           ` Baolu Lu
2023-03-02  8:20   ` Tian, Kevin
2023-03-06 20:44     ` Jason Gunthorpe
2023-03-07  2:42       ` Tian, Kevin
2023-03-07 13:54         ` Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 10/14] iommufd: Make destroy_rwsem use a lock class per object type Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 11/14] iommufd/selftest: Test iommufd_device_replace() Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 12/14] iommufd: Add IOMMU_HWPT_ALLOC Jason Gunthorpe
2023-03-06  1:42   ` Nicolin Chen
2023-03-06 20:31     ` Jason Gunthorpe
2023-03-17  3:02   ` Tian, Kevin
2023-03-17  4:02     ` Nicolin Chen
2023-03-17 10:20       ` Tian, Kevin
2023-03-21 17:16     ` Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 13/14] iommufd/selftest: Return the real idev id from selftest mock_domain Jason Gunthorpe
2023-02-25  0:27 ` [PATCH 14/14] iommufd/selftest: Add a selftest for IOMMU_HWPT_ALLOC Jason Gunthorpe
2023-02-26 19:29   ` Nicolin Chen
2023-02-27 15:02     ` Jason Gunthorpe
2023-02-28  0:17       ` Nicolin Chen
2023-03-07  8:42 ` [PATCH 00/14] Add iommufd physical device operations for replace and alloc hwpt Tian, Kevin
2023-03-07 12:46   ` Jason Gunthorpe
2023-03-08  2:08     ` Baolu Lu
2023-03-08  7:38       ` Tian, Kevin
2023-03-08 18:59         ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9-v1-7612f88c19f5+2f21-iommufd_alloc_jgg@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=iommu@lists.linux.dev \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.