All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: iommu@lists.linux.dev, linux-kselftest@vger.kernel.org
Cc: Lu Baolu <baolu.lu@linux.intel.com>,
	Kevin Tian <kevin.tian@intel.com>,
	kvm@vger.kernel.org, Lixiao Yang <lixiao.yang@intel.com>,
	Matthew Rosato <mjrosato@linux.ibm.com>,
	Nicolin Chen <nicolinc@nvidia.com>, Yi Liu <yi.l.liu@intel.com>
Subject: [PATCH v6 14/19] iommufd: Add iommufd_device_replace()
Date: Thu, 20 Apr 2023 15:51:29 -0300	[thread overview]
Message-ID: <14-v6-fdb604df649a+369-iommufd_alloc_jgg@nvidia.com> (raw)
In-Reply-To: <0-v6-fdb604df649a+369-iommufd_alloc_jgg@nvidia.com>

Replace allows all the devices in a group to move in one step to a new
HWPT. Further, the HWPT move is done without going through a blocking
domain so that the IOMMU driver can implement some level of
non-distruption to ongoing DMA if that has meaning for it (eg for future
special driver domains)

Replace uses a lot of the same logic as normal attach, except the actual
domain change over has different restrictions, and we are careful to
sequence things so that failure is going to leave everything the way it
was, and not get trapped in a blocking domain or something if there is
ENOMEM.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c | 99 ++++++++++++++++++++++++++++++++++
 drivers/iommu/iommufd/main.c   |  1 +
 2 files changed, 100 insertions(+)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 9d61a62c4be067..2770087059ba73 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,6 +4,7 @@
 #include <linux/iommufd.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include "../iommu-priv.h"
 
 #include "io_pagetable.h"
 #include "iommufd_private.h"
@@ -365,6 +366,84 @@ iommufd_device_do_attach(struct iommufd_device *idev,
 	return NULL;
 }
 
+static struct iommufd_hw_pagetable *
+iommufd_device_do_replace(struct iommufd_device *idev,
+			  struct iommufd_hw_pagetable *hwpt)
+{
+	struct iommufd_group *igroup = idev->igroup;
+	struct iommufd_hw_pagetable *old_hwpt;
+	unsigned int num_devices = 0;
+	struct iommufd_device *cur;
+	int rc;
+
+	mutex_lock(&idev->igroup->lock);
+
+	if (igroup->hwpt == NULL) {
+		rc = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (hwpt == igroup->hwpt) {
+		mutex_unlock(&idev->igroup->lock);
+		return NULL;
+	}
+
+	/* Try to upgrade the domain we have */
+	list_for_each_entry(cur, &igroup->device_list, group_item) {
+		num_devices++;
+		if (cur->enforce_cache_coherency) {
+			rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+			if (rc)
+				goto err_unlock;
+		}
+	}
+
+	old_hwpt = igroup->hwpt;
+	if (hwpt->ioas != old_hwpt->ioas) {
+		list_for_each_entry(cur, &igroup->device_list, group_item) {
+			rc = iopt_table_enforce_dev_resv_regions(
+				&hwpt->ioas->iopt, cur->dev, NULL);
+			if (rc)
+				goto err_unresv;
+		}
+	}
+
+	rc = iommufd_group_setup_msi(idev->igroup, hwpt);
+	if (rc)
+		goto err_unresv;
+
+	rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+	if (rc)
+		goto err_unresv;
+
+	if (hwpt->ioas != old_hwpt->ioas) {
+		list_for_each_entry(cur, &igroup->device_list, group_item)
+			iopt_remove_reserved_iova(&old_hwpt->ioas->iopt,
+						  cur->dev);
+	}
+
+	igroup->hwpt = hwpt;
+
+	/*
+	 * Move the refcounts held by the device_list to the new hwpt. Retain a
+	 * refcount for this thread as the caller will free it.
+	 */
+	refcount_add(num_devices, &hwpt->obj.users);
+	if (num_devices > 1)
+		WARN_ON(refcount_sub_and_test(num_devices - 1,
+					      &old_hwpt->obj.users));
+	mutex_unlock(&idev->igroup->lock);
+
+	/* Caller must destroy old_hwpt */
+	return old_hwpt;
+err_unresv:
+	list_for_each_entry(cur, &igroup->device_list, group_item)
+		iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
+err_unlock:
+	mutex_unlock(&idev->igroup->lock);
+	return ERR_PTR(rc);
+}
+
 typedef struct iommufd_hw_pagetable *(*attach_fn)(
 	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
 
@@ -523,6 +602,26 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
 
+/**
+ * iommufd_device_replace - Change the device's iommu_domain
+ * @idev: device to change
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This is the same as
+ *   iommufd_device_detach();
+ *   iommufd_device_attach();
+ * If it fails then no change is made to the attachment. The iommu driver may
+ * implement this so there is no disruption in translation. This can only be
+ * called if iommufd_device_attach() has already succeeded.
+ */
+int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+{
+	return iommufd_device_change_pt(idev, pt_id,
+					&iommufd_device_do_replace);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
+
 /**
  * iommufd_device_detach - Disconnect a device to an iommu_domain
  * @idev: device to detach
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 24f30f384df6f9..5b7f70543adb24 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -466,5 +466,6 @@ module_exit(iommufd_exit);
 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
 MODULE_ALIAS("devname:vfio/vfio");
 #endif
+MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
 MODULE_LICENSE("GPL");
-- 
2.40.0


  parent reply	other threads:[~2023-04-20 18:52 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-20 18:51 [PATCH v6 00/19] Add iommufd physical device operations for replace and alloc hwpt Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 01/19] iommufd: Move isolated msi enforcement to iommufd_device_bind() Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 02/19] iommufd: Add iommufd_group Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 03/19] iommufd: Replace the hwpt->devices list with iommufd_group Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 04/19] iommu: Export iommu_get_resv_regions() Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 05/19] iommufd: Keep track of each device's reserved regions instead of groups Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 06/19] iommufd: Use the iommufd_group to avoid duplicate MSI setup Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 07/19] iommufd: Make sw_msi_start a group global Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 08/19] iommufd: Move putting a hwpt to a helper function Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 09/19] iommufd: Add enforced_cache_coherency to iommufd_hw_pagetable_alloc() Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 10/19] iommufd: Allow a hwpt to be aborted after allocation Jason Gunthorpe
2023-04-21  7:29   ` Tian, Kevin
2023-04-20 18:51 ` [PATCH v6 11/19] iommufd: Fix locking around hwpt allocation Jason Gunthorpe
2023-04-21  7:30   ` Tian, Kevin
2023-04-20 18:51 ` [PATCH v6 12/19] iommufd: Reorganize iommufd_device_attach into iommufd_device_change_pt Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 13/19] iommu: Introduce a new iommu_group_replace_domain() API Jason Gunthorpe
2023-04-20 18:51 ` Jason Gunthorpe [this message]
2023-04-20 18:51 ` [PATCH v6 15/19] iommufd: Make destroy_rwsem use a lock class per object type Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 16/19] iommufd/selftest: Test iommufd_device_replace() Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 17/19] iommufd: Add IOMMU_HWPT_ALLOC Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 18/19] iommufd/selftest: Return the real idev id from selftest mock_domain Jason Gunthorpe
2023-04-20 18:51 ` [PATCH v6 19/19] iommufd/selftest: Add a selftest for IOMMU_HWPT_ALLOC Jason Gunthorpe
2023-04-21  1:35 ` [PATCH v6 00/19] Add iommufd physical device operations for replace and alloc hwpt Nicolin Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=14-v6-fdb604df649a+369-iommufd_alloc_jgg@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=lixiao.yang@intel.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=nicolinc@nvidia.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.