kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* more iommu dead code removal
@ 2021-05-10  6:53 Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 1/6] iommu: remove the unused dev_has_feat method Christoph Hellwig
                   ` (6 more replies)
  0 siblings, 7 replies; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:53 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

Hi all,

this is another series to remove dead code from the IOMMU subsystem,
this time mostly about the hacky code to pass an iommu device in
struct mdev_device and huge piles of support code.  All of this was
merged two years ago and (fortunately) never got used.

Note that the mdev.h changes might have minor contextual conflicts
with the pending work from Jason, but it is trivial to resolve.

Diffstat:
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |    2 
 drivers/iommu/intel/iommu.c                 |  362 ----------------------------
 drivers/iommu/intel/svm.c                   |    6 
 drivers/iommu/iommu.c                       |   57 ----
 drivers/vfio/vfio_iommu_type1.c             |  132 +---------
 include/linux/intel-iommu.h                 |   11 
 include/linux/iommu.h                       |   41 ---
 include/linux/mdev.h                        |   20 -
 8 files changed, 31 insertions(+), 600 deletions(-)

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/6] iommu: remove the unused dev_has_feat method
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
@ 2021-05-10  6:54 ` Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 2/6] iommu: remove the unused iommu_aux_get_pasid interface Christoph Hellwig
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:54 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

This method is never actually called.  Simplify the instance in
intel-iommu that is directly called by inlining the relevant code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  1 -
 drivers/iommu/intel/iommu.c                 | 67 ++-------------------
 include/linux/iommu.h                       |  4 +-
 3 files changed, 7 insertions(+), 65 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 54b2f27b81d439..9689563eec0f30 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2666,7 +2666,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.of_xlate		= arm_smmu_of_xlate,
 	.get_resv_regions	= arm_smmu_get_resv_regions,
 	.put_resv_regions	= generic_iommu_put_resv_regions,
-	.dev_has_feat		= arm_smmu_dev_has_feature,
 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 708f430af1c440..ba1060f6785119 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5326,72 +5326,18 @@ static int intel_iommu_disable_auxd(struct device *dev)
 	return 0;
 }
 
-/*
- * A PCI express designated vendor specific extended capability is defined
- * in the section 3.7 of Intel scalable I/O virtualization technical spec
- * for system software and tools to detect endpoint devices supporting the
- * Intel scalable IO virtualization without host driver dependency.
- *
- * Returns the address of the matching extended capability structure within
- * the device's PCI configuration space or 0 if the device does not support
- * it.
- */
-static int siov_find_pci_dvsec(struct pci_dev *pdev)
-{
-	int pos;
-	u16 vendor, id;
-
-	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
-	while (pos) {
-		pci_read_config_word(pdev, pos + 4, &vendor);
-		pci_read_config_word(pdev, pos + 8, &id);
-		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
-			return pos;
-
-		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
-	}
-
-	return 0;
-}
-
-static bool
-intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
-{
-	struct device_domain_info *info = get_domain_info(dev);
-
-	if (feat == IOMMU_DEV_FEAT_AUX) {
-		int ret;
-
-		if (!dev_is_pci(dev) || dmar_disabled ||
-		    !scalable_mode_support() || !pasid_mode_support())
-			return false;
-
-		ret = pci_pasid_features(to_pci_dev(dev));
-		if (ret < 0)
-			return false;
-
-		return !!siov_find_pci_dvsec(to_pci_dev(dev));
-	}
-
-	if (feat == IOMMU_DEV_FEAT_IOPF)
-		return info && info->pri_supported;
-
-	if (feat == IOMMU_DEV_FEAT_SVA)
-		return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
-			info->pasid_supported && info->pri_supported &&
-			info->ats_supported;
-
-	return false;
-}
-
 static int
 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
 {
 	if (feat == IOMMU_DEV_FEAT_AUX)
 		return intel_iommu_enable_auxd(dev);
 
-	if (feat == IOMMU_DEV_FEAT_IOPF)
-		return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
+	if (feat == IOMMU_DEV_FEAT_IOPF) {
+		struct device_domain_info *info = get_domain_info(dev);
+
+		if (info &&  info->pri_supported)
+			return 0;
+	}
 
 	if (feat == IOMMU_DEV_FEAT_SVA) {
 		struct device_domain_info *info = get_domain_info(dev);
@@ -5552,7 +5498,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.get_resv_regions	= intel_iommu_get_resv_regions,
 	.put_resv_regions	= generic_iommu_put_resv_regions,
 	.device_group		= intel_iommu_device_group,
-	.dev_has_feat		= intel_iommu_dev_has_feat,
 	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 32d448050bf76e..1092a7f967a5e8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -141,8 +141,7 @@ struct iommu_resv_region {
  *			 supported, this feature must be enabled before and
  *			 disabled after %IOMMU_DEV_FEAT_SVA.
  *
- * Device drivers query whether a feature is supported using
- * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature().
+ * Device drivers enable a feature using iommu_dev_enable_feature().
  */
 enum iommu_dev_features {
 	IOMMU_DEV_FEAT_AUX,
@@ -256,7 +255,6 @@ struct iommu_ops {
 	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
 
 	/* Per device IOMMU features */
-	bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f);
 	bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
-- 
2.30.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 2/6] iommu: remove the unused iommu_aux_get_pasid interface
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 1/6] iommu: remove the unused dev_has_feat method Christoph Hellwig
@ 2021-05-10  6:54 ` Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 3/6] vfio: remove the unused mdev iommu hook Christoph Hellwig
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:54 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

This function was never used since it was added more than 2 years ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/intel/iommu.c | 10 ----------
 drivers/iommu/iommu.c       | 11 -----------
 include/linux/iommu.h       |  9 ---------
 3 files changed, 30 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ba1060f6785119..cc07f316adcb18 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5375,15 +5375,6 @@ intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
 	return false;
 }
 
-static int
-intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
-{
-	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
-
-	return dmar_domain->default_pasid > 0 ?
-			dmar_domain->default_pasid : -EINVAL;
-}
-
 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
 					   struct device *dev)
 {
@@ -5485,7 +5476,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.detach_dev		= intel_iommu_detach_device,
 	.aux_attach_dev		= intel_iommu_aux_attach_device,
 	.aux_detach_dev		= intel_iommu_aux_detach_device,
-	.aux_get_pasid		= intel_iommu_aux_get_pasid,
 	.map			= intel_iommu_map,
 	.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
 	.unmap			= intel_iommu_unmap,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 808ab70d5df50f..6721ac17baf29b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2934,17 +2934,6 @@ void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
 
-int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
-{
-	int ret = -ENODEV;
-
-	if (domain->ops->aux_get_pasid)
-		ret = domain->ops->aux_get_pasid(domain, dev);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
-
 /**
  * iommu_sva_bind_device() - Bind a process address space to a device
  * @dev: the device
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1092a7f967a5e8..d8aa5c8a5ba57a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -202,7 +202,6 @@ struct iommu_iotlb_gather {
  *                               iommu specific features.
  * @dev_feat_enabled: check enabled feature
  * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
- * @aux_get_pasid: get the pasid given an aux-domain
  * @sva_bind: Bind process address space to device
  * @sva_unbind: Unbind process address space from device
  * @sva_get_pasid: Get PASID associated to a SVA handle
@@ -262,7 +261,6 @@ struct iommu_ops {
 	/* Aux-domain specific attach/detach entries */
 	int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev);
-	int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);
 
 	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
 				      void *drvdata);
@@ -594,7 +592,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
 bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
 int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev);
 void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
-int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);
 
 struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 					struct mm_struct *mm,
@@ -945,12 +942,6 @@ iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
 {
 }
 
-static inline int
-iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
-{
-	return -ENODEV;
-}
-
 static inline struct iommu_sva *
 iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
-- 
2.30.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 1/6] iommu: remove the unused dev_has_feat method Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 2/6] iommu: remove the unused iommu_aux_get_pasid interface Christoph Hellwig
@ 2021-05-10  6:54 ` Christoph Hellwig
  2021-05-10 15:54   ` Jason Gunthorpe
  2021-05-10  6:54 ` [PATCH 4/6] iommu: remove iommu_aux_{attach,detach}_device Christoph Hellwig
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:54 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

The iommu_device field in struct mdev_device has never been used
since it was added more than 2 years ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/vfio/vfio_iommu_type1.c | 132 ++++++--------------------------
 include/linux/mdev.h            |  20 -----
 2 files changed, 25 insertions(+), 127 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index a0747c35a7781e..5974a3fb2e2e12 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -113,7 +113,6 @@ struct vfio_batch {
 struct vfio_group {
 	struct iommu_group	*iommu_group;
 	struct list_head	next;
-	bool			mdev_group;	/* An mdev group */
 	bool			pinned_page_dirty_scope;
 };
 
@@ -1932,61 +1931,6 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
 	return ret;
 }
 
-static int vfio_mdev_attach_domain(struct device *dev, void *data)
-{
-	struct mdev_device *mdev = to_mdev_device(dev);
-	struct iommu_domain *domain = data;
-	struct device *iommu_device;
-
-	iommu_device = mdev_get_iommu_device(mdev);
-	if (iommu_device) {
-		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			return iommu_aux_attach_device(domain, iommu_device);
-		else
-			return iommu_attach_device(domain, iommu_device);
-	}
-
-	return -EINVAL;
-}
-
-static int vfio_mdev_detach_domain(struct device *dev, void *data)
-{
-	struct mdev_device *mdev = to_mdev_device(dev);
-	struct iommu_domain *domain = data;
-	struct device *iommu_device;
-
-	iommu_device = mdev_get_iommu_device(mdev);
-	if (iommu_device) {
-		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			iommu_aux_detach_device(domain, iommu_device);
-		else
-			iommu_detach_device(domain, iommu_device);
-	}
-
-	return 0;
-}
-
-static int vfio_iommu_attach_group(struct vfio_domain *domain,
-				   struct vfio_group *group)
-{
-	if (group->mdev_group)
-		return iommu_group_for_each_dev(group->iommu_group,
-						domain->domain,
-						vfio_mdev_attach_domain);
-	else
-		return iommu_attach_group(domain->domain, group->iommu_group);
-}
-
-static void vfio_iommu_detach_group(struct vfio_domain *domain,
-				    struct vfio_group *group)
-{
-	if (group->mdev_group)
-		iommu_group_for_each_dev(group->iommu_group, domain->domain,
-					 vfio_mdev_detach_domain);
-	else
-		iommu_detach_group(domain->domain, group->iommu_group);
-}
-
 static bool vfio_bus_is_mdev(struct bus_type *bus)
 {
 	struct bus_type *mdev_bus;
@@ -2001,20 +1945,6 @@ static bool vfio_bus_is_mdev(struct bus_type *bus)
 	return ret;
 }
 
-static int vfio_mdev_iommu_device(struct device *dev, void *data)
-{
-	struct mdev_device *mdev = to_mdev_device(dev);
-	struct device **old = data, *new;
-
-	new = mdev_get_iommu_device(mdev);
-	if (!new || (*old && *old != new))
-		return -EINVAL;
-
-	*old = new;
-
-	return 0;
-}
-
 /*
  * This is a helper function to insert an address range to iova list.
  * The list is initially created with a single entry corresponding to
@@ -2275,38 +2205,24 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_free;
 
 	if (vfio_bus_is_mdev(bus)) {
-		struct device *iommu_device = NULL;
-
-		group->mdev_group = true;
-
-		/* Determine the isolation type */
-		ret = iommu_group_for_each_dev(iommu_group, &iommu_device,
-					       vfio_mdev_iommu_device);
-		if (ret || !iommu_device) {
-			if (!iommu->external_domain) {
-				INIT_LIST_HEAD(&domain->group_list);
-				iommu->external_domain = domain;
-				vfio_update_pgsize_bitmap(iommu);
-			} else {
-				kfree(domain);
-			}
-
-			list_add(&group->next,
-				 &iommu->external_domain->group_list);
-			/*
-			 * Non-iommu backed group cannot dirty memory directly,
-			 * it can only use interfaces that provide dirty
-			 * tracking.
-			 * The iommu scope can only be promoted with the
-			 * addition of a dirty tracking group.
-			 */
-			group->pinned_page_dirty_scope = true;
-			mutex_unlock(&iommu->lock);
-
-			return 0;
+		if (!iommu->external_domain) {
+			INIT_LIST_HEAD(&domain->group_list);
+			iommu->external_domain = domain;
+			vfio_update_pgsize_bitmap(iommu);
+		} else {
+			kfree(domain);
 		}
 
-		bus = iommu_device->bus;
+		list_add(&group->next, &iommu->external_domain->group_list);
+		/*
+		 * Non-iommu backed group cannot dirty memory directly, it can
+		 * only use interfaces that provide dirty tracking.
+		 * The iommu scope can only be promoted with the addition of a
+		 * dirty tracking group.
+		 */
+		group->pinned_page_dirty_scope = true;
+		mutex_unlock(&iommu->lock);
+		return 0;
 	}
 
 	domain->domain = iommu_domain_alloc(bus);
@@ -2321,7 +2237,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 			goto out_domain;
 	}
 
-	ret = vfio_iommu_attach_group(domain, group);
+	ret = iommu_attach_group(domain->domain, group->iommu_group);
 	if (ret)
 		goto out_domain;
 
@@ -2388,15 +2304,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	list_for_each_entry(d, &iommu->domain_list, next) {
 		if (d->domain->ops == domain->domain->ops &&
 		    d->prot == domain->prot) {
-			vfio_iommu_detach_group(domain, group);
-			if (!vfio_iommu_attach_group(d, group)) {
+			iommu_detach_group(domain->domain, group->iommu_group);
+			if (!iommu_attach_group(d->domain,
+						group->iommu_group)) {
 				list_add(&group->next, &d->group_list);
 				iommu_domain_free(domain->domain);
 				kfree(domain);
 				goto done;
 			}
 
-			ret = vfio_iommu_attach_group(domain, group);
+			ret = iommu_attach_group(domain->domain,
+						 group->iommu_group);
 			if (ret)
 				goto out_domain;
 		}
@@ -2433,7 +2351,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	return 0;
 
 out_detach:
-	vfio_iommu_detach_group(domain, group);
+	iommu_detach_group(domain->domain, group->iommu_group);
 out_domain:
 	iommu_domain_free(domain->domain);
 	vfio_iommu_iova_free(&iova_copy);
@@ -2598,7 +2516,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 		if (!group)
 			continue;
 
-		vfio_iommu_detach_group(domain, group);
+		iommu_detach_group(domain->domain, group->iommu_group);
 		update_dirty_scope = !group->pinned_page_dirty_scope;
 		list_del(&group->next);
 		kfree(group);
@@ -2686,7 +2604,7 @@ static void vfio_release_domain(struct vfio_domain *domain, bool external)
 	list_for_each_entry_safe(group, group_tmp,
 				 &domain->group_list, next) {
 		if (!external)
-			vfio_iommu_detach_group(domain, group);
+			iommu_detach_group(domain->domain, group->iommu_group);
 		list_del(&group->next);
 		kfree(group);
 	}
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 1fb34ea394ad46..bf818e6f8931ed 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -18,7 +18,6 @@ struct mdev_device {
 	void *driver_data;
 	struct list_head next;
 	struct mdev_type *type;
-	struct device *iommu_device;
 	bool active;
 };
 
@@ -27,25 +26,6 @@ static inline struct mdev_device *to_mdev_device(struct device *dev)
 	return container_of(dev, struct mdev_device, dev);
 }
 
-/*
- * Called by the parent device driver to set the device which represents
- * this mdev in iommu protection scope. By default, the iommu device is
- * NULL, that indicates using vendor defined isolation.
- *
- * @dev: the mediated device that iommu will isolate.
- * @iommu_device: a pci device which represents the iommu for @dev.
- */
-static inline void mdev_set_iommu_device(struct mdev_device *mdev,
-					 struct device *iommu_device)
-{
-	mdev->iommu_device = iommu_device;
-}
-
-static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev)
-{
-	return mdev->iommu_device;
-}
-
 unsigned int mdev_get_type_group_id(struct mdev_device *mdev);
 unsigned int mtype_get_type_group_id(struct mdev_type *mtype);
 struct device *mtype_get_parent_dev(struct mdev_type *mtype);
-- 
2.30.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 4/6] iommu: remove iommu_aux_{attach,detach}_device
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
                   ` (2 preceding siblings ...)
  2021-05-10  6:54 ` [PATCH 3/6] vfio: remove the unused mdev iommu hook Christoph Hellwig
@ 2021-05-10  6:54 ` Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 5/6] iommu: remove IOMMU_DEV_FEAT_AUX Christoph Hellwig
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:54 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

These are entirely unused now, and were only called by the previously
entirely unused vfio mdev iommu interaction.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/intel/iommu.c | 205 ------------------------------------
 drivers/iommu/iommu.c       |  33 ------
 include/linux/intel-iommu.h |  10 --
 include/linux/iommu.h       |  17 ---
 4 files changed, 265 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index cc07f316adcb18..6cae6e4d693226 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1497,18 +1497,6 @@ static void domain_update_iotlb(struct dmar_domain *domain)
 			break;
 		}
 
-	if (!has_iotlb_device) {
-		struct subdev_domain_info *sinfo;
-
-		list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
-			info = get_domain_info(sinfo->pdev);
-			if (info && info->ats_enabled) {
-				has_iotlb_device = true;
-				break;
-			}
-		}
-	}
-
 	domain->has_iotlb_device = has_iotlb_device;
 }
 
@@ -1606,7 +1594,6 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 {
 	unsigned long flags;
 	struct device_domain_info *info;
-	struct subdev_domain_info *sinfo;
 
 	if (!domain->has_iotlb_device)
 		return;
@@ -1614,11 +1601,6 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 	spin_lock_irqsave(&device_domain_lock, flags);
 	list_for_each_entry(info, &domain->devices, link)
 		__iommu_flush_dev_iotlb(info, addr, mask);
-
-	list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
-		info = get_domain_info(sinfo->pdev);
-		__iommu_flush_dev_iotlb(info, addr, mask);
-	}
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
@@ -1903,7 +1885,6 @@ static struct dmar_domain *alloc_domain(int flags)
 		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
-	INIT_LIST_HEAD(&domain->subdevices);
 
 	return domain;
 }
@@ -2593,7 +2574,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	info->iommu = iommu;
 	info->pasid_table = NULL;
 	info->auxd_enabled = 0;
-	INIT_LIST_HEAD(&info->subdevices);
 
 	if (dev && dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(info->dev);
@@ -4579,168 +4559,6 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain)
 			domain->type == IOMMU_DOMAIN_UNMANAGED;
 }
 
-static inline struct subdev_domain_info *
-lookup_subdev_info(struct dmar_domain *domain, struct device *dev)
-{
-	struct subdev_domain_info *sinfo;
-
-	if (!list_empty(&domain->subdevices)) {
-		list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
-			if (sinfo->pdev == dev)
-				return sinfo;
-		}
-	}
-
-	return NULL;
-}
-
-static int auxiliary_link_device(struct dmar_domain *domain,
-				 struct device *dev)
-{
-	struct device_domain_info *info = get_domain_info(dev);
-	struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
-
-	assert_spin_locked(&device_domain_lock);
-	if (WARN_ON(!info))
-		return -EINVAL;
-
-	if (!sinfo) {
-		sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
-		sinfo->domain = domain;
-		sinfo->pdev = dev;
-		list_add(&sinfo->link_phys, &info->subdevices);
-		list_add(&sinfo->link_domain, &domain->subdevices);
-	}
-
-	return ++sinfo->users;
-}
-
-static int auxiliary_unlink_device(struct dmar_domain *domain,
-				   struct device *dev)
-{
-	struct device_domain_info *info = get_domain_info(dev);
-	struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
-	int ret;
-
-	assert_spin_locked(&device_domain_lock);
-	if (WARN_ON(!info || !sinfo || sinfo->users <= 0))
-		return -EINVAL;
-
-	ret = --sinfo->users;
-	if (!ret) {
-		list_del(&sinfo->link_phys);
-		list_del(&sinfo->link_domain);
-		kfree(sinfo);
-	}
-
-	return ret;
-}
-
-static int aux_domain_add_dev(struct dmar_domain *domain,
-			      struct device *dev)
-{
-	int ret;
-	unsigned long flags;
-	struct intel_iommu *iommu;
-
-	iommu = device_to_iommu(dev, NULL, NULL);
-	if (!iommu)
-		return -ENODEV;
-
-	if (domain->default_pasid <= 0) {
-		u32 pasid;
-
-		/* No private data needed for the default pasid */
-		pasid = ioasid_alloc(NULL, PASID_MIN,
-				     pci_max_pasids(to_pci_dev(dev)) - 1,
-				     NULL);
-		if (pasid == INVALID_IOASID) {
-			pr_err("Can't allocate default pasid\n");
-			return -ENODEV;
-		}
-		domain->default_pasid = pasid;
-	}
-
-	spin_lock_irqsave(&device_domain_lock, flags);
-	ret = auxiliary_link_device(domain, dev);
-	if (ret <= 0)
-		goto link_failed;
-
-	/*
-	 * Subdevices from the same physical device can be attached to the
-	 * same domain. For such cases, only the first subdevice attachment
-	 * needs to go through the full steps in this function. So if ret >
-	 * 1, just goto out.
-	 */
-	if (ret > 1)
-		goto out;
-
-	/*
-	 * iommu->lock must be held to attach domain to iommu and setup the
-	 * pasid entry for second level translation.
-	 */
-	spin_lock(&iommu->lock);
-	ret = domain_attach_iommu(domain, iommu);
-	if (ret)
-		goto attach_failed;
-
-	/* Setup the PASID entry for mediated devices: */
-	if (domain_use_first_level(domain))
-		ret = domain_setup_first_level(iommu, domain, dev,
-					       domain->default_pasid);
-	else
-		ret = intel_pasid_setup_second_level(iommu, domain, dev,
-						     domain->default_pasid);
-	if (ret)
-		goto table_failed;
-
-	spin_unlock(&iommu->lock);
-out:
-	spin_unlock_irqrestore(&device_domain_lock, flags);
-
-	return 0;
-
-table_failed:
-	domain_detach_iommu(domain, iommu);
-attach_failed:
-	spin_unlock(&iommu->lock);
-	auxiliary_unlink_device(domain, dev);
-link_failed:
-	spin_unlock_irqrestore(&device_domain_lock, flags);
-	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
-
-	return ret;
-}
-
-static void aux_domain_remove_dev(struct dmar_domain *domain,
-				  struct device *dev)
-{
-	struct device_domain_info *info;
-	struct intel_iommu *iommu;
-	unsigned long flags;
-
-	if (!is_aux_domain(dev, &domain->domain))
-		return;
-
-	spin_lock_irqsave(&device_domain_lock, flags);
-	info = get_domain_info(dev);
-	iommu = info->iommu;
-
-	if (!auxiliary_unlink_device(domain, dev)) {
-		spin_lock(&iommu->lock);
-		intel_pasid_tear_down_entry(iommu, dev,
-					    domain->default_pasid, false);
-		domain_detach_iommu(domain, iommu);
-		spin_unlock(&iommu->lock);
-	}
-
-	spin_unlock_irqrestore(&device_domain_lock, flags);
-
-	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
-}
-
 static int prepare_domain_attach_device(struct iommu_domain *domain,
 					struct device *dev)
 {
@@ -4813,33 +4631,12 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 	return domain_add_dev_info(to_dmar_domain(domain), dev);
 }
 
-static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
-					 struct device *dev)
-{
-	int ret;
-
-	if (!is_aux_domain(dev, domain))
-		return -EPERM;
-
-	ret = prepare_domain_attach_device(domain, dev);
-	if (ret)
-		return ret;
-
-	return aux_domain_add_dev(to_dmar_domain(domain), dev);
-}
-
 static void intel_iommu_detach_device(struct iommu_domain *domain,
 				      struct device *dev)
 {
 	dmar_remove_one_dev_info(dev);
 }
 
-static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
-					  struct device *dev)
-{
-	aux_domain_remove_dev(to_dmar_domain(domain), dev);
-}
-
 #ifdef CONFIG_INTEL_IOMMU_SVM
 /*
  * 2D array for converting and sanitizing IOMMU generic TLB granularity to
@@ -5474,8 +5271,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.enable_nesting		= intel_iommu_enable_nesting,
 	.attach_dev		= intel_iommu_attach_device,
 	.detach_dev		= intel_iommu_detach_device,
-	.aux_attach_dev		= intel_iommu_aux_attach_device,
-	.aux_detach_dev		= intel_iommu_aux_detach_device,
 	.map			= intel_iommu_map,
 	.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
 	.unmap			= intel_iommu_unmap,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6721ac17baf29b..c4b92270946c2f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2901,39 +2901,6 @@ bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
 }
 EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
 
-/*
- * Aux-domain specific attach/detach.
- *
- * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns
- * true. Also, as long as domains are attached to a device through this
- * interface, any tries to call iommu_attach_device() should fail
- * (iommu_detach_device() can't fail, so we fail when trying to re-attach).
- * This should make us safe against a device being attached to a guest as a
- * whole while there are still pasid users on it (aux and sva).
- */
-int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
-{
-	int ret = -ENODEV;
-
-	if (domain->ops->aux_attach_dev)
-		ret = domain->ops->aux_attach_dev(domain, dev);
-
-	if (!ret)
-		trace_attach_device_to_domain(dev);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_aux_attach_device);
-
-void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
-{
-	if (domain->ops->aux_detach_dev) {
-		domain->ops->aux_detach_dev(domain, dev);
-		trace_detach_device_from_domain(dev);
-	}
-}
-EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
-
 /**
  * iommu_sva_bind_device() - Bind a process address space to a device
  * @dev: the device
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 03faf20a6817e8..c12b8e6545733c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -548,7 +548,6 @@ struct dmar_domain {
 
 	bool has_iotlb_device;
 	struct list_head devices;	/* all devices' list */
-	struct list_head subdevices;	/* all subdevices' list */
 	struct iova_domain iovad;	/* iova's that belong to this domain */
 
 	struct dma_pte	*pgd;		/* virtual address */
@@ -621,15 +620,6 @@ struct intel_iommu {
 	struct dmar_drhd_unit *drhd;
 };
 
-/* Per subdevice private data */
-struct subdev_domain_info {
-	struct list_head link_phys;	/* link to phys device siblings */
-	struct list_head link_domain;	/* link to domain siblings */
-	struct device *pdev;		/* physical device derived from */
-	struct dmar_domain *domain;	/* aux-domain */
-	int users;			/* user count */
-};
-
 /* PCI domain-device relationship */
 struct device_domain_info {
 	struct list_head link;	/* link to domain siblings */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d8aa5c8a5ba57a..b0bf99dfd80d16 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -258,10 +258,6 @@ struct iommu_ops {
 	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
 
-	/* Aux-domain specific attach/detach entries */
-	int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev);
-	void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev);
-
 	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
 				      void *drvdata);
 	void (*sva_unbind)(struct iommu_sva *handle);
@@ -590,8 +586,6 @@ void iommu_release_device(struct device *dev);
 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
 bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
-int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev);
-void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
 
 struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 					struct mm_struct *mm,
@@ -931,17 +925,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 	return -ENODEV;
 }
 
-static inline int
-iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
-{
-	return -ENODEV;
-}
-
-static inline void
-iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
-{
-}
-
 static inline struct iommu_sva *
 iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
-- 
2.30.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 5/6] iommu: remove IOMMU_DEV_FEAT_AUX
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
                   ` (3 preceding siblings ...)
  2021-05-10  6:54 ` [PATCH 4/6] iommu: remove iommu_aux_{attach,detach}_device Christoph Hellwig
@ 2021-05-10  6:54 ` Christoph Hellwig
  2021-05-10  6:54 ` [PATCH 6/6] iommu: remove iommu_dev_feature_enabled Christoph Hellwig
  2021-05-10 11:54 ` more iommu dead code removal Jason Gunthorpe
  6 siblings, 0 replies; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:54 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

This feature is entirely unused, and was only ever called by unused code
either.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/intel/iommu.c | 82 -------------------------------------
 drivers/iommu/intel/svm.c   |  6 ---
 include/linux/intel-iommu.h |  1 -
 include/linux/iommu.h       |  2 -
 4 files changed, 91 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6cae6e4d693226..2603d95ecb4c0e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2573,7 +2573,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	info->domain = domain;
 	info->iommu = iommu;
 	info->pasid_table = NULL;
-	info->auxd_enabled = 0;
 
 	if (dev && dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(info->dev);
@@ -4546,19 +4545,6 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
 		domain_exit(to_dmar_domain(domain));
 }
 
-/*
- * Check whether a @domain could be attached to the @dev through the
- * aux-domain attach/detach APIs.
- */
-static inline bool
-is_aux_domain(struct device *dev, struct iommu_domain *domain)
-{
-	struct device_domain_info *info = get_domain_info(dev);
-
-	return info && info->auxd_enabled &&
-			domain->type == IOMMU_DOMAIN_UNMANAGED;
-}
-
 static int prepare_domain_attach_device(struct iommu_domain *domain,
 					struct device *dev)
 {
@@ -4612,9 +4598,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EPERM;
 	}
 
-	if (is_aux_domain(dev, domain))
-		return -EPERM;
-
 	/* normally dev is not mapped */
 	if (unlikely(domain_context_mapped(dev))) {
 		struct dmar_domain *old_domain;
@@ -5083,52 +5066,9 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
 	return generic_device_group(dev);
 }
 
-static int intel_iommu_enable_auxd(struct device *dev)
-{
-	struct device_domain_info *info;
-	struct intel_iommu *iommu;
-	unsigned long flags;
-	int ret;
-
-	iommu = device_to_iommu(dev, NULL, NULL);
-	if (!iommu || dmar_disabled)
-		return -EINVAL;
-
-	if (!sm_supported(iommu) || !pasid_supported(iommu))
-		return -EINVAL;
-
-	ret = intel_iommu_enable_pasid(iommu, dev);
-	if (ret)
-		return -ENODEV;
-
-	spin_lock_irqsave(&device_domain_lock, flags);
-	info = get_domain_info(dev);
-	info->auxd_enabled = 1;
-	spin_unlock_irqrestore(&device_domain_lock, flags);
-
-	return 0;
-}
-
-static int intel_iommu_disable_auxd(struct device *dev)
-{
-	struct device_domain_info *info;
-	unsigned long flags;
-
-	spin_lock_irqsave(&device_domain_lock, flags);
-	info = get_domain_info(dev);
-	if (!WARN_ON(!info))
-		info->auxd_enabled = 0;
-	spin_unlock_irqrestore(&device_domain_lock, flags);
-
-	return 0;
-}
-
 static int
 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
 {
-	if (feat == IOMMU_DEV_FEAT_AUX)
-		return intel_iommu_enable_auxd(dev);
-
 	if (feat == IOMMU_DEV_FEAT_IOPF) {
 		struct device_domain_info *info = get_domain_info(dev);
 
@@ -5152,26 +5092,6 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
 	return -ENODEV;
 }
 
-static int
-intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
-{
-	if (feat == IOMMU_DEV_FEAT_AUX)
-		return intel_iommu_disable_auxd(dev);
-
-	return -ENODEV;
-}
-
-static bool
-intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
-{
-	struct device_domain_info *info = get_domain_info(dev);
-
-	if (feat == IOMMU_DEV_FEAT_AUX)
-		return scalable_mode_support() && info && info->auxd_enabled;
-
-	return false;
-}
-
 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
 					   struct device *dev)
 {
@@ -5283,9 +5203,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.get_resv_regions	= intel_iommu_get_resv_regions,
 	.put_resv_regions	= generic_iommu_put_resv_regions,
 	.device_group		= intel_iommu_device_group,
-	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
-	.dev_disable_feat	= intel_iommu_dev_disable_feat,
 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
 	.def_domain_type	= device_def_domain_type,
 	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5165cea904211a..54b55f867438c8 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -347,10 +347,6 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	sdev->sid = PCI_DEVID(info->bus, info->devfn);
 	sdev->iommu = iommu;
 
-	/* Only count users if device has aux domains */
-	if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
-		sdev->users = 1;
-
 	/* Set up device context entry for PASID if not enabled already */
 	ret = intel_iommu_enable_pasid(iommu, sdev->dev);
 	if (ret) {
@@ -412,8 +408,6 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
 		goto out;
 
 	if (sdev) {
-		if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
-			sdev->users--;
 		if (!sdev->users) {
 			list_del_rcu(&sdev->list);
 			intel_pasid_tear_down_entry(iommu, dev,
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index c12b8e6545733c..4b855c42435f7c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -636,7 +636,6 @@ struct device_domain_info {
 	u8 pri_enabled:1;
 	u8 ats_supported:1;
 	u8 ats_enabled:1;
-	u8 auxd_enabled:1;	/* Multiple domains per device */
 	u8 ats_qdep;
 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
 	struct intel_iommu *iommu; /* IOMMU used by this device */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b0bf99dfd80d16..ef9e7dcef76e90 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -132,7 +132,6 @@ struct iommu_resv_region {
 
 /**
  * enum iommu_dev_features - Per device IOMMU features
- * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature
  * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses
  * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally
  *			 enabling %IOMMU_DEV_FEAT_SVA requires
@@ -144,7 +143,6 @@ struct iommu_resv_region {
  * Device drivers enable a feature using iommu_dev_enable_feature().
  */
 enum iommu_dev_features {
-	IOMMU_DEV_FEAT_AUX,
 	IOMMU_DEV_FEAT_SVA,
 	IOMMU_DEV_FEAT_IOPF,
 };
-- 
2.30.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 6/6] iommu: remove iommu_dev_feature_enabled
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
                   ` (4 preceding siblings ...)
  2021-05-10  6:54 ` [PATCH 5/6] iommu: remove IOMMU_DEV_FEAT_AUX Christoph Hellwig
@ 2021-05-10  6:54 ` Christoph Hellwig
  2021-05-10 11:54 ` more iommu dead code removal Jason Gunthorpe
  6 siblings, 0 replies; 39+ messages in thread
From: Christoph Hellwig @ 2021-05-10  6:54 UTC (permalink / raw)
  To: Joerg Roedel, Alex Williamson
  Cc: David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	Jason Gunthorpe, linux-arm-kernel, iommu, kvm

Remove the unused iommu_dev_feature_enabled function.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  1 -
 drivers/iommu/iommu.c                       | 13 -------------
 include/linux/iommu.h                       |  9 ---------
 3 files changed, 23 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9689563eec0f30..77deabf2ab9c83 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2666,7 +2666,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.of_xlate		= arm_smmu_of_xlate,
 	.get_resv_regions	= arm_smmu_get_resv_regions,
 	.put_resv_regions	= generic_iommu_put_resv_regions,
-	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
 	.sva_bind		= arm_smmu_sva_bind,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index c4b92270946c2f..3d0d67b918a98f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2888,19 +2888,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 }
 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
 
-bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
-{
-	if (dev->iommu && dev->iommu->iommu_dev) {
-		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
-
-		if (ops->dev_feat_enabled)
-			return ops->dev_feat_enabled(dev, feat);
-	}
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
-
 /**
  * iommu_sva_bind_device() - Bind a process address space to a device
  * @dev: the device
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ef9e7dcef76e90..c0685c88b3f8b5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -198,7 +198,6 @@ struct iommu_iotlb_gather {
  *                      driver init to device driver init (default no)
  * @dev_has/enable/disable_feat: per device entries to check/enable/disable
  *                               iommu specific features.
- * @dev_feat_enabled: check enabled feature
  * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
  * @sva_bind: Bind process address space to device
  * @sva_unbind: Unbind process address space from device
@@ -252,7 +251,6 @@ struct iommu_ops {
 	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
 
 	/* Per device IOMMU features */
-	bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
 
@@ -583,7 +581,6 @@ void iommu_release_device(struct device *dev);
 
 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
-bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
 
 struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 					struct mm_struct *mm,
@@ -905,12 +902,6 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 	return NULL;
 }
 
-static inline bool
-iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
-{
-	return false;
-}
-
 static inline int
 iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
 {
-- 
2.30.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: more iommu dead code removal
  2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
                   ` (5 preceding siblings ...)
  2021-05-10  6:54 ` [PATCH 6/6] iommu: remove iommu_dev_feature_enabled Christoph Hellwig
@ 2021-05-10 11:54 ` Jason Gunthorpe
  6 siblings, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-10 11:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Joerg Roedel, Alex Williamson, David Woodhouse, Lu Baolu,
	Will Deacon, Kirti Wankhede, Jacob Pan, Raj, Ashok, Wu, Hao,
	Jiang, Dave, Tian, Kevin, linux-arm-kernel, iommu, kvm

On Mon, May 10, 2021 at 08:53:59AM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> this is another series to remove dead code from the IOMMU subsystem,
> this time mostly about the hacky code to pass an iommu device in
> struct mdev_device and huge piles of support code.  All of this was
> merged two years ago and (fortunately) never got used.

Yes, I looked at this too. Intel has been merging dead code for a
while now. Ostensibly to prepare to get PASID support in.. But the
whole PASID thing looks to be redesigned from what was originally
imagined.

At least from VFIO I think the PASID support should not use this hacky
stuff, /dev/ioasid should provide a clean solution

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-10  6:54 ` [PATCH 3/6] vfio: remove the unused mdev iommu hook Christoph Hellwig
@ 2021-05-10 15:54   ` Jason Gunthorpe
  2021-05-13  3:28     ` Tian, Kevin
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-10 15:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Joerg Roedel, Alex Williamson, David Woodhouse, Lu Baolu,
	Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu, kvm

On Mon, May 10, 2021 at 08:54:02AM +0200, Christoph Hellwig wrote:
> The iommu_device field in struct mdev_device has never been used
> since it was added more than 2 years ago.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 132 ++++++--------------------------
>  include/linux/mdev.h            |  20 -----
>  2 files changed, 25 insertions(+), 127 deletions(-)

I asked Intel folks to deal with this a month ago:

https://lore.kernel.org/kvm/20210406200030.GA425310@nvidia.com/

So lets just remove it, it is clearly a bad idea

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-10 15:54   ` Jason Gunthorpe
@ 2021-05-13  3:28     ` Tian, Kevin
  2021-05-13 12:00       ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-05-13  3:28 UTC (permalink / raw)
  To: Jason Gunthorpe, Christoph Hellwig
  Cc: Joerg Roedel, Alex Williamson, David Woodhouse, Lu Baolu,
	Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu, kvm

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Monday, May 10, 2021 11:55 PM
> 
> On Mon, May 10, 2021 at 08:54:02AM +0200, Christoph Hellwig wrote:
> > The iommu_device field in struct mdev_device has never been used
> > since it was added more than 2 years ago.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 132 ++++++--------------------------
> >  include/linux/mdev.h            |  20 -----
> >  2 files changed, 25 insertions(+), 127 deletions(-)
> 
> I asked Intel folks to deal with this a month ago:
> 
> https://lore.kernel.org/kvm/20210406200030.GA425310@nvidia.com/
> 
> So lets just remove it, it is clearly a bad idea
> 
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> 

Want to get a clearer picture on what needs to be redesigned after this
removal.

Are you specially concerned about this iommu_device hack which 
directly connects mdev_device to iommu layer or the entire removed
logic including the aux domain concept? For the former we are now 
following up the referred thread to find a clean way. But for the latter 
we feel it's still necessary regardless of how iommu interface is redesigned 
to support device connection from the upper level driver. The reason is 
that with mdev or subdevice one physical device could be attached to 
multiple domains now. there could be a primary domain with DOMAIN_
DMA type for DMA_API use by parent driver itself, and multiple auxiliary 
domains with DOMAIN_UNMANAGED types for subdevices assigned to 
different VMs. In this case It's a different model from existing policy 
which allows only one domain per device. In removed code we followed 
Joerg's suggestion to keep existing iommu_attach_device for connecting 
device to the primary domain and then add new iommu_aux_attach_
device for connecting device to auxiliary domains. Lots of removed iommu 
code deal with the management of auxiliary domains in the iommu core 
layer and intel-iommu drivers, which imho is largely generic and could
be leveraged w/o intrusive refactoring to support redesigned iommu 
interface.

Does it sound a reasonable position?

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-13  3:28     ` Tian, Kevin
@ 2021-05-13 12:00       ` Jason Gunthorpe
  2021-05-14  6:27         ` Tian, Kevin
                           ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-13 12:00 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:

> Are you specially concerned about this iommu_device hack which 
> directly connects mdev_device to iommu layer or the entire removed
> logic including the aux domain concept? For the former we are now 
> following up the referred thread to find a clean way. But for the latter 
> we feel it's still necessary regardless of how iommu interface is redesigned 
> to support device connection from the upper level driver. The reason is 
> that with mdev or subdevice one physical device could be attached to 
> multiple domains now. there could be a primary domain with DOMAIN_
> DMA type for DMA_API use by parent driver itself, and multiple auxiliary 
> domains with DOMAIN_UNMANAGED types for subdevices assigned to 
> different VMs.

Why do we need more domains than just the physical domain for the
parent? How does auxdomain appear in /dev/ioasid?

I never understood what this dead code was supposed to be used for.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-13 12:00       ` Jason Gunthorpe
@ 2021-05-14  6:27         ` Tian, Kevin
  2021-05-14  6:54         ` Tian, Kevin
  2021-05-14 13:17         ` Tian, Kevin
  2 siblings, 0 replies; 39+ messages in thread
From: Tian, Kevin @ 2021-05-14  6:27 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, May 13, 2021 8:01 PM
> 
> On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> 
> > Are you specially concerned about this iommu_device hack which
> > directly connects mdev_device to iommu layer or the entire removed
> > logic including the aux domain concept? For the former we are now
> > following up the referred thread to find a clean way. But for the latter
> > we feel it's still necessary regardless of how iommu interface is redesigned
> > to support device connection from the upper level driver. The reason is
> > that with mdev or subdevice one physical device could be attached to
> > multiple domains now. there could be a primary domain with DOMAIN_
> > DMA type for DMA_API use by parent driver itself, and multiple auxiliary
> > domains with DOMAIN_UNMANAGED types for subdevices assigned to
> > different VMs.
> 
> Why do we need more domains than just the physical domain for the
> parent? How does auxdomain appear in /dev/ioasid?
> 

Say the parent device has three WQs. WQ1 is used by parent driver itself,
while WQ2/WQ3 are assigned to VM1/VM2 respectively.

WQ1 is attached to domain1 for an IOVA space to support DMA API 
operations in parent driver. 

WQ2 is attached to domain2 for the GPA space of VM1. Domain2 is
created when WQ2 is assigned to VM1 as a mdev.

WQ3 is attached to domain3 for the GPA space of VM2. Domain3 is
created when WQ3 is assigned to VM2 as a mdev.

In this case domain1 is the primary while the other two are auxiliary
to the parent.

auxdomain represents as a normal domain in /dev/ioasid, with only
care required when doing attachment.

e.g. VM1 is assigned with both a pdev and mdev. Qemu creates 
gpa_ioasid which is associated with a single domain for VM1's 
GPA space and this domain is shared by both pdev and mdev.

The domain becomes the primary domain of pdev when attaching 
pdev to gpa_ioasid:
	iommu_attach_device(domain, device);

The domain becomes the auxiliary domain of mdev's parent when
attaching mdev to gpa_ioasid:
	iommu_aux_attach_device(domain, device, pasid);

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-13 12:00       ` Jason Gunthorpe
  2021-05-14  6:27         ` Tian, Kevin
@ 2021-05-14  6:54         ` Tian, Kevin
  2021-05-14 12:19           ` Jason Gunthorpe
  2021-05-14 13:17         ` Tian, Kevin
  2 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-05-14  6:54 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

> From: Tian, Kevin
> Sent: Friday, May 14, 2021 2:28 PM
> 
> > From: Jason Gunthorpe <jgg@ziepe.ca>
> > Sent: Thursday, May 13, 2021 8:01 PM
> >
> > On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> >
> > > Are you specially concerned about this iommu_device hack which
> > > directly connects mdev_device to iommu layer or the entire removed
> > > logic including the aux domain concept? For the former we are now
> > > following up the referred thread to find a clean way. But for the latter
> > > we feel it's still necessary regardless of how iommu interface is
> redesigned
> > > to support device connection from the upper level driver. The reason is
> > > that with mdev or subdevice one physical device could be attached to
> > > multiple domains now. there could be a primary domain with DOMAIN_
> > > DMA type for DMA_API use by parent driver itself, and multiple auxiliary
> > > domains with DOMAIN_UNMANAGED types for subdevices assigned to
> > > different VMs.
> >
> > Why do we need more domains than just the physical domain for the
> > parent? How does auxdomain appear in /dev/ioasid?
> >
> 
> Say the parent device has three WQs. WQ1 is used by parent driver itself,
> while WQ2/WQ3 are assigned to VM1/VM2 respectively.
> 
> WQ1 is attached to domain1 for an IOVA space to support DMA API
> operations in parent driver.
> 
> WQ2 is attached to domain2 for the GPA space of VM1. Domain2 is
> created when WQ2 is assigned to VM1 as a mdev.
> 
> WQ3 is attached to domain3 for the GPA space of VM2. Domain3 is
> created when WQ3 is assigned to VM2 as a mdev.
> 
> In this case domain1 is the primary while the other two are auxiliary
> to the parent.
> 
> auxdomain represents as a normal domain in /dev/ioasid, with only
> care required when doing attachment.
> 
> e.g. VM1 is assigned with both a pdev and mdev. Qemu creates
> gpa_ioasid which is associated with a single domain for VM1's
> GPA space and this domain is shared by both pdev and mdev.

Here pdev/mdev are just conceptual description. Following your
earlier suggestion /dev/ioasid will not refer to explicit mdev_device.
Instead, each vfio device attached to an ioasid is represented by either
"struct device" for pdev or "struct device + pasid" for mdev. The
presence of pasid decides which iommu_attach api should be used.

> 
> The domain becomes the primary domain of pdev when attaching
> pdev to gpa_ioasid:
> 	iommu_attach_device(domain, device);
> 
> The domain becomes the auxiliary domain of mdev's parent when
> attaching mdev to gpa_ioasid:
> 	iommu_aux_attach_device(domain, device, pasid);
> 
> Thanks
> Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14  6:54         ` Tian, Kevin
@ 2021-05-14 12:19           ` Jason Gunthorpe
  2021-05-14 12:58             ` Tian, Kevin
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-14 12:19 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

On Fri, May 14, 2021 at 06:54:16AM +0000, Tian, Kevin wrote:
> > From: Tian, Kevin
> > Sent: Friday, May 14, 2021 2:28 PM
> > 
> > > From: Jason Gunthorpe <jgg@ziepe.ca>
> > > Sent: Thursday, May 13, 2021 8:01 PM
> > >
> > > On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> > >
> > > > Are you specially concerned about this iommu_device hack which
> > > > directly connects mdev_device to iommu layer or the entire removed
> > > > logic including the aux domain concept? For the former we are now
> > > > following up the referred thread to find a clean way. But for the latter
> > > > we feel it's still necessary regardless of how iommu interface is
> > redesigned
> > > > to support device connection from the upper level driver. The reason is
> > > > that with mdev or subdevice one physical device could be attached to
> > > > multiple domains now. there could be a primary domain with DOMAIN_
> > > > DMA type for DMA_API use by parent driver itself, and multiple auxiliary
> > > > domains with DOMAIN_UNMANAGED types for subdevices assigned to
> > > > different VMs.
> > >
> > > Why do we need more domains than just the physical domain for the
> > > parent? How does auxdomain appear in /dev/ioasid?
> > >
> > 
> > Say the parent device has three WQs. WQ1 is used by parent driver itself,
> > while WQ2/WQ3 are assigned to VM1/VM2 respectively.
> > 
> > WQ1 is attached to domain1 for an IOVA space to support DMA API
> > operations in parent driver.

More specifically WQ1 uses a PASID that is represented by an IOASID to
userspace.

> > WQ2 is attached to domain2 for the GPA space of VM1. Domain2 is
> > created when WQ2 is assigned to VM1 as a mdev.
> > 
> > WQ3 is attached to domain3 for the GPA space of VM2. Domain3 is
> > created when WQ3 is assigned to VM2 as a mdev.
> > 
> > In this case domain1 is the primary while the other two are auxiliary
> > to the parent.
> > 
> > auxdomain represents as a normal domain in /dev/ioasid, with only
> > care required when doing attachment.
> > 
> > e.g. VM1 is assigned with both a pdev and mdev. Qemu creates
> > gpa_ioasid which is associated with a single domain for VM1's
> > GPA space and this domain is shared by both pdev and mdev.
> 
> Here pdev/mdev are just conceptual description. Following your
> earlier suggestion /dev/ioasid will not refer to explicit mdev_device.
> Instead, each vfio device attached to an ioasid is represented by either
> "struct device" for pdev or "struct device + pasid" for mdev. The
> presence of pasid decides which iommu_attach api should be used.

But you still haven't explained what an aux domain is to /dev/ioasid.

Why do I need more public kernel objects to represent a PASID IOASID?

Are you creating a domain for every IOASID? Why?

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14 12:19           ` Jason Gunthorpe
@ 2021-05-14 12:58             ` Tian, Kevin
  2021-05-14 13:31               ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-05-14 12:58 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Friday, May 14, 2021 8:19 PM
> 
> On Fri, May 14, 2021 at 06:54:16AM +0000, Tian, Kevin wrote:
> > > From: Tian, Kevin
> > > Sent: Friday, May 14, 2021 2:28 PM
> > >
> > > > From: Jason Gunthorpe <jgg@ziepe.ca>
> > > > Sent: Thursday, May 13, 2021 8:01 PM
> > > >
> > > > On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> > > >
> > > > > Are you specially concerned about this iommu_device hack which
> > > > > directly connects mdev_device to iommu layer or the entire removed
> > > > > logic including the aux domain concept? For the former we are now
> > > > > following up the referred thread to find a clean way. But for the latter
> > > > > we feel it's still necessary regardless of how iommu interface is
> > > redesigned
> > > > > to support device connection from the upper level driver. The reason
> is
> > > > > that with mdev or subdevice one physical device could be attached to
> > > > > multiple domains now. there could be a primary domain with
> DOMAIN_
> > > > > DMA type for DMA_API use by parent driver itself, and multiple
> auxiliary
> > > > > domains with DOMAIN_UNMANAGED types for subdevices assigned
> to
> > > > > different VMs.
> > > >
> > > > Why do we need more domains than just the physical domain for the
> > > > parent? How does auxdomain appear in /dev/ioasid?
> > > >
> > >
> > > Say the parent device has three WQs. WQ1 is used by parent driver itself,
> > > while WQ2/WQ3 are assigned to VM1/VM2 respectively.
> > >
> > > WQ1 is attached to domain1 for an IOVA space to support DMA API
> > > operations in parent driver.
> 
> More specifically WQ1 uses a PASID that is represented by an IOASID to
> userspace.

No. WQ1 is used by parent driver itself so it's not related to userspace. 

> 
> > > WQ2 is attached to domain2 for the GPA space of VM1. Domain2 is
> > > created when WQ2 is assigned to VM1 as a mdev.
> > >
> > > WQ3 is attached to domain3 for the GPA space of VM2. Domain3 is
> > > created when WQ3 is assigned to VM2 as a mdev.
> > >
> > > In this case domain1 is the primary while the other two are auxiliary
> > > to the parent.
> > >
> > > auxdomain represents as a normal domain in /dev/ioasid, with only
> > > care required when doing attachment.
> > >
> > > e.g. VM1 is assigned with both a pdev and mdev. Qemu creates
> > > gpa_ioasid which is associated with a single domain for VM1's
> > > GPA space and this domain is shared by both pdev and mdev.
> >
> > Here pdev/mdev are just conceptual description. Following your
> > earlier suggestion /dev/ioasid will not refer to explicit mdev_device.
> > Instead, each vfio device attached to an ioasid is represented by either
> > "struct device" for pdev or "struct device + pasid" for mdev. The
> > presence of pasid decides which iommu_attach api should be used.
> 
> But you still haven't explained what an aux domain is to /dev/ioasid.

'aux' vs. 'primary' matters only in the iommu layer. For /dev/ioasid
it is just normal iommu domain. The only attention required is to tell
the iommu layer whether this domain should be treated as 'aux' or
'primary' when attaching the domain to a device (based on pdev vs.
mdev). After this point, the domain is managed through existing 
iommu ops (map, unmap, etc.) just like how it works today, applying 
to all pdevs/mdevs attached to this ioasid

> 
> Why do I need more public kernel objects to represent a PASID IOASID?

This avoids changing every iommu ops to include a PASID and forcing
the upper-layer drivers to do it differently between pdev and mdev.
Actually this was a main motivation when working out aux domain 
proposal with Joerg two years ago.

> 
> Are you creating a domain for every IOASID? Why?
> 

Create a domain for every non-nesting ioasid. For nesting ioasid 
(e.g. ioasid1 on ioasid2), ioasid1 should inherit the domain from 
ioasid2.

The reason is that iommu domain represents an IOVA address
space shareable by multiple devices. It should be created at the 
point where the address space is managed. 

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-13 12:00       ` Jason Gunthorpe
  2021-05-14  6:27         ` Tian, Kevin
  2021-05-14  6:54         ` Tian, Kevin
@ 2021-05-14 13:17         ` Tian, Kevin
  2021-05-14 13:39           ` Jason Gunthorpe
  2 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-05-14 13:17 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, May 13, 2021 8:01 PM
> 
> On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> 
> > Are you specially concerned about this iommu_device hack which
> > directly connects mdev_device to iommu layer or the entire removed
> > logic including the aux domain concept? For the former we are now
> > following up the referred thread to find a clean way. But for the latter
> > we feel it's still necessary regardless of how iommu interface is redesigned
> > to support device connection from the upper level driver. The reason is
> > that with mdev or subdevice one physical device could be attached to
> > multiple domains now. there could be a primary domain with DOMAIN_
> > DMA type for DMA_API use by parent driver itself, and multiple auxiliary
> > domains with DOMAIN_UNMANAGED types for subdevices assigned to
> > different VMs.
> 
> Why do we need more domains than just the physical domain for the
> parent? How does auxdomain appear in /dev/ioasid?
> 

Another simple reason. Say you have 4 mdevs each from a different 
parent are attached to an ioasid. If only using physical domain of the 
parent + PASID it means there are 4 domains (thus 4 page tables) under 
this IOASID thus every dma map operation must be replicated in all
4 domains which is really unnecessary. Having the domain created
with ioasid and allow a device attaching to multiple domains is much
cleaner for the upper-layer drivers to work with iommu interface.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14 12:58             ` Tian, Kevin
@ 2021-05-14 13:31               ` Jason Gunthorpe
  2021-05-17 12:22                 ` Joerg Roedel
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-14 13:31 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

On Fri, May 14, 2021 at 12:58:10PM +0000, Tian, Kevin wrote:

> This avoids changing every iommu ops to include a PASID and forcing
> the upper-layer drivers to do it differently between pdev and mdev.
> Actually this was a main motivation when working out aux domain
> proposal with Joerg two years ago.

Well, usually when people say stuff like that it means it is a hack..

Oh, this does look like a big hack, yes.

/dev/ioasid is focused on IOASIDs. As an API you have to be able to use
it to create all kinds of IOASID's *against a single HW struct
device*.

In this world you can't create domains for every struct device as hack
to pass in the PASID.

The IOASID itself must be an object that contains the HW struct device
and the PASID. IOASID must be a first class object in the entire API.

Remember, when a driver wants to connect to an IOASID it wants to make
some very simple calls like:

   iommu_attach_ioasid_rid(&pci_device->dev, ioasid_ptr);
   iommu_attach_ioasid_pasid(&pci_device->dev, ioasid_ptr);

Which is based *only* on what the PCI device does. If the device issues
TLPs without PASID then the driver must use the first. If the device
issues TLPs with a PASID then the driver must use the latter.

There is no place for "domain as a carrier of PASID"
there. mdev_device should NOT participate in the IOMMU layer because
it is NOT a HW device. Trying to warp mdev_device into an IOMMU
presence is already the source of a lot of this hacky code.

To juggle multiple IOASID per HW device the IOMMU API obviously has to
be made IOASID aware. It can't just blindly assume that a struct
device implies the single IOASID to use and hope for the best.

> The reason is that iommu domain represents an IOVA address
> space shareable by multiple devices. It should be created at the 
> point where the address space is managed. 

IOASID represents the IOVA address space.

Two concepts that represent the same thing is not great :)

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14 13:17         ` Tian, Kevin
@ 2021-05-14 13:39           ` Jason Gunthorpe
  2021-05-14 14:28             ` Tian, Kevin
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-14 13:39 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

On Fri, May 14, 2021 at 01:17:23PM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@ziepe.ca>
> > Sent: Thursday, May 13, 2021 8:01 PM
> > 
> > On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> > 
> > > Are you specially concerned about this iommu_device hack which
> > > directly connects mdev_device to iommu layer or the entire removed
> > > logic including the aux domain concept? For the former we are now
> > > following up the referred thread to find a clean way. But for the latter
> > > we feel it's still necessary regardless of how iommu interface is redesigned
> > > to support device connection from the upper level driver. The reason is
> > > that with mdev or subdevice one physical device could be attached to
> > > multiple domains now. there could be a primary domain with DOMAIN_
> > > DMA type for DMA_API use by parent driver itself, and multiple auxiliary
> > > domains with DOMAIN_UNMANAGED types for subdevices assigned to
> > > different VMs.
> > 
> > Why do we need more domains than just the physical domain for the
> > parent? How does auxdomain appear in /dev/ioasid?
> > 
> 
> Another simple reason. Say you have 4 mdevs each from a different 
> parent are attached to an ioasid. If only using physical domain of the 
> parent + PASID it means there are 4 domains (thus 4 page tables) under 
> this IOASID thus every dma map operation must be replicated in all
> 4 domains which is really unnecessary. Having the domain created
> with ioasid and allow a device attaching to multiple domains is much
> cleaner for the upper-layer drivers to work with iommu interface.

Eh? That sounds messed up.

The IOASID is the page table. If you have one IOASID and you attach it
to 4 IOMMU routings (be it pasid, rid, whatever) then there should
only ever by one page table.

The iommu driver should just point the iommu HW at the shared page
table for each of the 4 routes and be done with it. At worst it has to
replicate invalidates, but that is very HW dependent.

Domain is just a half-completed-ioasid concept. It is today not
flexible enough to be a true IOASID, but it still does hold the io
page table.

Basically your data structure is an IOASID. It holds a single HW
specific page table. The IOASID has a list of RID and (RID,PASID) that
are authorized to use it. The IOMMU HW is programed to match the
RID/(RID,PASID) list and search the single page table. When the page
table is changed the IOMMU is told to dump caches, however that works.

When a device driver wants to use an IOASID it tells the iommu to
setup the route, either RID or (RID,PASID). Setting the route causes
the IOMMU driver to update the HW.

The struct device has enough context to provide the RID and the IOMMU
driver connection when operating on the IOASID.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14 13:39           ` Jason Gunthorpe
@ 2021-05-14 14:28             ` Tian, Kevin
  2021-05-14 14:44               ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-05-14 14:28 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Friday, May 14, 2021 9:40 PM
> 
> On Fri, May 14, 2021 at 01:17:23PM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@ziepe.ca>
> > > Sent: Thursday, May 13, 2021 8:01 PM
> > >
> > > On Thu, May 13, 2021 at 03:28:52AM +0000, Tian, Kevin wrote:
> > >
> > > > Are you specially concerned about this iommu_device hack which
> > > > directly connects mdev_device to iommu layer or the entire removed
> > > > logic including the aux domain concept? For the former we are now
> > > > following up the referred thread to find a clean way. But for the latter
> > > > we feel it's still necessary regardless of how iommu interface is
> redesigned
> > > > to support device connection from the upper level driver. The reason is
> > > > that with mdev or subdevice one physical device could be attached to
> > > > multiple domains now. there could be a primary domain with DOMAIN_
> > > > DMA type for DMA_API use by parent driver itself, and multiple
> auxiliary
> > > > domains with DOMAIN_UNMANAGED types for subdevices assigned to
> > > > different VMs.
> > >
> > > Why do we need more domains than just the physical domain for the
> > > parent? How does auxdomain appear in /dev/ioasid?
> > >
> >
> > Another simple reason. Say you have 4 mdevs each from a different
> > parent are attached to an ioasid. If only using physical domain of the
> > parent + PASID it means there are 4 domains (thus 4 page tables) under
> > this IOASID thus every dma map operation must be replicated in all
> > 4 domains which is really unnecessary. Having the domain created
> > with ioasid and allow a device attaching to multiple domains is much
> > cleaner for the upper-layer drivers to work with iommu interface.
> 
> Eh? That sounds messed up.
> 
> The IOASID is the page table. If you have one IOASID and you attach it
> to 4 IOMMU routings (be it pasid, rid, whatever) then there should
> only ever by one page table.
> 
> The iommu driver should just point the iommu HW at the shared page
> table for each of the 4 routes and be done with it. At worst it has to
> replicate invalidates, but that is very HW dependent.
> 
> Domain is just a half-completed-ioasid concept. It is today not
> flexible enough to be a true IOASID, but it still does hold the io
> page table.
> 
> Basically your data structure is an IOASID. It holds a single HW
> specific page table. The IOASID has a list of RID and (RID,PASID) that
> are authorized to use it. The IOMMU HW is programed to match the
> RID/(RID,PASID) list and search the single page table. When the page
> table is changed the IOMMU is told to dump caches, however that works.
> 
> When a device driver wants to use an IOASID it tells the iommu to
> setup the route, either RID or (RID,PASID). Setting the route causes
> the IOMMU driver to update the HW.
> 
> The struct device has enough context to provide the RID and the IOMMU
> driver connection when operating on the IOASID.
> 

Well, I see what you meant now. Basically you want to make IOASID 
as the first-class object in the entire iommu stack, replacing what 
iommu domain fulfill todays. Our original proposal was still based on 
domain-centric philosophy thus containing IOASID and its routing info 
only in the uAPI layer of /dev/ioasid and then connecting to domains.

As this touches the core concept of the iommu layer, we'd really like 
to also hear from Jeorg. It's a huge work.

btw are you OK with our ongoing uAPI proposal still based on domain
flavor for now? the uAPI semantics should be generic regardless of 
how underlying iommu interfaces are designed. At least separate
uAPI discussion from iommu ops re-design.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14 14:28             ` Tian, Kevin
@ 2021-05-14 14:44               ` Jason Gunthorpe
  0 siblings, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-14 14:44 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Christoph Hellwig, Joerg Roedel, Alex Williamson,
	David Woodhouse, Lu Baolu, Will Deacon, Kirti Wankhede,
	linux-arm-kernel, iommu, kvm

On Fri, May 14, 2021 at 02:28:44PM +0000, Tian, Kevin wrote:
> Well, I see what you meant now. Basically you want to make IOASID 
> as the first-class object in the entire iommu stack, replacing what 
> iommu domain fulfill todays. 

Alternatively you transform domain into being a full fledged IOASID.
I don't know which path works out to be a better patch series.

> Our original proposal was still based on domain-centric philosophy
> thus containing IOASID and its routing info only in the uAPI layer
> of /dev/ioasid and then connecting to domains.

Where do the domains come from though? You still have to hack hack all
the drivers to create dummy domains to support this plan, and in the
process PASID is pretty hobbled as an actual API if every PASID
instance requires a wonky dummy struct device and domain.

> btw are you OK with our ongoing uAPI proposal still based on domain
> flavor for now? the uAPI semantics should be generic regardless of 
> how underlying iommu interfaces are designed. At least separate
> uAPI discussion from iommu ops re-design.

The most important thing is the uAPI, you don't get to change that later.

The next most is the driver facing API.

You can figure out the IOMMU layer internals in stages.

Clearly IOASID == domain today as domain is kind of half a
IOASID. When you extend to PASID and other stuff I think you have
little choice but to make a full IOASID more first class.

Dummy domains are a very poor substitute.

In my experiance these kinds of transformations can usually be managed
as "just alot of typing". Usually the old driver code structure can be
kept enough to not break it while reorganizing.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-14 13:31               ` Jason Gunthorpe
@ 2021-05-17 12:22                 ` Joerg Roedel
  2021-05-17 12:30                   ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Joerg Roedel @ 2021-05-17 12:22 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, Christoph Hellwig, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm

On Fri, May 14, 2021 at 10:31:43AM -0300, Jason Gunthorpe wrote:
> There is no place for "domain as a carrier of PASID"
> there. mdev_device should NOT participate in the IOMMU layer because
> it is NOT a HW device. Trying to warp mdev_device into an IOMMU
> presence is already the source of a lot of this hacky code.

Having the mdev abstraction is way better than making the IOMMU code
IOASID aware. The later requires either new parameters to existing
functions or new functions. With the mdev abstraction no new functions
are needed in the core API.

Yes, I know, We have the AUX-domain specific functions now, but I
suggested a while back to turn the mdev code into its own bus
implementation and let the IOMMU driver deal with whether it has an mdev
or a pdev. When that is done the aux-specific functions can go away.

We are not there yet, but I think this is a cleaner abstraction than
making the IOMMU-API ioasid-aware. Also because it separates the details
of device-partitioning nicely from the IOMMU core code.

> To juggle multiple IOASID per HW device the IOMMU API obviously has to
> be made IOASID aware. It can't just blindly assume that a struct
> device implies the single IOASID to use and hope for the best.

The one-device<->one address-space idea is blindly assumed. Simply
because the vast amount of devices out there work that way. When ioasids
come into the game this changes of course, but we can work that out
based on how the ioasids are used.

For device partitioning the mdev abstraction work well if it is
correctly implemented. The other use-case is device-access to user-space
memory, and that is a completely different story.

> IOASID represents the IOVA address space.

No, when it comes to the IOMMU-API, a domain represents an address
space.

> Two concepts that represent the same thing is not great :)

Agreed, so an IOASID should be an IOMMU-domain, if its not used for
passing an mm_struct to a device.

Regards,

	Joerg

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 12:22                 ` Joerg Roedel
@ 2021-05-17 12:30                   ` Jason Gunthorpe
  2021-05-17 12:53                     ` Joerg Roedel
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-17 12:30 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: Tian, Kevin, Christoph Hellwig, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm

On Mon, May 17, 2021 at 02:22:06PM +0200, Joerg Roedel wrote:
> On Fri, May 14, 2021 at 10:31:43AM -0300, Jason Gunthorpe wrote:
> > There is no place for "domain as a carrier of PASID"
> > there. mdev_device should NOT participate in the IOMMU layer because
> > it is NOT a HW device. Trying to warp mdev_device into an IOMMU
> > presence is already the source of a lot of this hacky code.
> 
> Having the mdev abstraction is way better than making the IOMMU code
> IOASID aware. The later requires either new parameters to existing
> functions or new functions. With the mdev abstraction no new functions
> are needed in the core API.

All that does it lock PASID to mdev which is not at all where this
needs to go.

> Yes, I know, We have the AUX-domain specific functions now, but I
> suggested a while back to turn the mdev code into its own bus
> implementation and let the IOMMU driver deal with whether it has an mdev
> or a pdev. When that is done the aux-specific functions can go away.

Yuk, no.

PASID is not connected to the driver model. It is inherently wrong to
suggest this. 

PASID is a property of a PCI device and any PCI device driver should
be able to spawn PASIDs without silly restrictions.

Fixing the IOMMU API is clearly needed here to get a clean PASID
implementation in the kernel.

> > IOASID represents the IOVA address space.
> 
> No, when it comes to the IOMMU-API, a domain represents an address
> space.

Intel is building a new uAPI and here IOASID is the word they picked
to represent the IOVA address space. How it is mapped to the kernel is
TBD, I guess, but domain implies both more and less than a IOASID so it
isn't a 1:1 correspondence.

> > Two concepts that represent the same thing is not great :)
> 
> Agreed, so an IOASID should be an IOMMU-domain, if its not used for
> passing an mm_struct to a device.

We aren't talking about mm_struct.

As above a domain isn't an IOASID, it only does a few things an IOASID
can do.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 12:30                   ` Jason Gunthorpe
@ 2021-05-17 12:53                     ` Joerg Roedel
  2021-05-17 13:35                       ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Joerg Roedel @ 2021-05-17 12:53 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, Christoph Hellwig, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm

On Mon, May 17, 2021 at 09:30:10AM -0300, Jason Gunthorpe wrote:
> On Mon, May 17, 2021 at 02:22:06PM +0200, Joerg Roedel wrote:
> > Yes, I know, We have the AUX-domain specific functions now, but I
> > suggested a while back to turn the mdev code into its own bus
> > implementation and let the IOMMU driver deal with whether it has an mdev
> > or a pdev. When that is done the aux-specific functions can go away.
> 
> Yuk, no.
> 
> PASID is not connected to the driver model. It is inherently wrong to
> suggest this.

There will be no drivers for that, but an mdev is a container for
resources of a physical device which can be assigned to a virtual
machine or a user-space process. In this way it fits very well into the
existing abstractions.

> PASID is a property of a PCI device and any PCI device driver should
> be able to spawn PASIDs without silly restrictions.

Some points here:

	1) The concept of PASIDs is not limited to PCI devices.
	
	2) An mdev is not a restriction. It is an abstraction with which
	   other parts of the kernel can work.

> Fixing the IOMMU API is clearly needed here to get a clean PASID
> implementation in the kernel.

You still have to convince me that this is needed and a good idea. By
now I am not even remotely convinced and putting words like 'fixing',
'clearly' and 'silly' in a sentence is by far not enough for this to
happen.

> We aren't talking about mm_struct.

Passing an mm_struct to a device is another use-case for PASIDs, you
should keep that in mind. The mdev abstraction was made for a different
use-case.

To be clear, I agree that the aux-domain specifics should be removed
from the IOMMU-API, but the mdev-abstraction itself still makes sense.

> As above a domain isn't an IOASID, it only does a few things an IOASID
> can do.

For example?


Regards,

	Joerg

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 12:53                     ` Joerg Roedel
@ 2021-05-17 13:35                       ` Jason Gunthorpe
  2021-05-17 15:35                         ` Joerg Roedel
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-17 13:35 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: Tian, Kevin, Christoph Hellwig, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm

On Mon, May 17, 2021 at 02:53:16PM +0200, Joerg Roedel wrote:
> On Mon, May 17, 2021 at 09:30:10AM -0300, Jason Gunthorpe wrote:
> > On Mon, May 17, 2021 at 02:22:06PM +0200, Joerg Roedel wrote:
> > > Yes, I know, We have the AUX-domain specific functions now, but I
> > > suggested a while back to turn the mdev code into its own bus
> > > implementation and let the IOMMU driver deal with whether it has an mdev
> > > or a pdev. When that is done the aux-specific functions can go away.
> > 
> > Yuk, no.
> > 
> > PASID is not connected to the driver model. It is inherently wrong to
> > suggest this.
> 
> There will be no drivers for that, but an mdev is a container for
> resources of a physical device which can be assigned to a virtual
> machine or a user-space process. In this way it fits very well into the
> existing abstractions.

There world is a lot bigger than just mdev and vfio.

> > PASID is a property of a PCI device and any PCI device driver should
> > be able to spawn PASIDs without silly restrictions.
> 
> Some points here:
> 
> 	1) The concept of PASIDs is not limited to PCI devices.

Do I have to type PASID/stream id/etc/etc in every place? We all know
this is gerenal, it is why Intel is picking IOASID for the uAPI name.

> 	2) An mdev is not a restriction. It is an abstraction with which
> 	   other parts of the kernel can work.

mdev is really gross, new things should avoid using it. It is also
100% VFIO specific and should never be used outside VFIO.

My recent work significantly eliminates the need for mdev at all, the
remaining gaps have to do with the way mdev hackily overrides the
iommu mechanisms in VFIO.

Tying any decision to the assumption that mdev will, or even should,
continue is not aligned with the direction things are going.

> > Fixing the IOMMU API is clearly needed here to get a clean PASID
> > implementation in the kernel.
> 
> You still have to convince me that this is needed and a good idea. By
> now I am not even remotely convinced and putting words like 'fixing',
> 'clearly' and 'silly' in a sentence is by far not enough for this to
> happen.

Well, I'm sorry, but there is a huge other thread talking about the
IOASID design in great detail and why this is all needed. Jumping into
this thread without context and basically rejecting all the
conclusions that were reached over the last several weeks is really
not helpful - especially since your objection is not technical.

I think you should wait for Intel to put together the /dev/ioasid uAPI
proposal and the example use cases it should address then you can give
feedback there, with proper context.

In this case the mdev specific auxdomain for PASID use is replaced by
sane /dev/ioasid objects - and how that gets implemented through the
iommu layer would still be a big TBD. Though I can't forsee any
quality implementation of /dev/ioasid being driven by a one io page
table per struct device scheme.

Hopefully Intel will make the reasons why, and the use cases
supporting the desgin, clear in their RFC.

> To be clear, I agree that the aux-domain specifics should be removed
> from the IOMMU-API, but the mdev-abstraction itself still makes sense.

Expect mdev is basically legacy too. 

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 13:35                       ` Jason Gunthorpe
@ 2021-05-17 15:35                         ` Joerg Roedel
  2021-05-19 15:23                           ` Robin Murphy
                                             ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Joerg Roedel @ 2021-05-17 15:35 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, Christoph Hellwig, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm

On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
> Well, I'm sorry, but there is a huge other thread talking about the
> IOASID design in great detail and why this is all needed. Jumping into
> this thread without context and basically rejecting all the
> conclusions that were reached over the last several weeks is really
> not helpful - especially since your objection is not technical.
> 
> I think you should wait for Intel to put together the /dev/ioasid uAPI
> proposal and the example use cases it should address then you can give
> feedback there, with proper context.

Yes, I think the next step is that someone who read the whole thread
writes up the conclusions and a rough /dev/ioasid API proposal, also
mentioning the use-cases it addresses. Based on that we can discuss the
implications this needs to have for IOMMU-API and code.

From the use-cases I know the mdev concept is just fine. But if there is
a more generic one we can talk about it.

Regards,

	Joerg


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 15:35                         ` Joerg Roedel
@ 2021-05-19 15:23                           ` Robin Murphy
  2021-05-19 18:06                             ` Jason Gunthorpe
  2021-06-30  9:08                           ` Tian, Kevin
  2021-07-22  6:02                           ` Tian, Kevin
  2 siblings, 1 reply; 39+ messages in thread
From: Robin Murphy @ 2021-05-19 15:23 UTC (permalink / raw)
  To: Joerg Roedel, Jason Gunthorpe
  Cc: Tian, Kevin, kvm, Will Deacon, Kirti Wankhede, iommu,
	Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On 2021-05-17 16:35, Joerg Roedel wrote:
> On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
>> Well, I'm sorry, but there is a huge other thread talking about the
>> IOASID design in great detail and why this is all needed. Jumping into
>> this thread without context and basically rejecting all the
>> conclusions that were reached over the last several weeks is really
>> not helpful - especially since your objection is not technical.
>>
>> I think you should wait for Intel to put together the /dev/ioasid uAPI
>> proposal and the example use cases it should address then you can give
>> feedback there, with proper context.
> 
> Yes, I think the next step is that someone who read the whole thread
> writes up the conclusions and a rough /dev/ioasid API proposal, also
> mentioning the use-cases it addresses. Based on that we can discuss the
> implications this needs to have for IOMMU-API and code.
> 
>  From the use-cases I know the mdev concept is just fine. But if there is
> a more generic one we can talk about it.

Just to add another voice here, I have some colleagues working on 
drivers where they want to use SMMU Substream IDs for a single hardware 
block to operate on multiple iommu_domains managed entirely within the 
kernel. Using an mdev-like approach with aux domains is pretty much the 
ideal fit for this use-case, while all the IOASID discussion appears 
centred on SVA and userspace interfaces, and as such barely relevant if 
at all.

I seem to recall a non-trivial amount of effort going into the aux 
domain design too, so the promise of replacing it with "a big TBD" just 
because vfio-mdev turned out to be awful hardly fills me with enthusiasm 
either :/

Robin.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-19 15:23                           ` Robin Murphy
@ 2021-05-19 18:06                             ` Jason Gunthorpe
  2021-05-19 23:12                               ` Tian, Kevin
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-19 18:06 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Joerg Roedel, Tian, Kevin, kvm, Will Deacon, Kirti Wankhede,
	iommu, Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On Wed, May 19, 2021 at 04:23:21PM +0100, Robin Murphy wrote:
> On 2021-05-17 16:35, Joerg Roedel wrote:
> > On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
> > > Well, I'm sorry, but there is a huge other thread talking about the
> > > IOASID design in great detail and why this is all needed. Jumping into
> > > this thread without context and basically rejecting all the
> > > conclusions that were reached over the last several weeks is really
> > > not helpful - especially since your objection is not technical.
> > > 
> > > I think you should wait for Intel to put together the /dev/ioasid uAPI
> > > proposal and the example use cases it should address then you can give
> > > feedback there, with proper context.
> > 
> > Yes, I think the next step is that someone who read the whole thread
> > writes up the conclusions and a rough /dev/ioasid API proposal, also
> > mentioning the use-cases it addresses. Based on that we can discuss the
> > implications this needs to have for IOMMU-API and code.
> > 
> >  From the use-cases I know the mdev concept is just fine. But if there is
> > a more generic one we can talk about it.
> 
> Just to add another voice here, I have some colleagues working on drivers
> where they want to use SMMU Substream IDs for a single hardware block to
> operate on multiple iommu_domains managed entirely within the
> kernel.

If it is entirely within the kernel I'm confused how mdev gets
involved? mdev is only for vfio which is userspace.

In any event, if you are kernel only it is not quite as big a deal to
create what you need. A 'substream domain' disconnected from the
struct device is not unreasonable.

> an mdev-like approach with aux domains is pretty much the ideal fit for this
> use-case, while all the IOASID discussion appears centred on SVA and
> userspace interfaces, and as such barely relevant if at all.

/dev/ioasid is centered on userspace, but usually the way this works
is a user API like that will be close to a mirror kernel
implementation if someone needs such a thing.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-19 18:06                             ` Jason Gunthorpe
@ 2021-05-19 23:12                               ` Tian, Kevin
  2021-05-19 23:24                                 ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-05-19 23:12 UTC (permalink / raw)
  To: Jason Gunthorpe, Robin Murphy
  Cc: Joerg Roedel, kvm, Will Deacon, Kirti Wankhede, iommu,
	Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, May 20, 2021 2:07 AM
> 
> On Wed, May 19, 2021 at 04:23:21PM +0100, Robin Murphy wrote:
> > On 2021-05-17 16:35, Joerg Roedel wrote:
> > > On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
> > > > Well, I'm sorry, but there is a huge other thread talking about the
> > > > IOASID design in great detail and why this is all needed. Jumping into
> > > > this thread without context and basically rejecting all the
> > > > conclusions that were reached over the last several weeks is really
> > > > not helpful - especially since your objection is not technical.
> > > >
> > > > I think you should wait for Intel to put together the /dev/ioasid uAPI
> > > > proposal and the example use cases it should address then you can give
> > > > feedback there, with proper context.
> > >
> > > Yes, I think the next step is that someone who read the whole thread
> > > writes up the conclusions and a rough /dev/ioasid API proposal, also
> > > mentioning the use-cases it addresses. Based on that we can discuss the
> > > implications this needs to have for IOMMU-API and code.
> > >
> > >  From the use-cases I know the mdev concept is just fine. But if there is
> > > a more generic one we can talk about it.
> >
> > Just to add another voice here, I have some colleagues working on drivers
> > where they want to use SMMU Substream IDs for a single hardware block
> to
> > operate on multiple iommu_domains managed entirely within the
> > kernel.
> 
> If it is entirely within the kernel I'm confused how mdev gets
> involved? mdev is only for vfio which is userspace.
> 

Just add some background. aux domain is used to support mdev but they
are not tied together. Literally aux domain just implies that there could be 
multiple domains attached to a device then when one of them becomes
the primary all the remaining are deemed as auxiliary. From this angle it
doesn't matter whether the requirement of multiple domains come from
user or kernel.

btw I do realize the current aux domain design has some problem to support
the new /dev/ioasid proposal (will send out soon, under internal review). We
can further discuss there to see how aux domain can be revised or redesigned
to support both userspace and kernel usages.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-19 23:12                               ` Tian, Kevin
@ 2021-05-19 23:24                                 ` Jason Gunthorpe
  2021-05-20 14:13                                   ` Robin Murphy
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-19 23:24 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Robin Murphy, Joerg Roedel, kvm, Will Deacon, Kirti Wankhede,
	iommu, Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On Wed, May 19, 2021 at 11:12:46PM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@ziepe.ca>
> > Sent: Thursday, May 20, 2021 2:07 AM
> > 
> > On Wed, May 19, 2021 at 04:23:21PM +0100, Robin Murphy wrote:
> > > On 2021-05-17 16:35, Joerg Roedel wrote:
> > > > On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
> > > > > Well, I'm sorry, but there is a huge other thread talking about the
> > > > > IOASID design in great detail and why this is all needed. Jumping into
> > > > > this thread without context and basically rejecting all the
> > > > > conclusions that were reached over the last several weeks is really
> > > > > not helpful - especially since your objection is not technical.
> > > > >
> > > > > I think you should wait for Intel to put together the /dev/ioasid uAPI
> > > > > proposal and the example use cases it should address then you can give
> > > > > feedback there, with proper context.
> > > >
> > > > Yes, I think the next step is that someone who read the whole thread
> > > > writes up the conclusions and a rough /dev/ioasid API proposal, also
> > > > mentioning the use-cases it addresses. Based on that we can discuss the
> > > > implications this needs to have for IOMMU-API and code.
> > > >
> > > >  From the use-cases I know the mdev concept is just fine. But if there is
> > > > a more generic one we can talk about it.
> > >
> > > Just to add another voice here, I have some colleagues working on drivers
> > > where they want to use SMMU Substream IDs for a single hardware block
> > to
> > > operate on multiple iommu_domains managed entirely within the
> > > kernel.
> > 
> > If it is entirely within the kernel I'm confused how mdev gets
> > involved? mdev is only for vfio which is userspace.
> > 
> 
> Just add some background. aux domain is used to support mdev but they
> are not tied together. Literally aux domain just implies that there could be 
> multiple domains attached to a device then when one of them becomes
> the primary all the remaining are deemed as auxiliary. From this angle it
> doesn't matter whether the requirement of multiple domains come from
> user or kernel.

You can't entirely use aux domain from inside the kernel because you
can't compose it with the DMA API unless you also attach it to some
struct device, and where will the struct device come from?

We already talked about this on the "how to use PASID from the kernel"
thread.

If Robin just wants to use a stream ID from a kernel driver then that
API to make a PASID == RID seems like a better answer for kernel DMA
than aux domains is.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-19 23:24                                 ` Jason Gunthorpe
@ 2021-05-20 14:13                                   ` Robin Murphy
  2021-05-20 14:34                                     ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Robin Murphy @ 2021-05-20 14:13 UTC (permalink / raw)
  To: Jason Gunthorpe, Tian, Kevin
  Cc: Joerg Roedel, kvm, Will Deacon, Kirti Wankhede, iommu,
	Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On 2021-05-20 00:24, Jason Gunthorpe wrote:
> On Wed, May 19, 2021 at 11:12:46PM +0000, Tian, Kevin wrote:
>>> From: Jason Gunthorpe <jgg@ziepe.ca>
>>> Sent: Thursday, May 20, 2021 2:07 AM
>>>
>>> On Wed, May 19, 2021 at 04:23:21PM +0100, Robin Murphy wrote:
>>>> On 2021-05-17 16:35, Joerg Roedel wrote:
>>>>> On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
>>>>>> Well, I'm sorry, but there is a huge other thread talking about the
>>>>>> IOASID design in great detail and why this is all needed. Jumping into
>>>>>> this thread without context and basically rejecting all the
>>>>>> conclusions that were reached over the last several weeks is really
>>>>>> not helpful - especially since your objection is not technical.
>>>>>>
>>>>>> I think you should wait for Intel to put together the /dev/ioasid uAPI
>>>>>> proposal and the example use cases it should address then you can give
>>>>>> feedback there, with proper context.
>>>>>
>>>>> Yes, I think the next step is that someone who read the whole thread
>>>>> writes up the conclusions and a rough /dev/ioasid API proposal, also
>>>>> mentioning the use-cases it addresses. Based on that we can discuss the
>>>>> implications this needs to have for IOMMU-API and code.
>>>>>
>>>>>   From the use-cases I know the mdev concept is just fine. But if there is
>>>>> a more generic one we can talk about it.
>>>>
>>>> Just to add another voice here, I have some colleagues working on drivers
>>>> where they want to use SMMU Substream IDs for a single hardware block
>>> to
>>>> operate on multiple iommu_domains managed entirely within the
>>>> kernel.
>>>
>>> If it is entirely within the kernel I'm confused how mdev gets
>>> involved? mdev is only for vfio which is userspace.

By "mdev-like" I mean it's very similar in shape to the general 
SIOV-style mediated device concept - i.e. a physical device with an 
awareness of operating on multiple contexts at once, using a Substream 
ID/PASID for each one - but instead of exposing control of the contexts 
to anyone else, they remain hidden behind the kernel driver which 
already has its own abstracted uAPI, so overall it ends up as more just 
internal housekeeping than any actual mediation. We were looking at the 
mdev code for inspiration, but directly using it was never the plan.

>> Just add some background. aux domain is used to support mdev but they
>> are not tied together.

[ yes, technically my comments are relevant to patch #4, but the 
discussion was here, so... :) ]

>> Literally aux domain just implies that there could be
>> multiple domains attached to a device then when one of them becomes
>> the primary all the remaining are deemed as auxiliary. From this angle it
>> doesn't matter whether the requirement of multiple domains come from
>> user or kernel.
> 
> You can't entirely use aux domain from inside the kernel because you
> can't compose it with the DMA API unless you also attach it to some
> struct device, and where will the struct device come from?

DMA mapping would still be done using the physical device - where this 
model diverges from mdev is that it doesn't need to fake up a struct 
device to represent each context since they aren't exposed to anyone 
else. Assume the driver already has some kind of token to represent each 
client process, so it just allocates an iommu_domain for a client 
context and does an iommu_aux_attach_dev() to hook it up to some PASID 
(which again nobody else ever sees). The driver simply needs to keep 
track of the domains and PASIDs - when a process submits some work, it 
can look up the relevant domain, iommu_map() the user pages to the right 
addresses, dma_map() them for coherency, then poke in the PASID as part 
of scheduling the work on the physical device.

> We already talked about this on the "how to use PASID from the kernel"
> thread.

Do you have a pointer to the right thread so I can catch up? It's not 
the easiest thing to search for on lore amongst all the other 
PASID-related business :(

> If Robin just wants to use a stream ID from a kernel driver then that
> API to make a PASID == RID seems like a better answer for kernel DMA
> than aux domains is.

No, that's not the model - the device has a single Stream ID (RID), and 
it wants multiple Substream IDs (PASIDs) hanging off that for distinct 
client contexts; it can still generate non-PASID traffic for stuff like 
loading its firmware (the regular iommu_domain might be 
explicitly-managed or might be automatic via iommu-dma - it doesn’t 
really matter in this context). Aux domains really were a perfect fit 
conceptually, even if the edges were a bit rough.

Now, much as I’d like a stable upstream solution, I can't argue based on 
this particular driver, since the PASID functionality is still in 
development, and there seems little likelihood of it being upstreamed 
either way (the driver belongs to a product team rather than the OSS 
group I'm part of; I'm just helping them with the SMMU angle). If 
designing something around aux domains is a dead-end then we (Arm) will 
probably just prototype our thing using downstream patches to the SMMU 
driver for now. However given the clear overlap with SIOV mdev in terms 
of implementation at the IOMMU API level and below, it seems a general 
enough use-case that I’m very keen not to lose sight of it in whatever 
replacement we (upstream) do come up with. FWIW my non-SVA view is that 
a PASID is merely an index into a set of iommu_domains, and in that 
context it doesn't even really matter *who* allocates them, only that 
the device driver and IOMMU driver are in sync :)

Thanks,
Robin.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-20 14:13                                   ` Robin Murphy
@ 2021-05-20 14:34                                     ` Jason Gunthorpe
  2021-05-24 18:18                                       ` Robin Murphy
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-20 14:34 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Tian, Kevin, Joerg Roedel, kvm, Will Deacon, Kirti Wankhede,
	iommu, Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On Thu, May 20, 2021 at 03:13:55PM +0100, Robin Murphy wrote:

> By "mdev-like" I mean it's very similar in shape to the general SIOV-style
> mediated device concept - i.e. a physical device with an awareness of
> operating on multiple contexts at once, using a Substream ID/PASID for each
> one - but instead of exposing control of the contexts to anyone else, they
> remain hidden behind the kernel driver which already has its own abstracted
> uAPI, so overall it ends up as more just internal housekeeping than any
> actual mediation. We were looking at the mdev code for inspiration, but
> directly using it was never the plan.

Well:
 - Who maps memory into the IOASID (ie the specific sub stream id)?
 - What memory must be mapped?
 - Who triggers DMA to this memory?
 
> The driver simply needs to keep track of the domains and PASIDs -
> when a process submits some work, it can look up the relevant
> domain, iommu_map() the user pages to the right addresses, dma_map()
> them for coherency, then poke in the PASID as part of scheduling the
> work on the physical device.

If you are doing stuff like this then the /dev/ioasid is what you
actually want. The userprocess can create its own IOASID, program the
io page tables for that IOASID to point to pages as it wants and then
just hand over a fully instantiated io page table to the device
driver.

What you are describing is the literal use case of /dev/ioasid - a
clean seperation of managing the IOMMU related parts through
/dev/ioasid and the device driver itself is only concerned with
generating device DMA that has the proper PASID/substream tag.

The entire point is to not duplicate all the iommu code you are
describing having written into every driver that just wants an IOASID.

In particular, you are talking about having a substream capable device
and driver but your driver's uAPI is so limited it can't address the
full range of substream configurations:

 - A substream pointing at a SVA
 - A substream pointing a IO page table nested under another
 - A substream pointing at an IOMMU page table shared by many users

And more. Which is bad.

> > We already talked about this on the "how to use PASID from the kernel"
> > thread.
> 
> Do you have a pointer to the right thread so I can catch up? It's not the
> easiest thing to search for on lore amongst all the other PASID-related
> business :(

Somewhere in here:

http://lore.kernel.org/r/20210517143758.GP1002214@nvidia.com

> FWIW my non-SVA view is that a PASID is merely an index into a set of
> iommu_domains, and in that context it doesn't even really matter *who*
> allocates them, only that the device driver and IOMMU driver are in sync :)

Right, this is where /dev/ioasid is going.

However it gets worked out at the kAPI level in the iommu layer the
things you asked for are intended to be solved, and lots more.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-20 14:34                                     ` Jason Gunthorpe
@ 2021-05-24 18:18                                       ` Robin Murphy
  2021-05-25  0:00                                         ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: Robin Murphy @ 2021-05-24 18:18 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, Joerg Roedel, kvm, Will Deacon, Kirti Wankhede,
	iommu, Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On 2021-05-20 15:34, Jason Gunthorpe wrote:
> On Thu, May 20, 2021 at 03:13:55PM +0100, Robin Murphy wrote:
> 
>> By "mdev-like" I mean it's very similar in shape to the general SIOV-style
>> mediated device concept - i.e. a physical device with an awareness of
>> operating on multiple contexts at once, using a Substream ID/PASID for each
>> one - but instead of exposing control of the contexts to anyone else, they
>> remain hidden behind the kernel driver which already has its own abstracted
>> uAPI, so overall it ends up as more just internal housekeeping than any
>> actual mediation. We were looking at the mdev code for inspiration, but
>> directly using it was never the plan.
> 
> Well:
>   - Who maps memory into the IOASID (ie the specific sub stream id)?

Sorry to nitpick, but I think it's important to get terminology right 
here to avoid unnecessary misunderstanding. You can't map memory into an 
address space ID; it's just a number. Ultimately that identifier ends up 
pointing at some actual address space, and most of the current work is 
focused on the case of that address space being provided by an mm where 
things are mapped implicitly by a userspace process; I care about the 
case of it being provided by an iommu_domain where things are mapped 
explicitly by a kernel driver. I would be extremely wary of creating 
some new third *address space* abstraction.

>   - What memory must be mapped?
>   - Who triggers DMA to this memory?

It's a pretty typical DMA flow, as far as I understand. Userspace 
allocates some buffers (in this case, via the kernel driver, but in 
general I'm not sure it makes much difference), puts data in the 
buffers, issues an ioctl to say "process this data", and polls for 
completion; the kernel driver makes sure the buffers are mapped in the 
device address space (at allocation time in this case, but in general I 
assume it could equally be done at request time for user pages), and 
deals with scheduling requests onto the hardware. I understand this 
interface is already deployed in a driver stack which supports a single 
client process at once; extending the internals to allow requests from 
multiple processes to run in parallel using Substream IDs for isolation 
is the future goal. The interface itself shouldn't change, only some 
internal arbitration details.

>> The driver simply needs to keep track of the domains and PASIDs -
>> when a process submits some work, it can look up the relevant
>> domain, iommu_map() the user pages to the right addresses, dma_map()
>> them for coherency, then poke in the PASID as part of scheduling the
>> work on the physical device.
> 
> If you are doing stuff like this then the /dev/ioasid is what you
> actually want. The userprocess can create its own IOASID, program the
> io page tables for that IOASID to point to pages as it wants and then
> just hand over a fully instantiated io page table to the device
> driver.

No. In our case, the device does not need to operate on userspace 
addresses, in fact quite the opposite. There may need to be additional 
things mapped into the device address space which are not, and should 
not be, visible to userspace. There are also some quite weird criteria 
for optimal address space layout which frankly are best left hidden 
inside the kernel driver. Said driver is already explicitly managing its 
own iommu_domain in the same manner as various DRM drivers and others, 
so growing that to multiple parallel domains really isn't a big leap. 
Moving any of this responsibility into userspace would be unwanted and 
unnecessary upheaval.

> What you are describing is the literal use case of /dev/ioasid - a
> clean seperation of managing the IOMMU related parts through
> /dev/ioasid and the device driver itself is only concerned with
> generating device DMA that has the proper PASID/substream tag.
> 
> The entire point is to not duplicate all the iommu code you are
> describing having written into every driver that just wants an IOASID.
> 
> In particular, you are talking about having a substream capable device
> and driver but your driver's uAPI is so limited it can't address the
> full range of substream configurations:
> 
>   - A substream pointing at a SVA
>   - A substream pointing a IO page table nested under another
>   - A substream pointing at an IOMMU page table shared by many users
> 
> And more. Which is bad.

None of which make much if any sense for the way this device and the 
rest of its software stack are designed to work, though. Anyway, the 
actual uAPI in question is essentially just chucking buffer fds about in 
a very abstract manner, so I don't see that it has any relevance here. 
We're talking about a kernel driver *internally* managing how it chooses 
to expose the buffers backing those fds to the hardware. SVA has no 
meaning in that context (there's nothing to share), and I don't even 
understand your second case, but attaching multiple SSIDs to a single 
domain is absolutely something which _could_ be done, there's just zero 
point in a single driver doing that privately when it could simply run 
the relevant jobs under the same SSID instead.

>>> We already talked about this on the "how to use PASID from the kernel"
>>> thread.
>>
>> Do you have a pointer to the right thread so I can catch up? It's not the
>> easiest thing to search for on lore amongst all the other PASID-related
>> business :(
> 
> Somewhere in here:
> 
> http://lore.kernel.org/r/20210517143758.GP1002214@nvidia.com

Thanks, along with our discussion here that kind of confirms my concern. 
Assuming IOASID can wrap up a whole encapsulated thing which is either 
SVA or IOMMU_DOMAIN_DMA is too much of an overabstraction. There 
definitely *are* uses for IOMMU_DOMAIN_DMA - say you want to put some 
SIOV ADIs to work for the host kernel using their regular 
non-IOMMU-aware driver - but there will also be cases for 
IOMMU_DOMAIN_UNMANAGED, although I do mostly expect those to be SoC 
devices whose drivers are already IOMMU-aware and just want to be so at 
a finer-grained level, not PCI devices. Even IOMMU_DOMAIN_PASSTHROUGH 
for IOASIDs _could_ be doable if a sufficiently compelling reason came 
along. I agree that SVA on init_mm is pretty bonkers, but don't get too 
hung up on the DMA API angle which is really orthogonal - passthrough 
domains with dma-direct ops have been working fine for years.

>> FWIW my non-SVA view is that a PASID is merely an index into a set of
>> iommu_domains, and in that context it doesn't even really matter *who*
>> allocates them, only that the device driver and IOMMU driver are in sync :)
> 
> Right, this is where /dev/ioasid is going.
> 
> However it gets worked out at the kAPI level in the iommu layer the
> things you asked for are intended to be solved, and lots more.

Great! It feels like one of the major things will be that, at least 
without major surgery to the DMA API, most of the use-cases will likely 
still need a struct device wrapped around the IOASID. I think the 
particular one I want to solve is actually the odd one out in that it 
doesn't really care, and could be made to work either way.

Thanks,
Robin.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-24 18:18                                       ` Robin Murphy
@ 2021-05-25  0:00                                         ` Jason Gunthorpe
  0 siblings, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2021-05-25  0:00 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Tian, Kevin, Joerg Roedel, kvm, Will Deacon, Kirti Wankhede,
	iommu, Alex Williamson, David Woodhouse, Christoph Hellwig,
	linux-arm-kernel

On Mon, May 24, 2021 at 07:18:33PM +0100, Robin Murphy wrote:
> On 2021-05-20 15:34, Jason Gunthorpe wrote:
> > On Thu, May 20, 2021 at 03:13:55PM +0100, Robin Murphy wrote:
> > 
> > > By "mdev-like" I mean it's very similar in shape to the general SIOV-style
> > > mediated device concept - i.e. a physical device with an awareness of
> > > operating on multiple contexts at once, using a Substream ID/PASID for each
> > > one - but instead of exposing control of the contexts to anyone else, they
> > > remain hidden behind the kernel driver which already has its own abstracted
> > > uAPI, so overall it ends up as more just internal housekeeping than any
> > > actual mediation. We were looking at the mdev code for inspiration, but
> > > directly using it was never the plan.
> > 
> > Well:
> >   - Who maps memory into the IOASID (ie the specific sub stream id)?
> 
> Sorry to nitpick, but I think it's important to get terminology right here
> to avoid unnecessary misunderstanding. You can't map memory into an address
> space ID; it's just a number. 

Ah sorry, the naming in the other thread for the uAPI seems to trended
into the IOASID == what the kernel calls domain and what the kernel
calls ioasid (the number) is just some subproperty.

Nobody has come up with a better name to refer to an abstract io page
table object. Maybe the RFC stage will elicit a better idea.

> implicitly by a userspace process; I care about the case of it being
> provided by an iommu_domain where things are mapped explicitly by a
> kernel driver. I would be extremely wary of creating some new third
> *address space* abstraction.

Well we have lots, and every time you add new uAPI to kernel drivers
to program an IOMMU domain you are making more.

Frankly, the idea of having a PASID/substream ID that is entirely
programmed by the kernel feels like using the thing wrong.. Why do
this? The primary point of these things is to create a security
boundary, but if the kernel already controls everything there isn't a
security boundary to be had.

What is the issue with just jamming everything into the the main IO
page table for the device?
 
> >   - What memory must be mapped?
> >   - Who triggers DMA to this memory?
> 
> It's a pretty typical DMA flow, as far as I understand. Userspace allocates
> some buffers (in this case, via the kernel driver, but in general I'm not
> sure it makes much difference), puts data in the buffers, issues an ioctl to
> say "process this data", and polls for completion; the kernel driver makes
> sure the buffers are mapped in the device address space (at allocation time
> in this case, but in general I assume it could equally be done at request
> time for user pages), and deals with scheduling requests onto the hardware.

Sounds like a GPU :P

> I understand this interface is already deployed in a driver stack which
> supports a single client process at once; extending the internals to allow
> requests from multiple processes to run in parallel using Substream IDs for
> isolation is the future goal. The interface itself shouldn't change, only
> some internal arbitration details.

Using substreams for isolation makes sense, but here isolation should
really mean everything. Stuffing a mix of kernel private and
application data into the same isolation security box sounds like a
recipe for CVEs to me...

> No. In our case, the device does not need to operate on userspace addresses,
> in fact quite the opposite. There may need to be additional things mapped
> into the device address space which are not, and should not be, visible to
> userspace. There are also some quite weird criteria for optimal address
> space layout which frankly are best left hidden inside the kernel driver.
> Said driver is already explicitly managing its own iommu_domain in the same
> manner as various DRM drivers and others, so growing that to multiple
> parallel domains really isn't a big leap. Moving any of this responsibility
> into userspace would be unwanted and unnecessary upheaval.

This is all out of tree right?
 
> (there's nothing to share), and I don't even understand your second case,
> but attaching multiple SSIDs to a single domain is absolutely something
> which _could_ be done, there's just zero point in a single driver doing that
> privately when it could simply run the relevant jobs under the same SSID
> instead.

It makes sense in the virtualization context where often a goal is to
just map the guest's physical address space into the IOMMU and share
it to all DMA devices connected to the VM.

Keep in mind most of the motivation here is to do something more
robust for the virtualization story.

> > http://lore.kernel.org/r/20210517143758.GP1002214@nvidia.com
> 
> Thanks, along with our discussion here that kind of confirms my concern.
> Assuming IOASID can wrap up a whole encapsulated thing which is either SVA
> or IOMMU_DOMAIN_DMA is too much of an overabstraction.

I think it is more than just those two simple things. There are lots
of platform specific challenges to creating vIOMMUs, especially with
PASID/etc that needs to be addressed too.

> There definitely *are* uses for IOMMU_DOMAIN_DMA - say you want to
> put some SIOV ADIs to work for the host kernel using their regular
> non-IOMMU-aware driver - but there will also be cases for

Er, I don't think SIOV's work like that. Nobody is going to create a
SIOV using a completely unaware driver - that only works in
virtualization and relies on hypervisor software to build up the
fiction of a real device.

In-kernel SIOV usages are going to have to either continue to use the
real device's IOMMU page tables or to convince the DMA API to give it
another PASID/SSID/etc.

At least this is how I'm seeing real SIOV device drivers evolving
right now. We already have some real examples on this in mlx5 and
today it uses the parent device's IOMMU page tables.

> IOMMU_DOMAIN_UNMANAGED, although I do mostly expect those to be SoC
> devices whose drivers are already IOMMU-aware and just want to be so
> at a finer-grained level, not PCI devices. Even
> IOMMU_DOMAIN_PASSTHROUGH for IOASIDs _could_ be doable if a
> sufficiently compelling reason came along. I agree that SVA on
> init_mm is pretty bonkers, but don't get too hung up on the DMA API
> angle which is really orthogonal - passthrough domains with
> dma-direct ops have been working fine for years.

I've heard the DMA API maintainers refer to that "working fine" as
hacky crap, so <shrug>.

A formalization of this stuff should not be excluding the DMA API.

> Great! It feels like one of the major things will be that, at least without
> major surgery to the DMA API,

So long as the DMA is all orchestrated by userspace to userspace
buffers, the DMA API doesn't get involved. It is only the thing that
in-kernel users should use.

IMHO if your use case is to do DMA to a security domain then it should
all go through the DMA API, including the mapping of memory into the
IOMMU page tables for that domain. Having a kernel driver bypassing
the whole thing by directly using the domain directly seems quite
rough to me.

A drivers/iommu API call to take an arbitary struct device and bind
the DMA API for the struct device to a newly created PASID/SSID of a
real device seems like a reasonable direction to me for in-kernel use.

Especially if the struct device doesn't need to be device_add()'d.

Jason

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 15:35                         ` Joerg Roedel
  2021-05-19 15:23                           ` Robin Murphy
@ 2021-06-30  9:08                           ` Tian, Kevin
  2021-07-22 13:34                             ` Christoph Hellwig
  2021-07-22  6:02                           ` Tian, Kevin
  2 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-06-30  9:08 UTC (permalink / raw)
  To: Joerg Roedel, Jason Gunthorpe
  Cc: Christoph Hellwig, Alex Williamson, David Woodhouse, Lu Baolu,
	Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu, kvm,
	Robin Murphy

> From: Joerg Roedel <joro@8bytes.org>
> Sent: Monday, May 17, 2021 11:35 PM
> 
> On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
> > Well, I'm sorry, but there is a huge other thread talking about the
> > IOASID design in great detail and why this is all needed. Jumping into
> > this thread without context and basically rejecting all the
> > conclusions that were reached over the last several weeks is really
> > not helpful - especially since your objection is not technical.
> >
> > I think you should wait for Intel to put together the /dev/ioasid uAPI
> > proposal and the example use cases it should address then you can give
> > feedback there, with proper context.
> 
> Yes, I think the next step is that someone who read the whole thread
> writes up the conclusions and a rough /dev/ioasid API proposal, also
> mentioning the use-cases it addresses. Based on that we can discuss the
> implications this needs to have for IOMMU-API and code.
> 
> From the use-cases I know the mdev concept is just fine. But if there is
> a more generic one we can talk about it.
> 

Although /dev/iommu v2 proposal is still in progress, I think there are 
enough background gathered in v1 to resume this discussion now.

In a nutshell /dev/iommu requires two sets of services from the iommu
layer:

-   for an kernel-managed I/O page table via map/unmap;
-   for an user-managed I/O page table via bind/invalidate and nested on 
    a kernel-managed parent I/O page table;

Each I/O page table could be attached by multiple devices. /dev/iommu
maintains device specific routing information (RID, or RID+PASID) for
where to install the I/O page table in the IOMMU for each attached device.

Kernel-managed page table is represented by iommu domain. Existing 
IOMMU-API allows /dev/iommu to attach a RID device to iommu domain. 
A new interface is required, e.g. iommu_attach_device_pasid(domain, dev, 
pasid), to cover (RID+PASID) attaching. Once attaching succeeds, no change
to following map/unmap which are domain-wide thus applied to both RID 
and RID+PASID. In case of dev_iotlb invalidation is required, the iommu 
driver is responsible for handling it for every attached RID or RID+PASID
if ats is enabled.

to take one example, the parent (RID1) has three work queues. WQ1 is 
for parent's own DMA-API usage, with WQ2 (PASID-x) assigned to VM1 
and WQ3 (PASID-y) assigned to VM2. VM2 is also assigned with another 
device (RID2). In this case there are three kernel-managed I/O page 
tables (IOVA in kernel, GPA for VM1 and GPA for VM2), thus RID1 is 
attached to three domains:

RID1 --- domain1 (default, IOVA)
     |      |
     |      |-- [RID1]
     |
     |-- domain2 (vm1, GPA)
     |      |
     |      |-- [RID1, PASID-x]
     |
     |-- domain3 (vm2, GPA)
     |      |
     |      |-- [RID1, PASID-y]
     |      |
     |      |-- [RID2]

The iommu layer should maintain above attaching status per device and per
iommu domain. There is no mdev/subdev concept in the iommu layer. It's
just about RID or PASID.

User-manage I/O page table might be represented by a new object which 
describes:

    - routing information (RID or RID+PASID)
    - pointer to iommu_domain of the parent I/O page table (inherit the
      domain ID in iotlb due to nesting)
    - address of the I/O page table

There might be chance to share the structure with native SVA which also
has page table managed outside of iommu subsystem. But we can leave 
it and figure out until coding.

And a new set of IOMMU-API:

    - iommu_{un}bind_pgtable(domain, dev, addr);
    - iommu_{un}bind_pgtable_pasid(domain, dev, addr, pasid);
    - iommu_cache_invalidate(domain, dev, invalid_info);
    - and APIs for registering fault handler and completing faults;
(here 'domain' is the one representing the parent I/O page table)

Because nesting essentially creates a new reference to the parent I/O 
page table, iommu_bind_pgtable_pasid() implicitly calls __iommu_attach_
device_pasid() to setup the connection between the parent domain and 
the new [RID,PASID]. It's not necessary to do so for iommu_bind_pgtable() 
since the RID is already attached when the parent I/O page table is created.

In consequence the example topology is updated as below, with guest
SVA enabled in both vm1 and vm2:

RID1 --- domain1 (default, IOVA)
     |      |
     |      |-- [RID1]
     |
     |-- domain2 (vm1, GPA)
     |      |
     |      |-- [RID1, PASID-x]
     |      |-- [RID1, PASID-a] // nested for vm1 process1
     |      |-- [RID1, PASID-b] // nested for vm1 process2
     |
     |-- domain3 (vm2, GPA)
     |      |
     |      |-- [RID1, PASID-y]
     |      |-- [RID1, PASID-c] // nested for vm2 process1
     |      |
     |      |-- [RID2]
     |      |-- [RID2, PASID-a] // nested for vm2 process2

Thoughts?

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-05-17 15:35                         ` Joerg Roedel
  2021-05-19 15:23                           ` Robin Murphy
  2021-06-30  9:08                           ` Tian, Kevin
@ 2021-07-22  6:02                           ` Tian, Kevin
  2 siblings, 0 replies; 39+ messages in thread
From: Tian, Kevin @ 2021-07-22  6:02 UTC (permalink / raw)
  To: Joerg Roedel, Jason Gunthorpe
  Cc: Christoph Hellwig, Alex Williamson, David Woodhouse, Lu Baolu,
	Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu, kvm,
	Robin Murphy

A gentle ping...

> From: Tian, Kevin
> Sent: Wednesday, June 30, 2021 5:08 PM
> 
> > From: Joerg Roedel <joro@8bytes.org>
> > Sent: Monday, May 17, 2021 11:35 PM
> >
> > On Mon, May 17, 2021 at 10:35:00AM -0300, Jason Gunthorpe wrote:
> > > Well, I'm sorry, but there is a huge other thread talking about the
> > > IOASID design in great detail and why this is all needed. Jumping into
> > > this thread without context and basically rejecting all the
> > > conclusions that were reached over the last several weeks is really
> > > not helpful - especially since your objection is not technical.
> > >
> > > I think you should wait for Intel to put together the /dev/ioasid uAPI
> > > proposal and the example use cases it should address then you can give
> > > feedback there, with proper context.
> >
> > Yes, I think the next step is that someone who read the whole thread
> > writes up the conclusions and a rough /dev/ioasid API proposal, also
> > mentioning the use-cases it addresses. Based on that we can discuss the
> > implications this needs to have for IOMMU-API and code.
> >
> > From the use-cases I know the mdev concept is just fine. But if there is
> > a more generic one we can talk about it.
> >
> 
> Although /dev/iommu v2 proposal is still in progress, I think there are
> enough background gathered in v1 to resume this discussion now.
> 
> In a nutshell /dev/iommu requires two sets of services from the iommu
> layer:
> 
> -   for an kernel-managed I/O page table via map/unmap;
> -   for an user-managed I/O page table via bind/invalidate and nested on
>     a kernel-managed parent I/O page table;
> 
> Each I/O page table could be attached by multiple devices. /dev/iommu
> maintains device specific routing information (RID, or RID+PASID) for
> where to install the I/O page table in the IOMMU for each attached device.
> 
> Kernel-managed page table is represented by iommu domain. Existing
> IOMMU-API allows /dev/iommu to attach a RID device to iommu domain.
> A new interface is required, e.g. iommu_attach_device_pasid(domain, dev,
> pasid), to cover (RID+PASID) attaching. Once attaching succeeds, no change
> to following map/unmap which are domain-wide thus applied to both RID
> and RID+PASID. In case of dev_iotlb invalidation is required, the iommu
> driver is responsible for handling it for every attached RID or RID+PASID
> if ats is enabled.
> 
> to take one example, the parent (RID1) has three work queues. WQ1 is
> for parent's own DMA-API usage, with WQ2 (PASID-x) assigned to VM1
> and WQ3 (PASID-y) assigned to VM2. VM2 is also assigned with another
> device (RID2). In this case there are three kernel-managed I/O page
> tables (IOVA in kernel, GPA for VM1 and GPA for VM2), thus RID1 is
> attached to three domains:
> 
> RID1 --- domain1 (default, IOVA)
>      |      |
>      |      |-- [RID1]
>      |
>      |-- domain2 (vm1, GPA)
>      |      |
>      |      |-- [RID1, PASID-x]
>      |
>      |-- domain3 (vm2, GPA)
>      |      |
>      |      |-- [RID1, PASID-y]
>      |      |
>      |      |-- [RID2]
> 
> The iommu layer should maintain above attaching status per device and per
> iommu domain. There is no mdev/subdev concept in the iommu layer. It's
> just about RID or PASID.
> 
> User-manage I/O page table might be represented by a new object which
> describes:
> 
>     - routing information (RID or RID+PASID)
>     - pointer to iommu_domain of the parent I/O page table (inherit the
>       domain ID in iotlb due to nesting)
>     - address of the I/O page table
> 
> There might be chance to share the structure with native SVA which also
> has page table managed outside of iommu subsystem. But we can leave
> it and figure out until coding.
> 
> And a new set of IOMMU-API:
> 
>     - iommu_{un}bind_pgtable(domain, dev, addr);
>     - iommu_{un}bind_pgtable_pasid(domain, dev, addr, pasid);
>     - iommu_cache_invalidate(domain, dev, invalid_info);
>     - and APIs for registering fault handler and completing faults;
> (here 'domain' is the one representing the parent I/O page table)
> 
> Because nesting essentially creates a new reference to the parent I/O
> page table, iommu_bind_pgtable_pasid() implicitly calls __iommu_attach_
> device_pasid() to setup the connection between the parent domain and
> the new [RID,PASID]. It's not necessary to do so for iommu_bind_pgtable()
> since the RID is already attached when the parent I/O page table is created.
> 
> In consequence the example topology is updated as below, with guest
> SVA enabled in both vm1 and vm2:
> 
> RID1 --- domain1 (default, IOVA)
>      |      |
>      |      |-- [RID1]
>      |
>      |-- domain2 (vm1, GPA)
>      |      |
>      |      |-- [RID1, PASID-x]
>      |      |-- [RID1, PASID-a] // nested for vm1 process1
>      |      |-- [RID1, PASID-b] // nested for vm1 process2
>      |
>      |-- domain3 (vm2, GPA)
>      |      |
>      |      |-- [RID1, PASID-y]
>      |      |-- [RID1, PASID-c] // nested for vm2 process1
>      |      |
>      |      |-- [RID2]
>      |      |-- [RID2, PASID-a] // nested for vm2 process2
> 
> Thoughts?
> 
> Thanks
> Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-06-30  9:08                           ` Tian, Kevin
@ 2021-07-22 13:34                             ` Christoph Hellwig
  2021-07-23  5:36                               ` Tian, Kevin
  0 siblings, 1 reply; 39+ messages in thread
From: Christoph Hellwig @ 2021-07-22 13:34 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Joerg Roedel, Jason Gunthorpe, Christoph Hellwig,
	Alex Williamson, David Woodhouse, Lu Baolu, Will Deacon,
	Kirti Wankhede, linux-arm-kernel, iommu, kvm, Robin Murphy

On Wed, Jun 30, 2021 at 09:08:19AM +0000, Tian, Kevin wrote:
> The iommu layer should maintain above attaching status per device and per
> iommu domain. There is no mdev/subdev concept in the iommu layer. It's
> just about RID or PASID.

Yes, I think that makes sense.

> And a new set of IOMMU-API:
> 
>     - iommu_{un}bind_pgtable(domain, dev, addr);
>     - iommu_{un}bind_pgtable_pasid(domain, dev, addr, pasid);
>     - iommu_cache_invalidate(domain, dev, invalid_info);

What caches is this supposed to "invalidate"?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-07-22 13:34                             ` Christoph Hellwig
@ 2021-07-23  5:36                               ` Tian, Kevin
  2021-07-23  5:41                                 ` Christoph Hellwig
  0 siblings, 1 reply; 39+ messages in thread
From: Tian, Kevin @ 2021-07-23  5:36 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Joerg Roedel, Jason Gunthorpe, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm, Robin Murphy

> From: Christoph Hellwig <hch@lst.de>
> Sent: Thursday, July 22, 2021 9:35 PM
> 
> On Wed, Jun 30, 2021 at 09:08:19AM +0000, Tian, Kevin wrote:
> > The iommu layer should maintain above attaching status per device and
> per
> > iommu domain. There is no mdev/subdev concept in the iommu layer. It's
> > just about RID or PASID.
> 
> Yes, I think that makes sense.
> 
> > And a new set of IOMMU-API:
> >
> >     - iommu_{un}bind_pgtable(domain, dev, addr);
> >     - iommu_{un}bind_pgtable_pasid(domain, dev, addr, pasid);
> >     - iommu_cache_invalidate(domain, dev, invalid_info);
> 
> What caches is this supposed to "invalidate"?

pasid cache, iotlb or dev_iotlb entries that are related to the bound 
pgtable. the actual affected cache type and granularity (device-wide,
pasid-wide, selected addr-range) are specified by the caller.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-07-23  5:36                               ` Tian, Kevin
@ 2021-07-23  5:41                                 ` Christoph Hellwig
  2021-07-23  5:44                                   ` Tian, Kevin
  0 siblings, 1 reply; 39+ messages in thread
From: Christoph Hellwig @ 2021-07-23  5:41 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Christoph Hellwig, Joerg Roedel, Jason Gunthorpe,
	Alex Williamson, David Woodhouse, Lu Baolu, Will Deacon,
	Kirti Wankhede, linux-arm-kernel, iommu, kvm, Robin Murphy

On Fri, Jul 23, 2021 at 05:36:17AM +0000, Tian, Kevin wrote:
> > > And a new set of IOMMU-API:
> > >
> > >     - iommu_{un}bind_pgtable(domain, dev, addr);
> > >     - iommu_{un}bind_pgtable_pasid(domain, dev, addr, pasid);
> > >     - iommu_cache_invalidate(domain, dev, invalid_info);
> > 
> > What caches is this supposed to "invalidate"?
> 
> pasid cache, iotlb or dev_iotlb entries that are related to the bound 
> pgtable. the actual affected cache type and granularity (device-wide,
> pasid-wide, selected addr-range) are specified by the caller.

Maybe call it pgtable_invalidate or similar?  To avoid confusing it
with the CPUs dcache.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
  2021-07-23  5:41                                 ` Christoph Hellwig
@ 2021-07-23  5:44                                   ` Tian, Kevin
  0 siblings, 0 replies; 39+ messages in thread
From: Tian, Kevin @ 2021-07-23  5:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Joerg Roedel, Jason Gunthorpe, Alex Williamson, David Woodhouse,
	Lu Baolu, Will Deacon, Kirti Wankhede, linux-arm-kernel, iommu,
	kvm, Robin Murphy

> From: Christoph Hellwig <hch@lst.de>
> Sent: Friday, July 23, 2021 1:41 PM
> 
> On Fri, Jul 23, 2021 at 05:36:17AM +0000, Tian, Kevin wrote:
> > > > And a new set of IOMMU-API:
> > > >
> > > >     - iommu_{un}bind_pgtable(domain, dev, addr);
> > > >     - iommu_{un}bind_pgtable_pasid(domain, dev, addr, pasid);
> > > >     - iommu_cache_invalidate(domain, dev, invalid_info);
> > >
> > > What caches is this supposed to "invalidate"?
> >
> > pasid cache, iotlb or dev_iotlb entries that are related to the bound
> > pgtable. the actual affected cache type and granularity (device-wide,
> > pasid-wide, selected addr-range) are specified by the caller.
> 
> Maybe call it pgtable_invalidate or similar?  To avoid confusing it
> with the CPUs dcache.

sure. btw above are just placeholders. We can clear them up when
working on the actual implementation. 😊

Thanks
Kevin

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2021-07-23  5:44 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-10  6:53 more iommu dead code removal Christoph Hellwig
2021-05-10  6:54 ` [PATCH 1/6] iommu: remove the unused dev_has_feat method Christoph Hellwig
2021-05-10  6:54 ` [PATCH 2/6] iommu: remove the unused iommu_aux_get_pasid interface Christoph Hellwig
2021-05-10  6:54 ` [PATCH 3/6] vfio: remove the unused mdev iommu hook Christoph Hellwig
2021-05-10 15:54   ` Jason Gunthorpe
2021-05-13  3:28     ` Tian, Kevin
2021-05-13 12:00       ` Jason Gunthorpe
2021-05-14  6:27         ` Tian, Kevin
2021-05-14  6:54         ` Tian, Kevin
2021-05-14 12:19           ` Jason Gunthorpe
2021-05-14 12:58             ` Tian, Kevin
2021-05-14 13:31               ` Jason Gunthorpe
2021-05-17 12:22                 ` Joerg Roedel
2021-05-17 12:30                   ` Jason Gunthorpe
2021-05-17 12:53                     ` Joerg Roedel
2021-05-17 13:35                       ` Jason Gunthorpe
2021-05-17 15:35                         ` Joerg Roedel
2021-05-19 15:23                           ` Robin Murphy
2021-05-19 18:06                             ` Jason Gunthorpe
2021-05-19 23:12                               ` Tian, Kevin
2021-05-19 23:24                                 ` Jason Gunthorpe
2021-05-20 14:13                                   ` Robin Murphy
2021-05-20 14:34                                     ` Jason Gunthorpe
2021-05-24 18:18                                       ` Robin Murphy
2021-05-25  0:00                                         ` Jason Gunthorpe
2021-06-30  9:08                           ` Tian, Kevin
2021-07-22 13:34                             ` Christoph Hellwig
2021-07-23  5:36                               ` Tian, Kevin
2021-07-23  5:41                                 ` Christoph Hellwig
2021-07-23  5:44                                   ` Tian, Kevin
2021-07-22  6:02                           ` Tian, Kevin
2021-05-14 13:17         ` Tian, Kevin
2021-05-14 13:39           ` Jason Gunthorpe
2021-05-14 14:28             ` Tian, Kevin
2021-05-14 14:44               ` Jason Gunthorpe
2021-05-10  6:54 ` [PATCH 4/6] iommu: remove iommu_aux_{attach,detach}_device Christoph Hellwig
2021-05-10  6:54 ` [PATCH 5/6] iommu: remove IOMMU_DEV_FEAT_AUX Christoph Hellwig
2021-05-10  6:54 ` [PATCH 6/6] iommu: remove iommu_dev_feature_enabled Christoph Hellwig
2021-05-10 11:54 ` more iommu dead code removal Jason Gunthorpe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).