All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] vfio: Support VFIO_NOIOMMU with iommufd
@ 2023-01-09 14:22 Jason Gunthorpe
  2023-01-09 23:34 ` Alex Williamson
  2023-01-10  6:20 ` Tian, Kevin
  0 siblings, 2 replies; 5+ messages in thread
From: Jason Gunthorpe @ 2023-01-09 14:22 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, iommu, Kevin Tian, kvm, Yi Liu

Add a small amount of emulation to vfio_compat to accept the SET_IOMMU
to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working
on a no-iommu enabled device.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Kconfig       |  2 +-
 drivers/iommu/iommufd/vfio_compat.c | 46 ++++++++++++++++++++++++-----
 drivers/vfio/group.c                | 13 ++++----
 drivers/vfio/iommufd.c              | 21 ++++++++++++-
 include/linux/iommufd.h             |  6 ++--
 5 files changed, 70 insertions(+), 18 deletions(-)

This needs a testing confirmation with dpdk to go forward, thanks

diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 8306616b6d8198..ada693ea51a78e 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER
 	  removed.
 
 	  IOMMUFD VFIO container emulation is known to lack certain features
-	  of the native VFIO container, such as no-IOMMU support, peer-to-peer
+	  of the native VFIO container, such as peer-to-peer
 	  DMA mapping, PPC IOMMU support, as well as other potentially
 	  undiscovered gaps.  This option is currently intended for the
 	  purpose of testing IOMMUFD with unmodified userspace supporting VFIO
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 3ceca0e8311c39..6c8e5dc1c221f4 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -26,16 +26,35 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx)
 }
 
 /**
- * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
+ * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists
+ * @ictx: Context to operate on
+ *
+ * Return the ID of the current compatibility ioas. The ID can be passed into
+ * other functions that take an ioas_id.
+ */
+int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+{
+	struct iommufd_ioas *ioas;
+
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+	*out_ioas_id = ioas->obj.id;
+	iommufd_put_object(&ioas->obj);
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio should use
  * @ictx: Context to operate on
- * @out_ioas_id: The ioas_id the caller should use
  *
  * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
  * on since they do not have an IOAS ID input in their ABI. Only attaching a
- * group should cause a default creation of the internal ioas, this returns the
- * existing ioas if it has already been assigned somehow.
+ * group should cause a default creation of the internal ioas, this does nothing
+ * if an existing ioas has already been assigned somehow.
  */
-int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx)
 {
 	struct iommufd_ioas *ioas = NULL;
 	struct iommufd_ioas *out_ioas;
@@ -53,7 +72,6 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
 	}
 	xa_unlock(&ictx->objects);
 
-	*out_ioas_id = out_ioas->obj.id;
 	if (out_ioas != ioas) {
 		iommufd_put_object(&out_ioas->obj);
 		iommufd_object_abort(ictx, &ioas->obj);
@@ -68,7 +86,7 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
 	iommufd_object_finalize(ictx, &ioas->obj);
 	return 0;
 }
-EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create_id, IOMMUFD_VFIO);
 
 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 {
@@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
 	case VFIO_UNMAP_ALL:
 		return 1;
 
+	case VFIO_NOIOMMU_IOMMU:
+	return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
+
 	case VFIO_DMA_CC_IOMMU:
 		return iommufd_vfio_cc_iommu(ictx);
 
@@ -264,6 +285,17 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
 	struct iommufd_ioas *ioas = NULL;
 	int rc = 0;
 
+	/*
+	 * Emulation for NOIMMU is imperfect in that VFIO blocks almost all
+	 * other ioctls. We let them keep working but they mostly fail since no
+	 * IOAS should exist.
+	 */
+	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+		return 0;
+	}
+
 	if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
 		return -EINVAL;
 
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index bb24b2f0271e03..0f2a483a1f3bdb 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 
 	iommufd = iommufd_ctx_from_file(f.file);
 	if (!IS_ERR(iommufd)) {
-		u32 ioas_id;
-
-		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
-		if (ret) {
-			iommufd_ctx_put(group->iommufd);
-			goto out_unlock;
+		if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) ||
+		    group->type != VFIO_NO_IOMMU) {
+			ret = iommufd_vfio_compat_ioas_create_id(iommufd);
+			if (ret) {
+				iommufd_ctx_put(group->iommufd);
+				goto out_unlock;
+			}
 		}
 
 		group->iommufd = iommufd;
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 4f82a6fa7c6c7f..da50feb24b6e1d 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -18,6 +18,21 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
 
 	lockdep_assert_held(&vdev->dev_set->lock);
 
+	if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) &&
+	    vdev->group->type == VFIO_NO_IOMMU) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+
+		/*
+		 * Require no compat ioas to be assigned to proceed. The basic
+		 * statement is that the user cannot have done something that
+		 * implies they expected translation to exist
+		 */
+		if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id))
+			return -EPERM;
+		return 0;
+	}
+
 	/*
 	 * If the driver doesn't provide this op then it means the device does
 	 * not do DMA at all. So nothing to do.
@@ -29,7 +44,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
 	if (ret)
 		return ret;
 
-	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+	ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id);
 	if (ret)
 		goto err_unbind;
 	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
@@ -52,6 +67,10 @@ void vfio_iommufd_unbind(struct vfio_device *vdev)
 {
 	lockdep_assert_held(&vdev->dev_set->lock);
 
+	if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) &&
+	    vdev->group->type == VFIO_NO_IOMMU)
+		return;
+
 	if (vdev->ops->unbind_iommufd)
 		vdev->ops->unbind_iommufd(vdev);
 }
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 650d45629647a7..9d1afd417215d0 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -57,7 +57,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
 				unsigned long iova, unsigned long length);
 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 		      void *data, size_t len, unsigned int flags);
-int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx);
 #else /* !CONFIG_IOMMUFD */
 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 {
@@ -89,8 +90,7 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long
 	return -EOPNOTSUPP;
 }
 
-static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
-					      u32 *out_ioas_id)
+static inline int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx)
 {
 	return -EOPNOTSUPP;
 }

base-commit: 88603b6dc419445847923fcb7fe5080067a30f98
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] vfio: Support VFIO_NOIOMMU with iommufd
  2023-01-09 14:22 [PATCH] vfio: Support VFIO_NOIOMMU with iommufd Jason Gunthorpe
@ 2023-01-09 23:34 ` Alex Williamson
  2023-01-10  0:29   ` Jason Gunthorpe
  2023-01-10  6:20 ` Tian, Kevin
  1 sibling, 1 reply; 5+ messages in thread
From: Alex Williamson @ 2023-01-09 23:34 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Cornelia Huck, iommu, Kevin Tian, kvm, Yi Liu

On Mon,  9 Jan 2023 10:22:59 -0400
Jason Gunthorpe <jgg@nvidia.com> wrote:

> Add a small amount of emulation to vfio_compat to accept the SET_IOMMU
> to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working
> on a no-iommu enabled device.
>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/iommu/iommufd/Kconfig       |  2 +-
>  drivers/iommu/iommufd/vfio_compat.c | 46 ++++++++++++++++++++++++-----
>  drivers/vfio/group.c                | 13 ++++----
>  drivers/vfio/iommufd.c              | 21 ++++++++++++-
>  include/linux/iommufd.h             |  6 ++--
>  5 files changed, 70 insertions(+), 18 deletions(-)
> 
> This needs a testing confirmation with dpdk to go forward, thanks

How do we create a noiommu group w/o the vfio_noiommu flag that's
provided by container.c?  Even without dpdk, you should be able to turn
off the system IOMMU and get something bound to vfio-pci that still
taints the kernel and provides a noiommu-%d group under /dev/vfio/.
There's a rudimentary unit test for noiommu here[1].  Thanks,

Alex

[1]https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c

> diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
> index 8306616b6d8198..ada693ea51a78e 100644
> --- a/drivers/iommu/iommufd/Kconfig
> +++ b/drivers/iommu/iommufd/Kconfig
> @@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER
>  	  removed.
>  
>  	  IOMMUFD VFIO container emulation is known to lack certain features
> -	  of the native VFIO container, such as no-IOMMU support, peer-to-peer
> +	  of the native VFIO container, such as peer-to-peer
>  	  DMA mapping, PPC IOMMU support, as well as other potentially
>  	  undiscovered gaps.  This option is currently intended for the
>  	  purpose of testing IOMMUFD with unmodified userspace supporting VFIO
> diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
> index 3ceca0e8311c39..6c8e5dc1c221f4 100644
> --- a/drivers/iommu/iommufd/vfio_compat.c
> +++ b/drivers/iommu/iommufd/vfio_compat.c
> @@ -26,16 +26,35 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx)
>  }
>  
>  /**
> - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
> + * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists
> + * @ictx: Context to operate on
> + *
> + * Return the ID of the current compatibility ioas. The ID can be passed into
> + * other functions that take an ioas_id.
> + */
> +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
> +{
> +	struct iommufd_ioas *ioas;
> +
> +	ioas = get_compat_ioas(ictx);
> +	if (IS_ERR(ioas))
> +		return PTR_ERR(ioas);
> +	*out_ioas_id = ioas->obj.id;
> +	iommufd_put_object(&ioas->obj);
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
> +
> +/**
> + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio should use
>   * @ictx: Context to operate on
> - * @out_ioas_id: The ioas_id the caller should use
>   *
>   * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
>   * on since they do not have an IOAS ID input in their ABI. Only attaching a
> - * group should cause a default creation of the internal ioas, this returns the
> - * existing ioas if it has already been assigned somehow.
> + * group should cause a default creation of the internal ioas, this does nothing
> + * if an existing ioas has already been assigned somehow.
>   */
> -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
> +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx)
>  {
>  	struct iommufd_ioas *ioas = NULL;
>  	struct iommufd_ioas *out_ioas;
> @@ -53,7 +72,6 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
>  	}
>  	xa_unlock(&ictx->objects);
>  
> -	*out_ioas_id = out_ioas->obj.id;
>  	if (out_ioas != ioas) {
>  		iommufd_put_object(&out_ioas->obj);
>  		iommufd_object_abort(ictx, &ioas->obj);
> @@ -68,7 +86,7 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
>  	iommufd_object_finalize(ictx, &ioas->obj);
>  	return 0;
>  }
> -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
> +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create_id, IOMMUFD_VFIO);
>  
>  int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
>  {
> @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
>  	case VFIO_UNMAP_ALL:
>  		return 1;
>  
> +	case VFIO_NOIOMMU_IOMMU:
> +	return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
> +
>  	case VFIO_DMA_CC_IOMMU:
>  		return iommufd_vfio_cc_iommu(ictx);
>  
> @@ -264,6 +285,17 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
>  	struct iommufd_ioas *ioas = NULL;
>  	int rc = 0;
>  
> +	/*
> +	 * Emulation for NOIMMU is imperfect in that VFIO blocks almost all
> +	 * other ioctls. We let them keep working but they mostly fail since no
> +	 * IOAS should exist.
> +	 */
> +	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU) {
> +		if (!capable(CAP_SYS_RAWIO))
> +			return -EPERM;
> +		return 0;
> +	}
> +
>  	if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
>  		return -EINVAL;
>  
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index bb24b2f0271e03..0f2a483a1f3bdb 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
>  
>  	iommufd = iommufd_ctx_from_file(f.file);
>  	if (!IS_ERR(iommufd)) {
> -		u32 ioas_id;
> -
> -		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
> -		if (ret) {
> -			iommufd_ctx_put(group->iommufd);
> -			goto out_unlock;
> +		if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) ||
> +		    group->type != VFIO_NO_IOMMU) {
> +			ret = iommufd_vfio_compat_ioas_create_id(iommufd);
> +			if (ret) {
> +				iommufd_ctx_put(group->iommufd);
> +				goto out_unlock;
> +			}
>  		}
>  
>  		group->iommufd = iommufd;
> diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
> index 4f82a6fa7c6c7f..da50feb24b6e1d 100644
> --- a/drivers/vfio/iommufd.c
> +++ b/drivers/vfio/iommufd.c
> @@ -18,6 +18,21 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
>  
>  	lockdep_assert_held(&vdev->dev_set->lock);
>  
> +	if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) &&
> +	    vdev->group->type == VFIO_NO_IOMMU) {
> +		if (!capable(CAP_SYS_RAWIO))
> +			return -EPERM;
> +
> +		/*
> +		 * Require no compat ioas to be assigned to proceed. The basic
> +		 * statement is that the user cannot have done something that
> +		 * implies they expected translation to exist
> +		 */
> +		if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id))
> +			return -EPERM;
> +		return 0;
> +	}
> +
>  	/*
>  	 * If the driver doesn't provide this op then it means the device does
>  	 * not do DMA at all. So nothing to do.
> @@ -29,7 +44,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
>  	if (ret)
>  		return ret;
>  
> -	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
> +	ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id);
>  	if (ret)
>  		goto err_unbind;
>  	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
> @@ -52,6 +67,10 @@ void vfio_iommufd_unbind(struct vfio_device *vdev)
>  {
>  	lockdep_assert_held(&vdev->dev_set->lock);
>  
> +	if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) &&
> +	    vdev->group->type == VFIO_NO_IOMMU)
> +		return;
> +
>  	if (vdev->ops->unbind_iommufd)
>  		vdev->ops->unbind_iommufd(vdev);
>  }
> diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
> index 650d45629647a7..9d1afd417215d0 100644
> --- a/include/linux/iommufd.h
> +++ b/include/linux/iommufd.h
> @@ -57,7 +57,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
>  				unsigned long iova, unsigned long length);
>  int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
>  		      void *data, size_t len, unsigned int flags);
> -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
> +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
> +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx);
>  #else /* !CONFIG_IOMMUFD */
>  static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
>  {
> @@ -89,8 +90,7 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long
>  	return -EOPNOTSUPP;
>  }
>  
> -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
> -					      u32 *out_ioas_id)
> +static inline int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx)
>  {
>  	return -EOPNOTSUPP;
>  }
> 
> base-commit: 88603b6dc419445847923fcb7fe5080067a30f98


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vfio: Support VFIO_NOIOMMU with iommufd
  2023-01-09 23:34 ` Alex Williamson
@ 2023-01-10  0:29   ` Jason Gunthorpe
  0 siblings, 0 replies; 5+ messages in thread
From: Jason Gunthorpe @ 2023-01-10  0:29 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Cornelia Huck, iommu, Kevin Tian, kvm, Yi Liu

On Mon, Jan 09, 2023 at 04:34:34PM -0700, Alex Williamson wrote:
> On Mon,  9 Jan 2023 10:22:59 -0400
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > Add a small amount of emulation to vfio_compat to accept the SET_IOMMU
> > to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working
> > on a no-iommu enabled device.
> >
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >  drivers/iommu/iommufd/Kconfig       |  2 +-
> >  drivers/iommu/iommufd/vfio_compat.c | 46 ++++++++++++++++++++++++-----
> >  drivers/vfio/group.c                | 13 ++++----
> >  drivers/vfio/iommufd.c              | 21 ++++++++++++-
> >  include/linux/iommufd.h             |  6 ++--
> >  5 files changed, 70 insertions(+), 18 deletions(-)
> > 
> > This needs a testing confirmation with dpdk to go forward, thanks
> 
> How do we create a noiommu group w/o the vfio_noiommu flag that's
> provided by container.c?

Ah, the module option is now in the wrong place, I'll move it to
vfio_main.c

> Even without dpdk, you should be able to turn off the system IOMMU
> and get something bound to vfio-pci that still taints the kernel and
> provides a noiommu-%d group under /dev/vfio/.  There's a rudimentary
> unit test for noiommu here[1].  Thanks,

Thanks, I'll check it

Jason

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] vfio: Support VFIO_NOIOMMU with iommufd
  2023-01-09 14:22 [PATCH] vfio: Support VFIO_NOIOMMU with iommufd Jason Gunthorpe
  2023-01-09 23:34 ` Alex Williamson
@ 2023-01-10  6:20 ` Tian, Kevin
  2023-01-10 18:02   ` Jason Gunthorpe
  1 sibling, 1 reply; 5+ messages in thread
From: Tian, Kevin @ 2023-01-10  6:20 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, iommu, kvm, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Monday, January 9, 2023 10:23 PM
>
>  /**
> - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
> + * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists

s/comat/compat/

> +/**
> + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio

'ID' is not returned in this case.

and it's slightly clearer to remove the trailing '_id' in the function name.

> @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct
> iommufd_ctx *ictx,
>  	case VFIO_UNMAP_ALL:
>  		return 1;
> 
> +	case VFIO_NOIOMMU_IOMMU:
> +	return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
> +

also check vfio_noiommu?

> @@ -264,6 +285,17 @@ static int iommufd_vfio_set_iommu(struct
> iommufd_ctx *ictx, unsigned long type)
>  	struct iommufd_ioas *ioas = NULL;
>  	int rc = 0;
>
> 
> +	/*
> +	 * Emulation for NOIMMU is imperfect in that VFIO blocks almost all

s/NOIMMU/NOIOMMU/

> +	 * other ioctls. We let them keep working but they mostly fail since no
> +	 * IOAS should exist.
> +	 */
> +	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type ==
> VFIO_NOIOMMU_IOMMU) {
> +		if (!capable(CAP_SYS_RAWIO))
> +			return -EPERM;
> +		return 0;
> +	}
> +

Another subtle difference. The container path has below check which applies
to noiommu:

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}



> @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct
> vfio_group *group,
> 
>  	iommufd = iommufd_ctx_from_file(f.file);
>  	if (!IS_ERR(iommufd)) {
> -		u32 ioas_id;
> -
> -		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
> -		if (ret) {
> -			iommufd_ctx_put(group->iommufd);
> -			goto out_unlock;
> +		if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) ||
> +		    group->type != VFIO_NO_IOMMU) {
> +			ret =
> iommufd_vfio_compat_ioas_create_id(iommufd);
> +			if (ret) {
> +				iommufd_ctx_put(group->iommufd);
> +				goto out_unlock;
> +			}

This doesn't prevent userspace from mixing noiommu and the check
in vfio_iommufd_bind() is partial which only ensures no compat ioas
when binding a noiommu device. It's still possible to bind a VFIO_IOMMU
type device and create the compat ioas afterwards.

somehow we need a sticky bit similar to container->noiommu.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vfio: Support VFIO_NOIOMMU with iommufd
  2023-01-10  6:20 ` Tian, Kevin
@ 2023-01-10 18:02   ` Jason Gunthorpe
  0 siblings, 0 replies; 5+ messages in thread
From: Jason Gunthorpe @ 2023-01-10 18:02 UTC (permalink / raw)
  To: Tian, Kevin; +Cc: Alex Williamson, Cornelia Huck, iommu, kvm, Liu, Yi L

On Tue, Jan 10, 2023 at 06:20:33AM +0000, Tian, Kevin wrote:
> > +/**
> > + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio
> 
> 'ID' is not returned in this case.
> 
> and it's slightly clearer to remove the trailing '_id' in the function name.
> 
> > @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct
> > iommufd_ctx *ictx,
> >  	case VFIO_UNMAP_ALL:
> >  		return 1;
> > 
> > +	case VFIO_NOIOMMU_IOMMU:
> > +	return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
> > +
> 
> also check vfio_noiommu?

Can't easily, that value is in another module

> Another subtle difference. The container path has below check which applies
> to noiommu:
> 
> 	/*
> 	 * The container is designed to be an unprivileged interface while
> 	 * the group can be assigned to specific users.  Therefore, only by
> 	 * adding a group to a container does the user get the privilege of
> 	 * enabling the iommu, which may allocate finite resources.  There
> 	 * is no unset_iommu, but by removing all the groups from a container,
> 	 * the container is deprivileged and returns to an unset state.
> 	 */
> 	if (list_empty(&container->group_list) || container->iommu_driver) {
> 		up_write(&container->group_lock);
> 		return -EINVAL;
> 	}

We don't follow that model in iommufd, once you have an iommufd you
can immediately start allocating resources, and it is not considered
"unprivileged". Instead all resource usage is constrained by the cgroup

I suppose it is something of a difference in IOMMUFD_VFIO_CONTAINER
that it behaves like this, but fixing it is not related to noiommu

> > @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct
> > vfio_group *group,
> > 
> >  	iommufd = iommufd_ctx_from_file(f.file);
> >  	if (!IS_ERR(iommufd)) {
> > -		u32 ioas_id;
> > -
> > -		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
> > -		if (ret) {
> > -			iommufd_ctx_put(group->iommufd);
> > -			goto out_unlock;
> > +		if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) ||
> > +		    group->type != VFIO_NO_IOMMU) {
> > +			ret =
> > iommufd_vfio_compat_ioas_create_id(iommufd);
> > +			if (ret) {
> > +				iommufd_ctx_put(group->iommufd);
> > +				goto out_unlock;
> > +			}
> 
> This doesn't prevent userspace from mixing noiommu and the check
> in vfio_iommufd_bind() is partial which only ensures no compat ioas
> when binding a noiommu device. It's still possible to bind a VFIO_IOMMU
> type device and create the compat ioas afterwards.

There are many ways to have an IOAS but also have an iommufd "bound"
to a noiommu device - userspace could just use the native iommufd
interface, or mess with the IOMMUFD_CMD_VFIO_IOAS, for instance.

I don't really want to get to the same kind of protection as vfio
typically did because it would be very invasive for little gain.

So long as a well behaved userspace does not become confused that it
thinks there is translation but none exists I think this is good
enough.

The way it is set here is if userspace somehow had a compat IOAS
assigned then things will fail, and if it attempts to do a map after
binding then it will also fail. This is enough to protect well behaved
userspace.

This will become more explicit in the cdev stuff where userspace must
explicitly specify -1 as the iommufd to run in noiommu and userspace
cannot become confused.

Jason

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-01-10 18:05 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-09 14:22 [PATCH] vfio: Support VFIO_NOIOMMU with iommufd Jason Gunthorpe
2023-01-09 23:34 ` Alex Williamson
2023-01-10  0:29   ` Jason Gunthorpe
2023-01-10  6:20 ` Tian, Kevin
2023-01-10 18:02   ` Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.