* [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API
@ 2022-04-14 18:45 Jason Gunthorpe
  2022-04-14 18:46 ` [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions Jason Gunthorpe
                   ` (9 more replies)
  0 siblings, 10 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:45 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

This is the other half of removing the vfio_group from the externally
facing VFIO API.

VFIO provides an API to manipulate its struct file *'s for use by KVM and
VFIO PCI. Instead of converting the struct file into a ref-counted struct
vfio_group, simply use the struct file as the handle throughout the API.

Along the way some of the APIs are simplified to be more direct about what
they are trying to do, with an eye toward providing future iommufd
implementations of all of them.

This also simplifies the container_users ref counting by not holding a
users refcount while KVM holds the group file.

Removing vfio_group from the externally facing API is part of the iommufd
work to modularize and compartmentalize the VFIO container and group
objects so they are entirely internal to VFIO itself.
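
For reference, this is roughly the struct file based surface the series
ends up with (prototypes as defined in the patches below, trimmed for
brevity - nothing here is new beyond the series itself):

	struct vfio_file_ops {
		struct iommu_group *(*get_iommu_group)(struct file *filep);
		bool (*is_enforced_coherent)(struct file *filep);
		void (*set_kvm)(struct file *filep, struct kvm *kvm);
	};

	/* Resolved by KVM through a single symbol_get() */
	const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);

	/* Used by VFIO PCI for the hot reset ownership check */
	bool vfio_file_has_dev(struct file *filep, struct vfio_device *device);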

This is on github: https://github.com/jgunthorpe/linux/commits/vfio_kvm_no_group

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Jason Gunthorpe (10):
  kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions
  kvm/vfio: Reduce the scope of PPC #ifdefs
  kvm/vfio: Store the struct file in the kvm_vfio_group
  vfio: Use a struct of function pointers instead of a many
    symbol_get()'s
  vfio: Move vfio_external_user_iommu_id() to vfio_file_ops
  vfio: Remove vfio_external_group_match_file()
  vfio: Move vfio_external_check_extension() to vfio_file_ops
  vfio: Move vfio_group_set_kvm() into vfio_file_ops
  kvm/vfio: Remove vfio_group from kvm
  vfio/pci: Use the struct file as the handle not the vfio_group

 drivers/vfio/pci/vfio_pci_core.c |  43 ++--
 drivers/vfio/vfio.c              | 156 ++++++------
 include/linux/vfio.h             |  17 +-
 virt/kvm/vfio.c                  | 405 ++++++++++++-------------------
 4 files changed, 278 insertions(+), 343 deletions(-)


base-commit: ce522ba9ef7e2d9fb22a39eb3371c0c64e2a433e
-- 
2.35.1


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  3:36   ` Tian, Kevin
  2022-04-15  7:18   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs Jason Gunthorpe
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

To make it easier to read and change in following patches.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 virt/kvm/vfio.c | 271 ++++++++++++++++++++++++++----------------------
 1 file changed, 146 insertions(+), 125 deletions(-)

This is best viewed using 'git diff -b' to ignore the whitespace change.

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 8fcbc50221c2d2..a1167ab7a2246f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -181,149 +181,170 @@ static void kvm_vfio_update_coherency(struct kvm_device *dev)
 	mutex_unlock(&kv->lock);
 }
 
-static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 {
 	struct kvm_vfio *kv = dev->private;
 	struct vfio_group *vfio_group;
 	struct kvm_vfio_group *kvg;
-	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
 	struct fd f;
-	int32_t fd;
 	int ret;
 
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	vfio_group = kvm_vfio_group_get_external_user(f.file);
+	fdput(f);
+
+	if (IS_ERR(vfio_group))
+		return PTR_ERR(vfio_group);
+
+	mutex_lock(&kv->lock);
+
+	list_for_each_entry(kvg, &kv->group_list, node) {
+		if (kvg->vfio_group == vfio_group) {
+			ret = -EEXIST;
+			goto err_unlock;
+		}
+	}
+
+	kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
+	if (!kvg) {
+		ret = -ENOMEM;
+		goto err_unlock;
+	}
+
+	list_add_tail(&kvg->node, &kv->group_list);
+	kvg->vfio_group = vfio_group;
+
+	kvm_arch_start_assignment(dev->kvm);
+
+	mutex_unlock(&kv->lock);
+
+	kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
+	kvm_vfio_update_coherency(dev);
+
+	return 0;
+err_unlock:
+	mutex_unlock(&kv->lock);
+	kvm_vfio_group_put_external_user(vfio_group);
+	return ret;
+}
+
+static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct kvm_vfio_group *kvg;
+	struct fd f;
+	int ret;
+
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	ret = -ENOENT;
+
+	mutex_lock(&kv->lock);
+
+	list_for_each_entry(kvg, &kv->group_list, node) {
+		if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
+							f.file))
+			continue;
+
+		list_del(&kvg->node);
+		kvm_arch_end_assignment(dev->kvm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
+#endif
+		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
+		kvm_vfio_group_put_external_user(kvg->vfio_group);
+		kfree(kvg);
+		ret = 0;
+		break;
+	}
+
+	mutex_unlock(&kv->lock);
+
+	fdput(f);
+
+	kvm_vfio_update_coherency(dev);
+
+	return ret;
+}
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
+					void __user *arg)
+{
+	struct kvm_vfio_spapr_tce param;
+	struct kvm_vfio *kv = dev->private;
+	struct vfio_group *vfio_group;
+	struct kvm_vfio_group *kvg;
+	struct fd f;
+	struct iommu_group *grp;
+	int ret;
+
+	if (copy_from_user(&param, arg, sizeof(struct kvm_vfio_spapr_tce)))
+		return -EFAULT;
+
+	f = fdget(param.groupfd);
+	if (!f.file)
+		return -EBADF;
+
+	vfio_group = kvm_vfio_group_get_external_user(f.file);
+	fdput(f);
+
+	if (IS_ERR(vfio_group))
+		return PTR_ERR(vfio_group);
+
+	grp = kvm_vfio_group_get_iommu_group(vfio_group);
+	if (WARN_ON_ONCE(!grp)) {
+		ret = -EIO;
+		goto err_put_external;
+	}
+
+	ret = -ENOENT;
+
+	mutex_lock(&kv->lock);
+
+	list_for_each_entry(kvg, &kv->group_list, node) {
+		if (kvg->vfio_group != vfio_group)
+			continue;
+
+		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, param.tablefd,
+						       grp);
+		break;
+	}
+
+	mutex_unlock(&kv->lock);
+
+	iommu_group_put(grp);
+err_put_external:
+	kvm_vfio_group_put_external_user(vfio_group);
+	return ret;
+}
+#endif
+
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
+	int32_t fd;
+
 	switch (attr) {
 	case KVM_DEV_VFIO_GROUP_ADD:
 		if (get_user(fd, argp))
 			return -EFAULT;
-
-		f = fdget(fd);
-		if (!f.file)
-			return -EBADF;
-
-		vfio_group = kvm_vfio_group_get_external_user(f.file);
-		fdput(f);
-
-		if (IS_ERR(vfio_group))
-			return PTR_ERR(vfio_group);
-
-		mutex_lock(&kv->lock);
-
-		list_for_each_entry(kvg, &kv->group_list, node) {
-			if (kvg->vfio_group == vfio_group) {
-				mutex_unlock(&kv->lock);
-				kvm_vfio_group_put_external_user(vfio_group);
-				return -EEXIST;
-			}
-		}
-
-		kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
-		if (!kvg) {
-			mutex_unlock(&kv->lock);
-			kvm_vfio_group_put_external_user(vfio_group);
-			return -ENOMEM;
-		}
-
-		list_add_tail(&kvg->node, &kv->group_list);
-		kvg->vfio_group = vfio_group;
-
-		kvm_arch_start_assignment(dev->kvm);
-
-		mutex_unlock(&kv->lock);
-
-		kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
-
-		kvm_vfio_update_coherency(dev);
-
-		return 0;
+		return kvm_vfio_group_add(dev, fd);
 
 	case KVM_DEV_VFIO_GROUP_DEL:
 		if (get_user(fd, argp))
 			return -EFAULT;
+		return kvm_vfio_group_del(dev, fd);
 
-		f = fdget(fd);
-		if (!f.file)
-			return -EBADF;
-
-		ret = -ENOENT;
-
-		mutex_lock(&kv->lock);
-
-		list_for_each_entry(kvg, &kv->group_list, node) {
-			if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
-								f.file))
-				continue;
-
-			list_del(&kvg->node);
-			kvm_arch_end_assignment(dev->kvm);
 #ifdef CONFIG_SPAPR_TCE_IOMMU
-			kvm_spapr_tce_release_vfio_group(dev->kvm,
-							 kvg->vfio_group);
+	case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
+		return kvm_vfio_group_set_spapr_tce(dev, (void __user *)arg);
 #endif
-			kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
-			kvm_vfio_group_put_external_user(kvg->vfio_group);
-			kfree(kvg);
-			ret = 0;
-			break;
-		}
-
-		mutex_unlock(&kv->lock);
-
-		fdput(f);
-
-		kvm_vfio_update_coherency(dev);
-
-		return ret;
-
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-	case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: {
-		struct kvm_vfio_spapr_tce param;
-		struct kvm_vfio *kv = dev->private;
-		struct vfio_group *vfio_group;
-		struct kvm_vfio_group *kvg;
-		struct fd f;
-		struct iommu_group *grp;
-
-		if (copy_from_user(&param, (void __user *)arg,
-				sizeof(struct kvm_vfio_spapr_tce)))
-			return -EFAULT;
-
-		f = fdget(param.groupfd);
-		if (!f.file)
-			return -EBADF;
-
-		vfio_group = kvm_vfio_group_get_external_user(f.file);
-		fdput(f);
-
-		if (IS_ERR(vfio_group))
-			return PTR_ERR(vfio_group);
-
-		grp = kvm_vfio_group_get_iommu_group(vfio_group);
-		if (WARN_ON_ONCE(!grp)) {
-			kvm_vfio_group_put_external_user(vfio_group);
-			return -EIO;
-		}
-
-		ret = -ENOENT;
-
-		mutex_lock(&kv->lock);
-
-		list_for_each_entry(kvg, &kv->group_list, node) {
-			if (kvg->vfio_group != vfio_group)
-				continue;
-
-			ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
-					param.tablefd, grp);
-			break;
-		}
-
-		mutex_unlock(&kv->lock);
-
-		iommu_group_put(grp);
-		kvm_vfio_group_put_external_user(vfio_group);
-
-		return ret;
-	}
-#endif /* CONFIG_SPAPR_TCE_IOMMU */
 	}
 
 	return -ENXIO;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
  2022-04-14 18:46 ` [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  4:47   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group Jason Gunthorpe
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

Use IS_ENABLED and static inlines instead of just ifdef'ing away all the
PPC code. This allows it to be compile tested on all platforms and makes
it easier to maintain.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 virt/kvm/vfio.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

I'm not a big fan of #ifdefs around #includes, but lack a better idea - these
weird spapr things should not be part of the generic kvm arch API, at least
IMHO.
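
For readers unfamiliar with the idiom, this is the pattern being applied,
sketched with a made-up CONFIG_FOO (assumes the usual kernel headers,
linux/kconfig.h and linux/errno.h). The stub keeps everything compile
tested while IS_ENABLED() lets the compiler discard the dead branch:

	#ifdef CONFIG_FOO
	int foo_do_thing(struct foo *f);
	#else
	static inline int foo_do_thing(struct foo *f)
	{
		return -EINVAL;
	}
	#endif

	static void bar(struct foo *f)
	{
		if (!IS_ENABLED(CONFIG_FOO))
			return;
		foo_do_thing(f);	/* still type checked on every config */
	}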

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index a1167ab7a2246f..9b942f447df79d 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -19,6 +19,16 @@
 
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 #include <asm/kvm_ppc.h>
+#else
+static long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+		struct iommu_group *grp)
+{
+	return -EINVAL;
+}
+static void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+					      struct iommu_group *grp)
+{
+}
 #endif
 
 struct kvm_vfio_group {
@@ -106,7 +116,6 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
 	return ret > 0;
 }
 
-#ifdef CONFIG_SPAPR_TCE_IOMMU
 static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group)
 {
 	int (*fn)(struct vfio_group *);
@@ -137,15 +146,18 @@ static struct iommu_group *kvm_vfio_group_get_iommu_group(
 static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
 		struct vfio_group *vfio_group)
 {
-	struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group);
+	struct iommu_group *grp;
 
+	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
+		return;
+
+	grp = kvm_vfio_group_get_iommu_group(vfio_group);
 	if (WARN_ON_ONCE(!grp))
 		return;
 
 	kvm_spapr_tce_release_iommu_group(kvm, grp);
 	iommu_group_put(grp);
 }
-#endif
 
 /*
  * Groups can use the same or different IOMMU domains.  If the same then
@@ -253,9 +265,7 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 
 		list_del(&kvg->node);
 		kvm_arch_end_assignment(dev->kvm);
-#ifdef CONFIG_SPAPR_TCE_IOMMU
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
-#endif
 		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		kfree(kvg);
@@ -272,7 +282,6 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 	return ret;
 }
 
-#ifdef CONFIG_SPAPR_TCE_IOMMU
 static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 					void __user *arg)
 {
@@ -284,6 +293,9 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 	struct iommu_group *grp;
 	int ret;
 
+	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
+		return -ENXIO;
+
 	if (copy_from_user(&param, arg, sizeof(struct kvm_vfio_spapr_tce)))
 		return -EFAULT;
 
@@ -323,7 +335,6 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 	kvm_vfio_group_put_external_user(vfio_group);
 	return ret;
 }
-#endif
 
 static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 {
@@ -341,10 +352,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 			return -EFAULT;
 		return kvm_vfio_group_del(dev, fd);
 
-#ifdef CONFIG_SPAPR_TCE_IOMMU
 	case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
 		return kvm_vfio_group_set_spapr_tce(dev, (void __user *)arg);
-#endif
 	}
 
 	return -ENXIO;
@@ -369,9 +378,10 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_VFIO_GROUP_ADD:
 		case KVM_DEV_VFIO_GROUP_DEL:
-#ifdef CONFIG_SPAPR_TCE_IOMMU
+			return 0;
 		case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
-#endif
+			if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
+				break;
 			return 0;
 		}
 
@@ -387,9 +397,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	struct kvm_vfio_group *kvg, *tmp;
 
 	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
-#ifdef CONFIG_SPAPR_TCE_IOMMU
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
-#endif
 		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		list_del(&kvg->node);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
  2022-04-14 18:46 ` [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions Jason Gunthorpe
  2022-04-14 18:46 ` [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  3:44   ` Tian, Kevin
  2022-04-15  7:20   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s Jason Gunthorpe
                   ` (6 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

Following patches will change the APIs to use the struct file as the handle
instead of the vfio_group, so hang on to a reference to it for the same
duration as the vfio_group.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 virt/kvm/vfio.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)
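
The reference rule this introduces, reduced to a sketch with hypothetical
names (the real code keeps the file in kvg->filp; fget()/fput() come from
linux/file.h). Note the switch from fdget() to fget() in the hunks below:
the reference now outlives the ioctl, so a full fget() reference is needed
rather than the borrowed fdget() one:

	struct demo_entry {
		struct file *filp;
	};

	static int demo_add(struct demo_entry *e, unsigned int fd)
	{
		e->filp = fget(fd);	/* +1, held for the life of the entry */
		if (!e->filp)
			return -EBADF;
		return 0;
	}

	static void demo_del(struct demo_entry *e)
	{
		fput(e->filp);		/* paired drop on GROUP_DEL / destroy */
		e->filp = NULL;
	}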

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 9b942f447df79d..7e1793a1f5f1fd 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -33,6 +33,7 @@ static void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
 
 struct kvm_vfio_group {
 	struct list_head node;
+	struct file *filp;
 	struct vfio_group *vfio_group;
 };
 
@@ -198,23 +199,17 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 	struct kvm_vfio *kv = dev->private;
 	struct vfio_group *vfio_group;
 	struct kvm_vfio_group *kvg;
-	struct fd f;
+	struct file *filp;
 	int ret;
 
-	f = fdget(fd);
-	if (!f.file)
+	filp = fget(fd);
+	if (!filp)
 		return -EBADF;
 
-	vfio_group = kvm_vfio_group_get_external_user(f.file);
-	fdput(f);
-
-	if (IS_ERR(vfio_group))
-		return PTR_ERR(vfio_group);
-
 	mutex_lock(&kv->lock);
 
 	list_for_each_entry(kvg, &kv->group_list, node) {
-		if (kvg->vfio_group == vfio_group) {
+		if (kvg->filp == filp) {
 			ret = -EEXIST;
 			goto err_unlock;
 		}
@@ -226,6 +221,13 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 		goto err_unlock;
 	}
 
+	vfio_group = kvm_vfio_group_get_external_user(filp);
+	if (IS_ERR(vfio_group)) {
+		ret = PTR_ERR(vfio_group);
+		goto err_free;
+	}
+
+	kvg->filp = filp;
 	list_add_tail(&kvg->node, &kv->group_list);
 	kvg->vfio_group = vfio_group;
 
@@ -237,9 +239,11 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 	kvm_vfio_update_coherency(dev);
 
 	return 0;
+err_free:
+	kfree(kvg);
 err_unlock:
 	mutex_unlock(&kv->lock);
-	kvm_vfio_group_put_external_user(vfio_group);
+	fput(filp);
 	return ret;
 }
 
@@ -268,6 +272,7 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
 		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
+		fput(kvg->filp);
 		kfree(kvg);
 		ret = 0;
 		break;
@@ -304,10 +309,10 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 		return -EBADF;
 
 	vfio_group = kvm_vfio_group_get_external_user(f.file);
-	fdput(f);
-
-	if (IS_ERR(vfio_group))
-		return PTR_ERR(vfio_group);
+	if (IS_ERR(vfio_group)) {
+		ret = PTR_ERR(vfio_group);
+		goto err_fdput;
+	}
 
 	grp = kvm_vfio_group_get_iommu_group(vfio_group);
 	if (WARN_ON_ONCE(!grp)) {
@@ -320,7 +325,7 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 	mutex_lock(&kv->lock);
 
 	list_for_each_entry(kvg, &kv->group_list, node) {
-		if (kvg->vfio_group != vfio_group)
+		if (kvg->filp != f.file)
 			continue;
 
 		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, param.tablefd,
@@ -333,6 +338,8 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 	iommu_group_put(grp);
 err_put_external:
 	kvm_vfio_group_put_external_user(vfio_group);
+err_fdput:
+	fdput(f);
 	return ret;
 }
 
@@ -400,6 +407,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
 		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
+		fput(kvg->filp);
 		list_del(&kvg->node);
 		kfree(kvg);
 		kvm_arch_end_assignment(dev->kvm);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (2 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  3:57   ` Tian, Kevin
  2022-04-15  4:45   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops Jason Gunthorpe
                   ` (5 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

kvm and VFIO need to be coupled together, however neither is willing to
tolerate a direct module dependency. Instead, when kvm is given a VFIO FD
it uses many symbol_get()'s to access VFIO.

Provide a single VFIO function vfio_file_get_ops() which validates the
given struct file * is a VFIO file and then returns a struct of ops.

Following patches will replace each of the symbol_get() calls with an
indirection through this ops struct.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c  | 19 +++++++++++++++++++
 include/linux/vfio.h |  3 +++
 virt/kvm/vfio.c      | 14 ++++++++++++++
 3 files changed, 36 insertions(+)
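
Consumer side, condensed from the kvm hunk below (error handling trimmed,
helper name hypothetical; assumes linux/module.h, linux/err.h and the
linux/vfio.h declaration added here):

	static const struct vfio_file_ops *demo_resolve_ops(struct file *filp)
	{
		const struct vfio_file_ops *(*fn)(struct file *filep);
		const struct vfio_file_ops *ops;

		fn = symbol_get(vfio_file_get_ops);
		if (!fn)
			return ERR_PTR(-EINVAL);
		ops = fn(filp);
		/*
		 * Dropping the symbol reference here is fine: the caller's
		 * reference on the open VFIO file is what keeps vfio.ko, and
		 * therefore the returned function pointers, alive.
		 */
		symbol_put(vfio_file_get_ops);
		return ops;
	}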

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index a4555014bd1e72..93508f6a8beda5 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -2013,6 +2013,25 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
 }
 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
 
+static const struct vfio_file_ops vfio_file_group_ops = {
+};
+
+/**
+ * vfio_file_get_ops - Return a struct of function pointers to use with the file
+ * @filep: VFIO file to return pointers for
+ *
+ * This API exists to allow KVM to call into VFIO without tightly coupling the
+ * VFIO and KVM modules together. KVM will call this using symbol_get() and from
+ * then on will call VFIO through the returned function pointers.
+ */
+const struct vfio_file_ops *vfio_file_get_ops(struct file *filep)
+{
+	if (filep->f_op != &vfio_group_fops)
+		return ERR_PTR(-EINVAL);
+	return &vfio_file_group_ops;
+}
+EXPORT_SYMBOL_GPL(vfio_file_get_ops);
+
 /*
  * Sub-module support
  */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 66dda06ec42d1b..409bbf817206cc 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -138,6 +138,8 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 /*
  * External user API
  */
+struct vfio_file_ops {
+};
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
 extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
@@ -147,6 +149,7 @@ extern bool vfio_external_group_match_file(struct vfio_group *group,
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
+const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
 
 #define VFIO_PIN_PAGES_MAX_ENTRIES	(PAGE_SIZE/sizeof(unsigned long))
 
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 7e1793a1f5f1fd..254d8c18378163 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -34,6 +34,7 @@ static void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
 struct kvm_vfio_group {
 	struct list_head node;
 	struct file *filp;
+	const struct vfio_file_ops *ops;
 	struct vfio_group *vfio_group;
 };
 
@@ -196,6 +197,7 @@ static void kvm_vfio_update_coherency(struct kvm_device *dev)
 
 static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 {
+	const struct vfio_file_ops *(*fn)(struct file *filep);
 	struct kvm_vfio *kv = dev->private;
 	struct vfio_group *vfio_group;
 	struct kvm_vfio_group *kvg;
@@ -221,6 +223,18 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 		goto err_unlock;
 	}
 
+	fn = symbol_get(vfio_file_get_ops);
+	if (!fn) {
+		ret = -EINVAL;
+		goto err_free;
+	}
+	kvg->ops = fn(filp);
+	symbol_put(vfio_file_get_ops);
+	if (IS_ERR(kvg->ops)) {
+		ret = PTR_ERR(kvg->ops);
+		goto err_free;
+	}
+
 	vfio_group = kvm_vfio_group_get_external_user(filp);
 	if (IS_ERR(vfio_group)) {
 		ret = PTR_ERR(vfio_group);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (3 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  3:59   ` Tian, Kevin
  2022-04-15  7:31   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 06/10] vfio: Remove vfio_external_group_match_file() Jason Gunthorpe
                   ` (4 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

The only user wants to get a pointer to the struct iommu_group associated
with the VFIO file being used. Instead of returning the group ID and then
searching sysfs for that ID, just directly return the iommu_group pointer
already held by the vfio_group struct.

It already has a safe lifetime due to the struct file kref.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c  | 20 ++++++++-----
 include/linux/vfio.h |  2 +-
 virt/kvm/vfio.c      | 68 ++++++--------------------------------------
 3 files changed, 22 insertions(+), 68 deletions(-)
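
The call site difference, written out as two hypothetical helpers (the
functions and types referenced are the ones in the hunks below):

	/* Before: ID lookup plus an extra reference for the caller to manage */
	static struct iommu_group *demo_old_way(struct vfio_group *vfio_group)
	{
		int id = kvm_vfio_external_user_iommu_id(vfio_group);

		if (id < 0)
			return NULL;
		/* caller must iommu_group_put() when done */
		return iommu_group_get_by_id(id);
	}

	/* After: direct pointer, no reference to manage */
	static struct iommu_group *demo_new_way(struct kvm_vfio_group *kvg)
	{
		/* valid for as long as the reference on kvg->filp is held */
		return kvg->ops->get_iommu_group(kvg->filp);
	}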

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 93508f6a8beda5..4d62de69705573 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1919,10 +1919,7 @@ static const struct file_operations vfio_device_fops = {
  * increments the container user counter to prevent
  * the VFIO group from disposal before KVM exits.
  *
- * 3. The external user calls vfio_external_user_iommu_id()
- * to know an IOMMU ID.
- *
- * 4. When the external KVM finishes, it calls
+ * 3. When the external KVM finishes, it calls
  * vfio_group_put_external_user() to release the VFIO group.
  * This call decrements the container user counter.
  */
@@ -2001,11 +1998,19 @@ bool vfio_external_group_match_file(struct vfio_group *test_group,
 }
 EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
 
-int vfio_external_user_iommu_id(struct vfio_group *group)
+/**
+ * vfio_file_iommu_group - Return the struct iommu_group for the vfio file
+ * @filep: VFIO file
+ *
+ * The returned iommu_group is valid as long as a ref is held on the filep.
+ * VFIO files always have an iommu_group, so this cannot fail.
+ */
+static struct iommu_group *vfio_file_iommu_group(struct file *filep)
 {
-	return iommu_group_id(group->iommu_group);
+	struct vfio_group *group = filep->private_data;
+
+	return group->iommu_group;
 }
-EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
 
 long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
 {
@@ -2014,6 +2019,7 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
 
 static const struct vfio_file_ops vfio_file_group_ops = {
+	.get_iommu_group = vfio_file_iommu_group,
 };
 
 /**
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 409bbf817206cc..e5ca7d5a0f1584 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -139,6 +139,7 @@ int vfio_mig_get_next_state(struct vfio_device *device,
  * External user API
  */
 struct vfio_file_ops {
+	struct iommu_group *(*get_iommu_group)(struct file *filep);
 };
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
@@ -146,7 +147,6 @@ extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
 								*dev);
 extern bool vfio_external_group_match_file(struct vfio_group *group,
 					   struct file *filep);
-extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 254d8c18378163..743e4870fa1825 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -118,47 +118,14 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
 	return ret > 0;
 }
 
-static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group)
-{
-	int (*fn)(struct vfio_group *);
-	int ret = -EINVAL;
-
-	fn = symbol_get(vfio_external_user_iommu_id);
-	if (!fn)
-		return ret;
-
-	ret = fn(vfio_group);
-
-	symbol_put(vfio_external_user_iommu_id);
-
-	return ret;
-}
-
-static struct iommu_group *kvm_vfio_group_get_iommu_group(
-		struct vfio_group *group)
-{
-	int group_id = kvm_vfio_external_user_iommu_id(group);
-
-	if (group_id < 0)
-		return NULL;
-
-	return iommu_group_get_by_id(group_id);
-}
-
 static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
-		struct vfio_group *vfio_group)
+					     struct kvm_vfio_group *kvg)
 {
-	struct iommu_group *grp;
-
 	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
 		return;
 
-	grp = kvm_vfio_group_get_iommu_group(vfio_group);
-	if (WARN_ON_ONCE(!grp))
-		return;
-
-	kvm_spapr_tce_release_iommu_group(kvm, grp);
-	iommu_group_put(grp);
+	kvm_spapr_tce_release_iommu_group(kvm,
+					  kvg->ops->get_iommu_group(kvg->filp));
 }
 
 /*
@@ -283,7 +250,7 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 
 		list_del(&kvg->node);
 		kvm_arch_end_assignment(dev->kvm);
-		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
+		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
 		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		fput(kvg->filp);
@@ -306,10 +273,8 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 {
 	struct kvm_vfio_spapr_tce param;
 	struct kvm_vfio *kv = dev->private;
-	struct vfio_group *vfio_group;
 	struct kvm_vfio_group *kvg;
 	struct fd f;
-	struct iommu_group *grp;
 	int ret;
 
 	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
@@ -322,18 +287,6 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 	if (!f.file)
 		return -EBADF;
 
-	vfio_group = kvm_vfio_group_get_external_user(f.file);
-	if (IS_ERR(vfio_group)) {
-		ret = PTR_ERR(vfio_group);
-		goto err_fdput;
-	}
-
-	grp = kvm_vfio_group_get_iommu_group(vfio_group);
-	if (WARN_ON_ONCE(!grp)) {
-		ret = -EIO;
-		goto err_put_external;
-	}
-
 	ret = -ENOENT;
 
 	mutex_lock(&kv->lock);
@@ -341,18 +294,13 @@ static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
 	list_for_each_entry(kvg, &kv->group_list, node) {
 		if (kvg->filp != f.file)
 			continue;
-
-		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, param.tablefd,
-						       grp);
+		ret = kvm_spapr_tce_attach_iommu_group(
+			dev->kvm, param.tablefd,
+			kvg->ops->get_iommu_group(kvg->filp));
 		break;
 	}
 
 	mutex_unlock(&kv->lock);
-
-	iommu_group_put(grp);
-err_put_external:
-	kvm_vfio_group_put_external_user(vfio_group);
-err_fdput:
 	fdput(f);
 	return ret;
 }
@@ -418,7 +366,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	struct kvm_vfio_group *kvg, *tmp;
 
 	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
-		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
+		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
 		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		fput(kvg->filp);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 06/10] vfio: Remove vfio_external_group_match_file()
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (4 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  4:02   ` Tian, Kevin
  2022-04-15  7:32   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops Jason Gunthorpe
                   ` (3 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

vfio_group_fops_open() ensures there is only ever one struct file open for
any struct vfio_group at any time:

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;

Therefore the struct file * can be used directly to search the list of VFIO
groups that KVM keeps, instead of using the
vfio_external_group_match_file() callback to try to figure out if the
passed-in FD matches the list or not.

Delete vfio_external_group_match_file().

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c  |  9 ---------
 include/linux/vfio.h |  2 --
 virt/kvm/vfio.c      | 19 +------------------
 3 files changed, 1 insertion(+), 29 deletions(-)
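
With that invariant the KVM side match reduces to a plain pointer compare,
schematically (the variables are the ones already used in
kvm_vfio_group_del()):

	list_for_each_entry(kvg, &kv->group_list, node)
		if (kvg->filp == f.file)	/* the file is the group's identity */
			break;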

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 4d62de69705573..eb65b4c80ece64 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1989,15 +1989,6 @@ void vfio_group_put_external_user(struct vfio_group *group)
 }
 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
 
-bool vfio_external_group_match_file(struct vfio_group *test_group,
-				    struct file *filep)
-{
-	struct vfio_group *group = filep->private_data;
-
-	return (filep->f_op == &vfio_group_fops) && (group == test_group);
-}
-EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
-
 /**
  * vfio_file_iommu_group - Return the struct iommu_group for the vfio file
  * @filep: VFIO file
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e5ca7d5a0f1584..d09a1856d4e5ea 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -145,8 +145,6 @@ extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
 extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
 								*dev);
-extern bool vfio_external_group_match_file(struct vfio_group *group,
-					   struct file *filep);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 743e4870fa1825..955cabc0683b29 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -60,22 +60,6 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
 	return vfio_group;
 }
 
-static bool kvm_vfio_external_group_match_file(struct vfio_group *group,
-					       struct file *filep)
-{
-	bool ret, (*fn)(struct vfio_group *, struct file *);
-
-	fn = symbol_get(vfio_external_group_match_file);
-	if (!fn)
-		return false;
-
-	ret = fn(group, filep);
-
-	symbol_put(vfio_external_group_match_file);
-
-	return ret;
-}
-
 static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 {
 	void (*fn)(struct vfio_group *);
@@ -244,8 +228,7 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 	mutex_lock(&kv->lock);
 
 	list_for_each_entry(kvg, &kv->group_list, node) {
-		if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
-							f.file))
+		if (kvg->filp != f.file)
 			continue;
 
 		list_del(&kvg->node);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (5 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 06/10] vfio: Remove vfio_external_group_match_file() Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  4:07   ` Tian, Kevin
  2022-04-15  4:48   ` Christoph Hellwig
  2022-04-14 18:46 ` [PATCH 08/10] vfio: Move vfio_group_set_kvm() into vfio_file_ops Jason Gunthorpe
                   ` (2 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

Focus the new op on is_enforced_coherent(), which only checks the
enforced DMA coherency property of the file.

Make the new op self-contained by properly refcounting the container
before touching it.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c  | 27 ++++++++++++++++++++++++---
 include/linux/vfio.h |  3 +--
 virt/kvm/vfio.c      | 18 +-----------------
 3 files changed, 26 insertions(+), 22 deletions(-)
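
For context, this is how the new op ends up being consumed on the kvm side
(schematic; it is essentially what kvm_vfio_update_coherency() does after
this change): if any attached VFIO file cannot enforce coherency, KVM has
to assume noncoherent DMA and honour guest wbinvd:

	noncoherent = false;
	list_for_each_entry(kvg, &kv->group_list, node) {
		if (!kvg->ops->is_enforced_coherent(kvg->filp)) {
			noncoherent = true;
			break;
		}
	}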

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index eb65b4c80ece64..c08093fb6d28d5 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -2003,14 +2003,35 @@ static struct iommu_group *vfio_file_iommu_group(struct file *filep)
 	return group->iommu_group;
 }
 
-long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
+/**
+ * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
+ *        is always CPU cache coherent
+ * @filep: VFIO file
+ *
+ * Enforced coherent means that the IOMMU ignores things like the PCIe no-snoop
+ * bit in DMA transactions. A return of false indicates that the user has
+ * rights to access additional instructions such as wbinvd on x86.
+ */
+static bool vfio_file_enforced_coherent(struct file *filep)
 {
-	return vfio_ioctl_check_extension(group->container, arg);
+	struct vfio_group *group = filep->private_data;
+	bool ret;
+
+	/*
+	 * Since the coherency state is determined only once a container is
+	 * attached the user must do so before they can prove they have
+	 * permission.
+	 */
+	if (vfio_group_add_container_user(group))
+		return true;
+	ret = vfio_ioctl_check_extension(group->container, VFIO_DMA_CC_IOMMU);
+	vfio_group_try_dissolve_container(group);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(vfio_external_check_extension);
 
 static const struct vfio_file_ops vfio_file_group_ops = {
 	.get_iommu_group = vfio_file_iommu_group,
+	.is_enforced_coherent = vfio_file_enforced_coherent,
 };
 
 /**
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index d09a1856d4e5ea..b1583eb80f12e6 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -140,13 +140,12 @@ int vfio_mig_get_next_state(struct vfio_device *device,
  */
 struct vfio_file_ops {
 	struct iommu_group *(*get_iommu_group)(struct file *filep);
+	bool (*is_enforced_coherent)(struct file *filep);
 };
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
 extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
 								*dev);
-extern long vfio_external_check_extension(struct vfio_group *group,
-					  unsigned long arg);
 const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
 
 #define VFIO_PIN_PAGES_MAX_ENTRIES	(PAGE_SIZE/sizeof(unsigned long))
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 955cabc0683b29..f5ef78192a97ab 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -86,22 +86,6 @@ static void kvm_vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
 	symbol_put(vfio_group_set_kvm);
 }
 
-static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
-{
-	long (*fn)(struct vfio_group *, unsigned long);
-	long ret;
-
-	fn = symbol_get(vfio_external_check_extension);
-	if (!fn)
-		return false;
-
-	ret = fn(vfio_group, VFIO_DMA_CC_IOMMU);
-
-	symbol_put(vfio_external_check_extension);
-
-	return ret > 0;
-}
-
 static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
 					     struct kvm_vfio_group *kvg)
 {
@@ -128,7 +112,7 @@ static void kvm_vfio_update_coherency(struct kvm_device *dev)
 	mutex_lock(&kv->lock);
 
 	list_for_each_entry(kvg, &kv->group_list, node) {
-		if (!kvm_vfio_group_is_coherent(kvg->vfio_group)) {
+		if (!kvg->ops->is_enforced_coherent(kvg->filp)) {
 			noncoherent = true;
 			break;
 		}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 08/10] vfio: Move vfio_group_set_kvm() into vfio_file_ops
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (6 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  4:09   ` Tian, Kevin
  2022-04-14 18:46 ` [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm Jason Gunthorpe
  2022-04-14 18:46 ` [PATCH 10/10] vfio/pci: Use the struct file as the handle not the vfio_group Jason Gunthorpe
  9 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

Just move the exported function into the op and have it accept the struct
file instead of the vfio_group.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c  | 26 ++++++++++++++++++--------
 include/linux/vfio.h |  5 +++--
 virt/kvm/vfio.c      | 19 +++----------------
 3 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index c08093fb6d28d5..2eb63d9dded8fb 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -2029,9 +2029,27 @@ static bool vfio_file_enforced_coherent(struct file *filep)
 	return ret;
 }
 
+/**
+ * vfio_file_set_kvm - Link a kvm with VFIO drivers
+ * @filep: VFIO file
+ * @kvm: KVM to link
+ *
+ * The kvm pointer will be forwarded to all the vfio_device's attached to the
+ * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
+ */
+static void vfio_file_set_kvm(struct file *filep, struct kvm *kvm)
+{
+	struct vfio_group *group = filep->private_data;
+
+	group->kvm = kvm;
+	blocking_notifier_call_chain(&group->notifier,
+				     VFIO_GROUP_NOTIFY_SET_KVM, kvm);
+}
+
 static const struct vfio_file_ops vfio_file_group_ops = {
 	.get_iommu_group = vfio_file_iommu_group,
 	.is_enforced_coherent = vfio_file_enforced_coherent,
+	.set_kvm = vfio_file_set_kvm,
 };
 
 /**
@@ -2461,14 +2479,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 	return ret;
 }
 
-void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
-{
-	group->kvm = kvm;
-	blocking_notifier_call_chain(&group->notifier,
-				VFIO_GROUP_NOTIFY_SET_KVM, kvm);
-}
-EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
-
 static int vfio_register_group_notifier(struct vfio_group *group,
 					unsigned long *events,
 					struct notifier_block *nb)
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index b1583eb80f12e6..bc6e47f3f26560 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -15,6 +15,8 @@
 #include <linux/poll.h>
 #include <uapi/linux/vfio.h>
 
+struct kvm;
+
 /*
  * VFIO devices can be placed in a set, this allows all devices to share this
  * structure and the VFIO core will provide a lock that is held around
@@ -141,6 +143,7 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 struct vfio_file_ops {
 	struct iommu_group *(*get_iommu_group)(struct file *filep);
 	bool (*is_enforced_coherent)(struct file *filep);
+	void (*set_kvm)(struct file *filep, struct kvm *kvm);
 };
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
@@ -186,8 +189,6 @@ extern int vfio_unregister_notifier(struct device *dev,
 				    enum vfio_notify_type type,
 				    struct notifier_block *nb);
 
-struct kvm;
-extern void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm);
 
 /*
  * Sub-module helpers
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index f5ef78192a97ab..9baf04c5b0cc3d 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -73,19 +73,6 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 	symbol_put(vfio_group_put_external_user);
 }
 
-static void kvm_vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
-{
-	void (*fn)(struct vfio_group *, struct kvm *);
-
-	fn = symbol_get(vfio_group_set_kvm);
-	if (!fn)
-		return;
-
-	fn(group, kvm);
-
-	symbol_put(vfio_group_set_kvm);
-}
-
 static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
 					     struct kvm_vfio_group *kvg)
 {
@@ -184,7 +171,7 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 
 	mutex_unlock(&kv->lock);
 
-	kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
+	kvg->ops->set_kvm(kvg->filp, dev->kvm);
 	kvm_vfio_update_coherency(dev);
 
 	return 0;
@@ -218,7 +205,7 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 		list_del(&kvg->node);
 		kvm_arch_end_assignment(dev->kvm);
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
-		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
+		kvg->ops->set_kvm(kvg->filp, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		fput(kvg->filp);
 		kfree(kvg);
@@ -334,7 +321,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 
 	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
-		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
+		kvg->ops->set_kvm(kvg->filp, NULL);
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		fput(kvg->filp);
 		list_del(&kvg->node);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (7 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 08/10] vfio: Move vfio_group_set_kvm() into vfio_file_ops Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  2022-04-15  4:21   ` Tian, Kevin
  2022-04-14 18:46 ` [PATCH 10/10] vfio/pci: Use the struct file as the handle not the vfio_group Jason Gunthorpe
  9 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

None of the VFIO APIs take in the vfio_group anymore, so we can remove it
completely.

This has a subtle side effect on the enforced coherency tracking.
vfio_group_get_external_user() was holding on to the container_users
refcount, which would prevent the iommu_domain, and thus the enforced
coherency value, from changing while the group is registered with kvm.

It changes the security proof slightly, to 'the user must hold a group FD
that has a device that cannot enforce DMA coherence'. As opening the group
FD, not attaching the container, is the privileged operation, this doesn't
change the security properties much.

On the flip side it paves the way to changing the iommu_domain/container
attached to a group at runtime which is something that will be required to
support nested translation.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 virt/kvm/vfio.c | 40 ----------------------------------------
 1 file changed, 40 deletions(-)

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 9baf04c5b0cc3d..39834a0653d83a 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -35,7 +35,6 @@ struct kvm_vfio_group {
 	struct list_head node;
 	struct file *filp;
 	const struct vfio_file_ops *ops;
-	struct vfio_group *vfio_group;
 };
 
 struct kvm_vfio {
@@ -44,35 +43,6 @@ struct kvm_vfio {
 	bool noncoherent;
 };
 
-static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
-{
-	struct vfio_group *vfio_group;
-	struct vfio_group *(*fn)(struct file *);
-
-	fn = symbol_get(vfio_group_get_external_user);
-	if (!fn)
-		return ERR_PTR(-EINVAL);
-
-	vfio_group = fn(filep);
-
-	symbol_put(vfio_group_get_external_user);
-
-	return vfio_group;
-}
-
-static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
-{
-	void (*fn)(struct vfio_group *);
-
-	fn = symbol_get(vfio_group_put_external_user);
-	if (!fn)
-		return;
-
-	fn(vfio_group);
-
-	symbol_put(vfio_group_put_external_user);
-}
-
 static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
 					     struct kvm_vfio_group *kvg)
 {
@@ -121,7 +91,6 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 {
 	const struct vfio_file_ops *(*fn)(struct file *filep);
 	struct kvm_vfio *kv = dev->private;
-	struct vfio_group *vfio_group;
 	struct kvm_vfio_group *kvg;
 	struct file *filp;
 	int ret;
@@ -157,15 +126,8 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
 		goto err_free;
 	}
 
-	vfio_group = kvm_vfio_group_get_external_user(filp);
-	if (IS_ERR(vfio_group)) {
-		ret = PTR_ERR(vfio_group);
-		goto err_free;
-	}
-
 	kvg->filp = filp;
 	list_add_tail(&kvg->node, &kv->group_list);
-	kvg->vfio_group = vfio_group;
 
 	kvm_arch_start_assignment(dev->kvm);
 
@@ -206,7 +168,6 @@ static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
 		kvm_arch_end_assignment(dev->kvm);
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
 		kvg->ops->set_kvm(kvg->filp, NULL);
-		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		fput(kvg->filp);
 		kfree(kvg);
 		ret = 0;
@@ -322,7 +283,6 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
 		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
 		kvg->ops->set_kvm(kvg->filp, NULL);
-		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		fput(kvg->filp);
 		list_del(&kvg->node);
 		kfree(kvg);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 10/10] vfio/pci: Use the struct file as the handle not the vfio_group
  2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
                   ` (8 preceding siblings ...)
  2022-04-14 18:46 ` [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm Jason Gunthorpe
@ 2022-04-14 18:46 ` Jason Gunthorpe
  9 siblings, 0 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-14 18:46 UTC (permalink / raw)
  To: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Yi Liu

VFIO PCI does a security check as part of hot reset to prove that the user
has permission to manipulate all the devices that will be impacted by the
reset.

Use a new API vfio_file_has_dev() to perform this security check against
the struct file directly and remove the vfio_group from VFIO PCI.

Since VFIO PCI was the last user of vfio_group_get_external_user() remove
it as well.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 43 +++++++++++-----------
 drivers/vfio/vfio.c              | 63 +++++++++-----------------------
 include/linux/vfio.h             |  2 +-
 3 files changed, 41 insertions(+), 67 deletions(-)
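
The check itself, written out as a hypothetical helper (this is what
vfio_dev_in_groups() becomes below): every device affected by the reset
must be reachable through one of the group files the user passed in:

	static bool demo_dev_in_files(struct vfio_device *vdev,
				      struct file **files, unsigned int count)
	{
		unsigned int i;

		for (i = 0; i < count; i++)
			if (vfio_file_has_dev(files[i], vdev))
				return true;
		return false;
	}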

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index b7bb16f92ac628..ea584934683848 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -577,7 +577,7 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
 
 struct vfio_pci_group_info {
 	int count;
-	struct vfio_group **groups;
+	struct file **files;
 };
 
 static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
@@ -1039,10 +1039,10 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
 	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
 		struct vfio_pci_hot_reset hdr;
 		int32_t *group_fds;
-		struct vfio_group **groups;
+		struct file **files;
 		struct vfio_pci_group_info info;
 		bool slot = false;
-		int group_idx, count = 0, ret = 0;
+		int file_idx, count = 0, ret = 0;
 
 		minsz = offsetofend(struct vfio_pci_hot_reset, count);
 
@@ -1075,17 +1075,17 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
 			return -EINVAL;
 
 		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
-		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
-		if (!group_fds || !groups) {
+		files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL);
+		if (!group_fds || !files) {
 			kfree(group_fds);
-			kfree(groups);
+			kfree(files);
 			return -ENOMEM;
 		}
 
 		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
 				   hdr.count * sizeof(*group_fds))) {
 			kfree(group_fds);
-			kfree(groups);
+			kfree(files);
 			return -EFAULT;
 		}
 
@@ -1094,22 +1094,23 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
 		 * user interface and store the group and iommu ID.  This
 		 * ensures the group is held across the reset.
 		 */
-		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
-			struct vfio_group *group;
-			struct fd f = fdget(group_fds[group_idx]);
-			if (!f.file) {
+		for (file_idx = 0; file_idx < hdr.count; file_idx++) {
+			struct file *file = fget(group_fds[file_idx]);
+			const struct vfio_file_ops *ops;
+
+			if (!file) {
 				ret = -EBADF;
 				break;
 			}
 
-			group = vfio_group_get_external_user(f.file);
-			fdput(f);
-			if (IS_ERR(group)) {
-				ret = PTR_ERR(group);
+			ops = vfio_file_get_ops(file);
+			if (IS_ERR(ops)) {
+				fput(file);
+				ret = PTR_ERR(ops);
 				break;
 			}
 
-			groups[group_idx] = group;
+			files[file_idx] = file;
 		}
 
 		kfree(group_fds);
@@ -1119,15 +1120,15 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
 			goto hot_reset_release;
 
 		info.count = hdr.count;
-		info.groups = groups;
+		info.files = files;
 
 		ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
 
 hot_reset_release:
-		for (group_idx--; group_idx >= 0; group_idx--)
-			vfio_group_put_external_user(groups[group_idx]);
+		for (file_idx--; file_idx >= 0; file_idx--)
+			fput(files[file_idx]);
 
-		kfree(groups);
+		kfree(files);
 		return ret;
 	} else if (cmd == VFIO_DEVICE_IOEVENTFD) {
 		struct vfio_device_ioeventfd ioeventfd;
@@ -1964,7 +1965,7 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
 	unsigned int i;
 
 	for (i = 0; i < groups->count; i++)
-		if (groups->groups[i] == vdev->vdev.group)
+		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
 			return true;
 	return false;
 }
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 2eb63d9dded8fb..69772105ec43e8 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1899,51 +1899,6 @@ static const struct file_operations vfio_device_fops = {
 	.mmap		= vfio_device_fops_mmap,
 };
 
-/*
- * External user API, exported by symbols to be linked dynamically.
- *
- * The protocol includes:
- *  1. do normal VFIO init operation:
- *	- opening a new container;
- *	- attaching group(s) to it;
- *	- setting an IOMMU driver for a container.
- * When IOMMU is set for a container, all groups in it are
- * considered ready to use by an external user.
- *
- * 2. User space passes a group fd to an external user.
- * The external user calls vfio_group_get_external_user()
- * to verify that:
- *	- the group is initialized;
- *	- IOMMU is set for it.
- * If both checks passed, vfio_group_get_external_user()
- * increments the container user counter to prevent
- * the VFIO group from disposal before KVM exits.
- *
- * 3. When the external KVM finishes, it calls
- * vfio_group_put_external_user() to release the VFIO group.
- * This call decrements the container user counter.
- */
-struct vfio_group *vfio_group_get_external_user(struct file *filep)
-{
-	struct vfio_group *group = filep->private_data;
-	int ret;
-
-	if (filep->f_op != &vfio_group_fops)
-		return ERR_PTR(-EINVAL);
-
-	ret = vfio_group_add_container_user(group);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/*
-	 * Since the caller holds the fget on the file group->users must be >= 1
-	 */
-	vfio_group_get(group);
-
-	return group;
-}
-EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
-
 /*
  * External user API, exported by symbols to be linked dynamically.
  * The external user passes in a device pointer
@@ -2068,6 +2023,24 @@ const struct vfio_file_ops *vfio_file_get_ops(struct file *filep)
 }
 EXPORT_SYMBOL_GPL(vfio_file_get_ops);
 
+/**
+ * vfio_file_has_dev - True if the VFIO file is a handle for device
+ * @filep: VFIO file to check
+ * @device: Device that must be part of the file
+ *
+ * Returns true if given file has permission to manipulate the given device.
+ */
+bool vfio_file_has_dev(struct file *filep, struct vfio_device *device)
+{
+	struct vfio_group *group = filep->private_data;
+
+	if (filep->f_op != &vfio_group_fops)
+		return false;
+
+	return group == device->group;
+}
+EXPORT_SYMBOL_GPL(vfio_file_has_dev);
+
 /*
  * Sub-module support
  */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index bc6e47f3f26560..ea254283fa0522 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -145,11 +145,11 @@ struct vfio_file_ops {
 	bool (*is_enforced_coherent)(struct file *filep);
 	void (*set_kvm)(struct file *filep, struct kvm *kvm);
 };
-extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
 extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
 								*dev);
 const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
+bool vfio_file_has_dev(struct file *filep, struct vfio_device *device);
 
 #define VFIO_PIN_PAGES_MAX_ENTRIES	(PAGE_SIZE/sizeof(unsigned long))
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* RE: [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions
  2022-04-14 18:46 ` [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions Jason Gunthorpe
@ 2022-04-15  3:36   ` Tian, Kevin
  2022-04-15  7:18   ` Christoph Hellwig
  1 sibling, 0 replies; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  3:36 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> To make it easier to read and change in following patches.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
>  virt/kvm/vfio.c | 271 ++++++++++++++++++++++++++----------------------
>  1 file changed, 146 insertions(+), 125 deletions(-)
> 
> This is best viewed using 'git diff -b' to ignore the whitespace change.
> 
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 8fcbc50221c2d2..a1167ab7a2246f 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -181,149 +181,170 @@ static void kvm_vfio_update_coherency(struct
> kvm_device *dev)
>  	mutex_unlock(&kv->lock);
>  }
> 
> -static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
> +static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
>  {
>  	struct kvm_vfio *kv = dev->private;
>  	struct vfio_group *vfio_group;
>  	struct kvm_vfio_group *kvg;
> -	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
>  	struct fd f;
> -	int32_t fd;
>  	int ret;
> 
> +	f = fdget(fd);
> +	if (!f.file)
> +		return -EBADF;
> +
> +	vfio_group = kvm_vfio_group_get_external_user(f.file);
> +	fdput(f);
> +
> +	if (IS_ERR(vfio_group))
> +		return PTR_ERR(vfio_group);
> +
> +	mutex_lock(&kv->lock);
> +
> +	list_for_each_entry(kvg, &kv->group_list, node) {
> +		if (kvg->vfio_group == vfio_group) {
> +			ret = -EEXIST;
> +			goto err_unlock;
> +		}
> +	}
> +
> +	kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
> +	if (!kvg) {
> +		ret = -ENOMEM;
> +		goto err_unlock;
> +	}
> +
> +	list_add_tail(&kvg->node, &kv->group_list);
> +	kvg->vfio_group = vfio_group;
> +
> +	kvm_arch_start_assignment(dev->kvm);
> +
> +	mutex_unlock(&kv->lock);
> +
> +	kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
> +	kvm_vfio_update_coherency(dev);
> +
> +	return 0;
> +err_unlock:
> +	mutex_unlock(&kv->lock);
> +	kvm_vfio_group_put_external_user(vfio_group);
> +	return ret;
> +}
> +
> +static int kvm_vfio_group_del(struct kvm_device *dev, unsigned int fd)
> +{
> +	struct kvm_vfio *kv = dev->private;
> +	struct kvm_vfio_group *kvg;
> +	struct fd f;
> +	int ret;
> +
> +	f = fdget(fd);
> +	if (!f.file)
> +		return -EBADF;
> +
> +	ret = -ENOENT;
> +
> +	mutex_lock(&kv->lock);
> +
> +	list_for_each_entry(kvg, &kv->group_list, node) {
> +		if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
> +							f.file))
> +			continue;
> +
> +		list_del(&kvg->node);
> +		kvm_arch_end_assignment(dev->kvm);
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> +		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg-
> >vfio_group);
> +#endif
> +		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
> +		kvm_vfio_group_put_external_user(kvg->vfio_group);
> +		kfree(kvg);
> +		ret = 0;
> +		break;
> +	}
> +
> +	mutex_unlock(&kv->lock);
> +
> +	fdput(f);
> +
> +	kvm_vfio_update_coherency(dev);
> +
> +	return ret;
> +}
> +
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> +static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
> +					void __user *arg)
> +{
> +	struct kvm_vfio_spapr_tce param;
> +	struct kvm_vfio *kv = dev->private;
> +	struct vfio_group *vfio_group;
> +	struct kvm_vfio_group *kvg;
> +	struct fd f;
> +	struct iommu_group *grp;
> +	int ret;
> +
> +	if (copy_from_user(&param, arg, sizeof(struct kvm_vfio_spapr_tce)))
> +		return -EFAULT;
> +
> +	f = fdget(param.groupfd);
> +	if (!f.file)
> +		return -EBADF;
> +
> +	vfio_group = kvm_vfio_group_get_external_user(f.file);
> +	fdput(f);
> +
> +	if (IS_ERR(vfio_group))
> +		return PTR_ERR(vfio_group);
> +
> +	grp = kvm_vfio_group_get_iommu_group(vfio_group);
> +	if (WARN_ON_ONCE(!grp)) {
> +		ret = -EIO;
> +		goto err_put_external;
> +	}
> +
> +	ret = -ENOENT;
> +
> +	mutex_lock(&kv->lock);
> +
> +	list_for_each_entry(kvg, &kv->group_list, node) {
> +		if (kvg->vfio_group != vfio_group)
> +			continue;
> +
> +		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
> param.tablefd,
> +						       grp);
> +		break;
> +	}
> +
> +	mutex_unlock(&kv->lock);
> +
> +	iommu_group_put(grp);
> +err_put_external:
> +	kvm_vfio_group_put_external_user(vfio_group);
> +	return ret;
> +}
> +#endif
> +
> +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
> +{
> +	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
> +	int32_t fd;
> +
>  	switch (attr) {
>  	case KVM_DEV_VFIO_GROUP_ADD:
>  		if (get_user(fd, argp))
>  			return -EFAULT;
> -
> -		f = fdget(fd);
> -		if (!f.file)
> -			return -EBADF;
> -
> -		vfio_group = kvm_vfio_group_get_external_user(f.file);
> -		fdput(f);
> -
> -		if (IS_ERR(vfio_group))
> -			return PTR_ERR(vfio_group);
> -
> -		mutex_lock(&kv->lock);
> -
> -		list_for_each_entry(kvg, &kv->group_list, node) {
> -			if (kvg->vfio_group == vfio_group) {
> -				mutex_unlock(&kv->lock);
> -
> 	kvm_vfio_group_put_external_user(vfio_group);
> -				return -EEXIST;
> -			}
> -		}
> -
> -		kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT);
> -		if (!kvg) {
> -			mutex_unlock(&kv->lock);
> -			kvm_vfio_group_put_external_user(vfio_group);
> -			return -ENOMEM;
> -		}
> -
> -		list_add_tail(&kvg->node, &kv->group_list);
> -		kvg->vfio_group = vfio_group;
> -
> -		kvm_arch_start_assignment(dev->kvm);
> -
> -		mutex_unlock(&kv->lock);
> -
> -		kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
> -
> -		kvm_vfio_update_coherency(dev);
> -
> -		return 0;
> +		return kvm_vfio_group_add(dev, fd);
> 
>  	case KVM_DEV_VFIO_GROUP_DEL:
>  		if (get_user(fd, argp))
>  			return -EFAULT;
> +		return kvm_vfio_group_del(dev, fd);
> 
> -		f = fdget(fd);
> -		if (!f.file)
> -			return -EBADF;
> -
> -		ret = -ENOENT;
> -
> -		mutex_lock(&kv->lock);
> -
> -		list_for_each_entry(kvg, &kv->group_list, node) {
> -			if (!kvm_vfio_external_group_match_file(kvg-
> >vfio_group,
> -								f.file))
> -				continue;
> -
> -			list_del(&kvg->node);
> -			kvm_arch_end_assignment(dev->kvm);
>  #ifdef CONFIG_SPAPR_TCE_IOMMU
> -			kvm_spapr_tce_release_vfio_group(dev->kvm,
> -							 kvg->vfio_group);
> +	case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
> +		return kvm_vfio_group_set_spapr_tce(dev, (void __user
> *)arg);
>  #endif
> -			kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
> -			kvm_vfio_group_put_external_user(kvg->vfio_group);
> -			kfree(kvg);
> -			ret = 0;
> -			break;
> -		}
> -
> -		mutex_unlock(&kv->lock);
> -
> -		fdput(f);
> -
> -		kvm_vfio_update_coherency(dev);
> -
> -		return ret;
> -
> -#ifdef CONFIG_SPAPR_TCE_IOMMU
> -	case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: {
> -		struct kvm_vfio_spapr_tce param;
> -		struct kvm_vfio *kv = dev->private;
> -		struct vfio_group *vfio_group;
> -		struct kvm_vfio_group *kvg;
> -		struct fd f;
> -		struct iommu_group *grp;
> -
> -		if (copy_from_user(&param, (void __user *)arg,
> -				sizeof(struct kvm_vfio_spapr_tce)))
> -			return -EFAULT;
> -
> -		f = fdget(param.groupfd);
> -		if (!f.file)
> -			return -EBADF;
> -
> -		vfio_group = kvm_vfio_group_get_external_user(f.file);
> -		fdput(f);
> -
> -		if (IS_ERR(vfio_group))
> -			return PTR_ERR(vfio_group);
> -
> -		grp = kvm_vfio_group_get_iommu_group(vfio_group);
> -		if (WARN_ON_ONCE(!grp)) {
> -			kvm_vfio_group_put_external_user(vfio_group);
> -			return -EIO;
> -		}
> -
> -		ret = -ENOENT;
> -
> -		mutex_lock(&kv->lock);
> -
> -		list_for_each_entry(kvg, &kv->group_list, node) {
> -			if (kvg->vfio_group != vfio_group)
> -				continue;
> -
> -			ret = kvm_spapr_tce_attach_iommu_group(dev-
> >kvm,
> -					param.tablefd, grp);
> -			break;
> -		}
> -
> -		mutex_unlock(&kv->lock);
> -
> -		iommu_group_put(grp);
> -		kvm_vfio_group_put_external_user(vfio_group);
> -
> -		return ret;
> -	}
> -#endif /* CONFIG_SPAPR_TCE_IOMMU */
>  	}
> 
>  	return -ENXIO;
> --
> 2.35.1


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group
  2022-04-14 18:46 ` [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group Jason Gunthorpe
@ 2022-04-15  3:44   ` Tian, Kevin
  2022-04-15 22:24     ` Jason Gunthorpe
  2022-04-15  7:20   ` Christoph Hellwig
  1 sibling, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  3:44 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> Following patches will chage the APIs to use the struct file as the handle

s/chage/change

> @@ -304,10 +309,10 @@ static int kvm_vfio_group_set_spapr_tce(struct
> kvm_device *dev,
>  		return -EBADF;
> 
>  	vfio_group = kvm_vfio_group_get_external_user(f.file);
> -	fdput(f);
> -
> -	if (IS_ERR(vfio_group))
> -		return PTR_ERR(vfio_group);
> +	if (IS_ERR(vfio_group)) {
> +		ret = PTR_ERR(vfio_group);
> +		goto err_fdput;
> +	}
> 
>  	grp = kvm_vfio_group_get_iommu_group(vfio_group);
>  	if (WARN_ON_ONCE(!grp)) {

Move the above two external calls into the below loop after the file is
matched...

> @@ -320,7 +325,7 @@ static int kvm_vfio_group_set_spapr_tce(struct
> kvm_device *dev,
>  	mutex_lock(&kv->lock);
> 
>  	list_for_each_entry(kvg, &kv->group_list, node) {
> -		if (kvg->vfio_group != vfio_group)
> +		if (kvg->filp != f.file)
>  			continue;
> 

... here. Though they will be removed in a later patch, doing so in
this patch is slightly more reasonable.
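
Roughly, i.e. something like this inside the locked loop (an untested
sketch only, reusing the locals already declared in
kvm_vfio_group_set_spapr_tce()):

	list_for_each_entry(kvg, &kv->group_list, node) {
		if (kvg->filp != f.file)
			continue;

		/* only resolve the group once the file has matched */
		vfio_group = kvm_vfio_group_get_external_user(f.file);
		if (IS_ERR(vfio_group)) {
			ret = PTR_ERR(vfio_group);
			break;
		}

		grp = kvm_vfio_group_get_iommu_group(vfio_group);
		if (WARN_ON_ONCE(!grp)) {
			kvm_vfio_group_put_external_user(vfio_group);
			ret = -EIO;
			break;
		}

		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
						       param.tablefd, grp);
		iommu_group_put(grp);
		kvm_vfio_group_put_external_user(vfio_group);
		break;
	}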

otherwise,

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-14 18:46 ` [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s Jason Gunthorpe
@ 2022-04-15  3:57   ` Tian, Kevin
  2022-04-15 21:54     ` Jason Gunthorpe
  2022-04-15  4:45   ` Christoph Hellwig
  1 sibling, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  3:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> kvm and VFIO need to be coupled together however neither is willing to
> tolerate a direct module dependency. Instead when kvm is given a VFIO FD
> it uses many symbol_get()'s to access VFIO.
> 
> Provide a single VFIO function vfio_file_get_ops() which validates the
> given struct file * is a VFIO file and then returns a struct of ops.

VFIO has multiple files (container, group, and device). Here and in other
places the text seems to assume a VFIO file is just a group file. While that
is correct in this external-facing context, calling it a 'VFIO group file'
would probably be clearer in the various code comments and patch descriptions.

> 
> Following patches will redo each of the symbol_get() calls into an
> indirection through this ops struct.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>


Reviewed-by: Kevin Tian <kevin.tian@intel.com>

Out of curiosity, how do you envision this when iommufd is introduced?
Will we need a generic ops abstraction so both vfio and iommufd can
register their own ops to keep the kvm side generic, or will a new
protocol be introduced between iommufd and kvm?

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops
  2022-04-14 18:46 ` [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops Jason Gunthorpe
@ 2022-04-15  3:59   ` Tian, Kevin
  2022-04-15  7:31   ` Christoph Hellwig
  1 sibling, 0 replies; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  3:59 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> The only user wants to get a pointer to the struct iommu_group associated
> with the VFIO file being used. Instead of returning the group ID then
> searching sysfs for that string just directly return the iommu_group
> pointer already held by the vfio_group struct.
> 
> It already has a safe lifetime due to the struct file kref.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

This is a nice improvement.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
>  drivers/vfio/vfio.c  | 20 ++++++++-----
>  include/linux/vfio.h |  2 +-
>  virt/kvm/vfio.c      | 68 ++++++--------------------------------------
>  3 files changed, 22 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 93508f6a8beda5..4d62de69705573 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1919,10 +1919,7 @@ static const struct file_operations
> vfio_device_fops = {
>   * increments the container user counter to prevent
>   * the VFIO group from disposal before KVM exits.
>   *
> - * 3. The external user calls vfio_external_user_iommu_id()
> - * to know an IOMMU ID.
> - *
> - * 4. When the external KVM finishes, it calls
> + * 3. When the external KVM finishes, it calls
>   * vfio_group_put_external_user() to release the VFIO group.
>   * This call decrements the container user counter.
>   */
> @@ -2001,11 +1998,19 @@ bool vfio_external_group_match_file(struct
> vfio_group *test_group,
>  }
>  EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
> 
> -int vfio_external_user_iommu_id(struct vfio_group *group)
> +/**
> + * vfio_file_iommu_group - Return the struct iommu_group for the vfio file
> + * @filep: VFIO file
> + *
> + * The returned iommu_group is valid as long as a ref is held on the filep.
> + * VFIO files always have an iommu_group, so this cannot fail.
> + */
> +static struct iommu_group *vfio_file_iommu_group(struct file *filep)
>  {
> -	return iommu_group_id(group->iommu_group);
> +	struct vfio_group *group = filep->private_data;
> +
> +	return group->iommu_group;
>  }
> -EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
> 
>  long vfio_external_check_extension(struct vfio_group *group, unsigned long
> arg)
>  {
> @@ -2014,6 +2019,7 @@ long vfio_external_check_extension(struct
> vfio_group *group, unsigned long arg)
>  EXPORT_SYMBOL_GPL(vfio_external_check_extension);
> 
>  static const struct vfio_file_ops vfio_file_group_ops = {
> +	.get_iommu_group = vfio_file_iommu_group,
>  };
> 
>  /**
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 409bbf817206cc..e5ca7d5a0f1584 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -139,6 +139,7 @@ int vfio_mig_get_next_state(struct vfio_device
> *device,
>   * External user API
>   */
>  struct vfio_file_ops {
> +	struct iommu_group *(*get_iommu_group)(struct file *filep);
>  };
>  extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
>  extern void vfio_group_put_external_user(struct vfio_group *group);
> @@ -146,7 +147,6 @@ extern struct vfio_group
> *vfio_group_get_external_user_from_dev(struct device
>  								*dev);
>  extern bool vfio_external_group_match_file(struct vfio_group *group,
>  					   struct file *filep);
> -extern int vfio_external_user_iommu_id(struct vfio_group *group);
>  extern long vfio_external_check_extension(struct vfio_group *group,
>  					  unsigned long arg);
>  const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 254d8c18378163..743e4870fa1825 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -118,47 +118,14 @@ static bool kvm_vfio_group_is_coherent(struct
> vfio_group *vfio_group)
>  	return ret > 0;
>  }
> 
> -static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group)
> -{
> -	int (*fn)(struct vfio_group *);
> -	int ret = -EINVAL;
> -
> -	fn = symbol_get(vfio_external_user_iommu_id);
> -	if (!fn)
> -		return ret;
> -
> -	ret = fn(vfio_group);
> -
> -	symbol_put(vfio_external_user_iommu_id);
> -
> -	return ret;
> -}
> -
> -static struct iommu_group *kvm_vfio_group_get_iommu_group(
> -		struct vfio_group *group)
> -{
> -	int group_id = kvm_vfio_external_user_iommu_id(group);
> -
> -	if (group_id < 0)
> -		return NULL;
> -
> -	return iommu_group_get_by_id(group_id);
> -}
> -
>  static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
> -		struct vfio_group *vfio_group)
> +					     struct kvm_vfio_group *kvg)
>  {
> -	struct iommu_group *grp;
> -
>  	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
>  		return;
> 
> -	grp = kvm_vfio_group_get_iommu_group(vfio_group);
> -	if (WARN_ON_ONCE(!grp))
> -		return;
> -
> -	kvm_spapr_tce_release_iommu_group(kvm, grp);
> -	iommu_group_put(grp);
> +	kvm_spapr_tce_release_iommu_group(kvm,
> +					  kvg->ops->get_iommu_group(kvg-
> >filp));
>  }
> 
>  /*
> @@ -283,7 +250,7 @@ static int kvm_vfio_group_del(struct kvm_device
> *dev, unsigned int fd)
> 
>  		list_del(&kvg->node);
>  		kvm_arch_end_assignment(dev->kvm);
> -		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg-
> >vfio_group);
> +		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
>  		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
>  		kvm_vfio_group_put_external_user(kvg->vfio_group);
>  		fput(kvg->filp);
> @@ -306,10 +273,8 @@ static int kvm_vfio_group_set_spapr_tce(struct
> kvm_device *dev,
>  {
>  	struct kvm_vfio_spapr_tce param;
>  	struct kvm_vfio *kv = dev->private;
> -	struct vfio_group *vfio_group;
>  	struct kvm_vfio_group *kvg;
>  	struct fd f;
> -	struct iommu_group *grp;
>  	int ret;
> 
>  	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
> @@ -322,18 +287,6 @@ static int kvm_vfio_group_set_spapr_tce(struct
> kvm_device *dev,
>  	if (!f.file)
>  		return -EBADF;
> 
> -	vfio_group = kvm_vfio_group_get_external_user(f.file);
> -	if (IS_ERR(vfio_group)) {
> -		ret = PTR_ERR(vfio_group);
> -		goto err_fdput;
> -	}
> -
> -	grp = kvm_vfio_group_get_iommu_group(vfio_group);
> -	if (WARN_ON_ONCE(!grp)) {
> -		ret = -EIO;
> -		goto err_put_external;
> -	}
> -
>  	ret = -ENOENT;
> 
>  	mutex_lock(&kv->lock);
> @@ -341,18 +294,13 @@ static int kvm_vfio_group_set_spapr_tce(struct
> kvm_device *dev,
>  	list_for_each_entry(kvg, &kv->group_list, node) {
>  		if (kvg->filp != f.file)
>  			continue;
> -
> -		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
> param.tablefd,
> -						       grp);
> +		ret = kvm_spapr_tce_attach_iommu_group(
> +			dev->kvm, param.tablefd,
> +			kvg->ops->get_iommu_group(kvg->filp));
>  		break;
>  	}
> 
>  	mutex_unlock(&kv->lock);
> -
> -	iommu_group_put(grp);
> -err_put_external:
> -	kvm_vfio_group_put_external_user(vfio_group);
> -err_fdput:
>  	fdput(f);
>  	return ret;
>  }
> @@ -418,7 +366,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
>  	struct kvm_vfio_group *kvg, *tmp;
> 
>  	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
> -		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg-
> >vfio_group);
> +		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
>  		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
>  		kvm_vfio_group_put_external_user(kvg->vfio_group);
>  		fput(kvg->filp);
> --
> 2.35.1


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 06/10] vfio: Remove vfio_external_group_match_file()
  2022-04-14 18:46 ` [PATCH 06/10] vfio: Remove vfio_external_group_match_file() Jason Gunthorpe
@ 2022-04-15  4:02   ` Tian, Kevin
  2022-04-15  7:32   ` Christoph Hellwig
  1 sibling, 0 replies; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  4:02 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> vfio_group_fops_open() ensures there is only ever one struct file open for
> any struct vfio_group at any time:
> 
> 	/* Do we need multiple instances of the group open?  Seems not. */
> 	opened = atomic_cmpxchg(&group->opened, 0, 1);
> 	if (opened) {
> 		vfio_group_put(group);
> 		return -EBUSY;
> 
> Therefor the struct file * can be used directly to search the list of VFIO
> groups that KVM keeps instead of using the
> vfio_external_group_match_file() callback to try to figure out if the
> passed in FD matches the list or not.
> 
> Delete vfio_external_group_match_file().
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
>  drivers/vfio/vfio.c  |  9 ---------
>  include/linux/vfio.h |  2 --
>  virt/kvm/vfio.c      | 19 +------------------
>  3 files changed, 1 insertion(+), 29 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 4d62de69705573..eb65b4c80ece64 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1989,15 +1989,6 @@ void vfio_group_put_external_user(struct
> vfio_group *group)
>  }
>  EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
> 
> -bool vfio_external_group_match_file(struct vfio_group *test_group,
> -				    struct file *filep)
> -{
> -	struct vfio_group *group = filep->private_data;
> -
> -	return (filep->f_op == &vfio_group_fops) && (group == test_group);
> -}
> -EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
> -
>  /**
>   * vfio_file_iommu_group - Return the struct iommu_group for the vfio file
>   * @filep: VFIO file
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index e5ca7d5a0f1584..d09a1856d4e5ea 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -145,8 +145,6 @@ extern struct vfio_group
> *vfio_group_get_external_user(struct file *filep);
>  extern void vfio_group_put_external_user(struct vfio_group *group);
>  extern struct vfio_group *vfio_group_get_external_user_from_dev(struct
> device
>  								*dev);
> -extern bool vfio_external_group_match_file(struct vfio_group *group,
> -					   struct file *filep);
>  extern long vfio_external_check_extension(struct vfio_group *group,
>  					  unsigned long arg);
>  const struct vfio_file_ops *vfio_file_get_ops(struct file *filep);
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 743e4870fa1825..955cabc0683b29 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -60,22 +60,6 @@ static struct vfio_group
> *kvm_vfio_group_get_external_user(struct file *filep)
>  	return vfio_group;
>  }
> 
> -static bool kvm_vfio_external_group_match_file(struct vfio_group *group,
> -					       struct file *filep)
> -{
> -	bool ret, (*fn)(struct vfio_group *, struct file *);
> -
> -	fn = symbol_get(vfio_external_group_match_file);
> -	if (!fn)
> -		return false;
> -
> -	ret = fn(group, filep);
> -
> -	symbol_put(vfio_external_group_match_file);
> -
> -	return ret;
> -}
> -
>  static void kvm_vfio_group_put_external_user(struct vfio_group
> *vfio_group)
>  {
>  	void (*fn)(struct vfio_group *);
> @@ -244,8 +228,7 @@ static int kvm_vfio_group_del(struct kvm_device
> *dev, unsigned int fd)
>  	mutex_lock(&kv->lock);
> 
>  	list_for_each_entry(kvg, &kv->group_list, node) {
> -		if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
> -							f.file))
> +		if (kvg->filp != f.file)
>  			continue;
> 
>  		list_del(&kvg->node);
> --
> 2.35.1


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops
  2022-04-14 18:46 ` [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops Jason Gunthorpe
@ 2022-04-15  4:07   ` Tian, Kevin
  2022-04-19 19:23     ` Jason Gunthorpe
  2022-04-15  4:48   ` Christoph Hellwig
  1 sibling, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  4:07 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> Focus the new op into is_enforced_coherent() which only checks the

s/coherent/coherency/

> enforced DMA coherency property of the file.
> 
> Make the new op self contained by properly refcounting the container
> before touching it.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/vfio.c  | 27 ++++++++++++++++++++++++---
>  include/linux/vfio.h |  3 +--
>  virt/kvm/vfio.c      | 18 +-----------------
>  3 files changed, 26 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index eb65b4c80ece64..c08093fb6d28d5 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -2003,14 +2003,35 @@ static struct iommu_group
> *vfio_file_iommu_group(struct file *filep)
>  	return group->iommu_group;
>  }
> 
> -long vfio_external_check_extension(struct vfio_group *group, unsigned long
> arg)
> +/**
> + * vfio_file_enforced_coherent - True if the DMA associated with the VFIO
> file
> + *        is always CPU cache coherent
> + * @filep: VFIO file
> + *
> + * Enforced coherent means that the IOMMU ignores things like the PCIe

s/coherent/coherency/

> no-snoop
> + * bit in DMA transactions. A return of false indicates that the user has
> + * rights to access additional instructions such as wbinvd on x86.
> + */
> +static bool vfio_file_enforced_coherent(struct file *filep)
>  {
> -	return vfio_ioctl_check_extension(group->container, arg);
> +	struct vfio_group *group = filep->private_data;
> +	bool ret;
> +
> +	/*
> +	 * Since the coherency state is determined only once a container is
> +	 * attached the user must do so before they can prove they have
> +	 * permission.
> +	 */
> +	if (vfio_group_add_container_user(group))
> +		return true;

I wonder whether it's better to return an error here and let KVM
decide whether it wants to allow wbinvd in such a case (though
likely the conclusion is the same) or to simply reject adding the group.
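
i.e. roughly something like this on the vfio side (a sketch of that
alternative only; the errno-returning signature is the hypothetical part
KVM would then have to handle):

	static int vfio_file_enforced_coherent(struct file *filep, bool *enforced)
	{
		struct vfio_group *group = filep->private_data;

		/* No container attached yet, so coherency is not yet known */
		if (vfio_group_add_container_user(group))
			return -EINVAL;

		*enforced = vfio_ioctl_check_extension(group->container,
						       VFIO_DMA_CC_IOMMU) > 0;
		vfio_group_try_dissolve_container(group);
		return 0;
	}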

Thanks
Kevin

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 08/10] vfio: Move vfio_group_set_kvm() into vfio_file_ops
  2022-04-14 18:46 ` [PATCH 08/10] vfio: Move vfio_group_set_kvm() into vfio_file_ops Jason Gunthorpe
@ 2022-04-15  4:09   ` Tian, Kevin
  0 siblings, 0 replies; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  4:09 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> Just move the exported function into the op and have it accept the struct
> file instead of vfio_group.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
>  drivers/vfio/vfio.c  | 26 ++++++++++++++++++--------
>  include/linux/vfio.h |  5 +++--
>  virt/kvm/vfio.c      | 19 +++----------------
>  3 files changed, 24 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index c08093fb6d28d5..2eb63d9dded8fb 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -2029,9 +2029,27 @@ static bool vfio_file_enforced_coherent(struct file
> *filep)
>  	return ret;
>  }
> 
> +/**
> + * vfio_file_set_kvm - Link a kvm with VFIO drivers
> + * @filep: VFIO file
> + * @kvm: KVM to link
> + *
> + * The kvm pointer will be forwarded to all the vfio_device's attached to the
> + * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
> + */
> +static void vfio_file_set_kvm(struct file *filep, struct kvm *kvm)
> +{
> +	struct vfio_group *group = filep->private_data;
> +
> +	group->kvm = kvm;
> +	blocking_notifier_call_chain(&group->notifier,
> +				     VFIO_GROUP_NOTIFY_SET_KVM, kvm);
> +}
> +
>  static const struct vfio_file_ops vfio_file_group_ops = {
>  	.get_iommu_group = vfio_file_iommu_group,
>  	.is_enforced_coherent = vfio_file_enforced_coherent,
> +	.set_kvm = vfio_file_set_kvm,
>  };
> 
>  /**
> @@ -2461,14 +2479,6 @@ static int vfio_unregister_iommu_notifier(struct
> vfio_group *group,
>  	return ret;
>  }
> 
> -void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
> -{
> -	group->kvm = kvm;
> -	blocking_notifier_call_chain(&group->notifier,
> -				VFIO_GROUP_NOTIFY_SET_KVM, kvm);
> -}
> -EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
> -
>  static int vfio_register_group_notifier(struct vfio_group *group,
>  					unsigned long *events,
>  					struct notifier_block *nb)
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index b1583eb80f12e6..bc6e47f3f26560 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -15,6 +15,8 @@
>  #include <linux/poll.h>
>  #include <uapi/linux/vfio.h>
> 
> +struct kvm;
> +
>  /*
>   * VFIO devices can be placed in a set, this allows all devices to share this
>   * structure and the VFIO core will provide a lock that is held around
> @@ -141,6 +143,7 @@ int vfio_mig_get_next_state(struct vfio_device
> *device,
>  struct vfio_file_ops {
>  	struct iommu_group *(*get_iommu_group)(struct file *filep);
>  	bool (*is_enforced_coherent)(struct file *filep);
> +	void (*set_kvm)(struct file *filep, struct kvm *kvm);
>  };
>  extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
>  extern void vfio_group_put_external_user(struct vfio_group *group);
> @@ -186,8 +189,6 @@ extern int vfio_unregister_notifier(struct device *dev,
>  				    enum vfio_notify_type type,
>  				    struct notifier_block *nb);
> 
> -struct kvm;
> -extern void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm);
> 
>  /*
>   * Sub-module helpers
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index f5ef78192a97ab..9baf04c5b0cc3d 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -73,19 +73,6 @@ static void kvm_vfio_group_put_external_user(struct
> vfio_group *vfio_group)
>  	symbol_put(vfio_group_put_external_user);
>  }
> 
> -static void kvm_vfio_group_set_kvm(struct vfio_group *group, struct kvm
> *kvm)
> -{
> -	void (*fn)(struct vfio_group *, struct kvm *);
> -
> -	fn = symbol_get(vfio_group_set_kvm);
> -	if (!fn)
> -		return;
> -
> -	fn(group, kvm);
> -
> -	symbol_put(vfio_group_set_kvm);
> -}
> -
>  static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
>  					     struct kvm_vfio_group *kvg)
>  {
> @@ -184,7 +171,7 @@ static int kvm_vfio_group_add(struct kvm_device
> *dev, unsigned int fd)
> 
>  	mutex_unlock(&kv->lock);
> 
> -	kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
> +	kvg->ops->set_kvm(kvg->filp, dev->kvm);
>  	kvm_vfio_update_coherency(dev);
> 
>  	return 0;
> @@ -218,7 +205,7 @@ static int kvm_vfio_group_del(struct kvm_device
> *dev, unsigned int fd)
>  		list_del(&kvg->node);
>  		kvm_arch_end_assignment(dev->kvm);
>  		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
> -		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
> +		kvg->ops->set_kvm(kvg->filp, NULL);
>  		kvm_vfio_group_put_external_user(kvg->vfio_group);
>  		fput(kvg->filp);
>  		kfree(kvg);
> @@ -334,7 +321,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
> 
>  	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
>  		kvm_spapr_tce_release_vfio_group(dev->kvm, kvg);
> -		kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
> +		kvg->ops->set_kvm(kvg->filp, NULL);
>  		kvm_vfio_group_put_external_user(kvg->vfio_group);
>  		fput(kvg->filp);
>  		list_del(&kvg->node);
> --
> 2.35.1


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm
  2022-04-14 18:46 ` [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm Jason Gunthorpe
@ 2022-04-15  4:21   ` Tian, Kevin
  2022-04-15 21:56     ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-15  4:21 UTC (permalink / raw)
  To: Jason Gunthorpe, Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini
  Cc: Eric Auger, Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 2:46 AM
> 
> None of the VFIO APIs take in the vfio_group anymore, so we can remove it
> completely.
> 
> This has a subtle side effect on the enforced coherency tracking. The
> vfio_group_get_external_user() was holding on to the container_users which
> would prevent the iommu_domain and thus the enforced coherency value
> from
> changing while the group is registered with kvm.
> 
> It changes the security proof slightly into 'user must hold a group FD
> that has a device that cannot enforce DMA coherence'. As opening the group
> FD, not attaching the container, is the privileged operation this doesn't
> change the security properties much.

If we allow vfio_file_enforced_coherent() to return an error, can the security
proof then be sustained? In that case kvm could simply reject adding a group
which is opened but not attached to a container.
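
e.g. in kvm_vfio_group_add() (sketch only, assuming the hypothetical
errno-returning variant of the op discussed for patch 07):

	bool enforced;

	/* Reject a group that is open but not yet attached to a container */
	ret = kvg->ops->is_enforced_coherent(kvg->filp, &enforced);
	if (ret)
		goto err_unlock;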

Thanks
Kevin

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-14 18:46 ` [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s Jason Gunthorpe
  2022-04-15  3:57   ` Tian, Kevin
@ 2022-04-15  4:45   ` Christoph Hellwig
  2022-04-15 12:13     ` Jason Gunthorpe
  1 sibling, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  4:45 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

On Thu, Apr 14, 2022 at 03:46:03PM -0300, Jason Gunthorpe wrote:
> kvm and VFIO need to be coupled together however neither is willing to
> tolerate a direct module dependency. Instead when kvm is given a VFIO FD
> it uses many symbol_get()'s to access VFIO.
> 
> Provide a single VFIO function vfio_file_get_ops() which validates the
> given struct file * is a VFIO file and then returns a struct of ops.
> 
> Following patches will redo each of the symbol_get() calls into an
> indirection through this ops struct.

So I got annoyed at this as well a while ago and I still think this
is the wrong way around.

I'd much rather EXPORT_SYMBOL_GPL kvm_register_device_ops and
just let kvm_vfio_ops live in a module than all the symbol_get
craziness.  We'll need to be careful to deal with unload races
or just not allow unloading, though.
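
i.e. roughly (a sketch of that idea only; the module split and Kconfig
wiring are not worked out here):

	/* in kvm core: export the existing registration hook */
	EXPORT_SYMBOL_GPL(kvm_register_device_ops);

	/* in a module that is allowed to link against vfio directly */
	static int __init kvm_vfio_init(void)
	{
		return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
	}
	module_init(kvm_vfio_init);

	/*
	 * Unloading would need kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO)
	 * plus care about the races mentioned above, or simply be disallowed.
	 */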

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs
  2022-04-14 18:46 ` [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs Jason Gunthorpe
@ 2022-04-15  4:47   ` Christoph Hellwig
  2022-04-15 12:13     ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  4:47 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

On Thu, Apr 14, 2022 at 03:46:01PM -0300, Jason Gunthorpe wrote:
> Use IS_ENABLED and static inlines instead of just ifdef'ing away all the
> PPC code. This allows it to be compile tested on all platforms and makes
> it easier to maintain.

That's even uglier than what we had.  I'd rather have a new vfio_ppc.c
to implement it; then you can have stubs for it in the header (or IS_ENABLED
if you really want), but we don't need stubs for ppc-specific arch
functionality in a consumer of it.
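
i.e. something along the lines of (sketch; the header and helper name are
invented for illustration):

	/* virt/kvm/vfio_ppc.h - names invented for illustration */
	#ifdef CONFIG_SPAPR_TCE_IOMMU
	int kvm_vfio_spapr_tce_attach(struct kvm_device *dev, void __user *arg);
	#else
	static inline int kvm_vfio_spapr_tce_attach(struct kvm_device *dev,
						    void __user *arg)
	{
		return -ENXIO;
	}
	#endif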

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops
  2022-04-14 18:46 ` [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops Jason Gunthorpe
  2022-04-15  4:07   ` Tian, Kevin
@ 2022-04-15  4:48   ` Christoph Hellwig
  1 sibling, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  4:48 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

On Thu, Apr 14, 2022 at 03:46:06PM -0300, Jason Gunthorpe wrote:
> Focus the new op into is_enforced_coherent() which only checks the
> enforced DMA coherency property of the file.

I have a really hard time parsing this sentence.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions
  2022-04-14 18:46 ` [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions Jason Gunthorpe
  2022-04-15  3:36   ` Tian, Kevin
@ 2022-04-15  7:18   ` Christoph Hellwig
  1 sibling, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  7:18 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group
  2022-04-14 18:46 ` [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group Jason Gunthorpe
  2022-04-15  3:44   ` Tian, Kevin
@ 2022-04-15  7:20   ` Christoph Hellwig
  2022-04-19 19:24     ` Jason Gunthorpe
  1 sibling, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  7:20 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

I'd name the field file instead of filp, but otherwise this looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops
  2022-04-14 18:46 ` [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops Jason Gunthorpe
  2022-04-15  3:59   ` Tian, Kevin
@ 2022-04-15  7:31   ` Christoph Hellwig
  2022-04-15 12:25     ` Jason Gunthorpe
  1 sibling, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  7:31 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

On Thu, Apr 14, 2022 at 03:46:04PM -0300, Jason Gunthorpe wrote:
> The only user wants to get a pointer to the struct iommu_group associated
> with the VFIO file being used. Instead of returning the group ID then
> searching sysfs for that string just directly return the iommu_group
> pointer already held by the vfio_group struct.
> 
> It already has a safe lifetime due to the struct file kref.

Except for the strange function pointer indirection this does look
sensible to me, but I wonder if we want to do this change at an even
higher level: The only thing that needs the IOMMU group is the PPC
kvm_spapr_tce_attach_iommu_group / kvm_spapr_tce_release_vfio_group
helpers.  And these use it only to find the iommu_table_group
structure.  Why can't we just have the vfio_group point to that
table group directly in vfio or the vfio SPAPR backend and remove
all the convoluted lookups?

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 06/10] vfio: Remove vfio_external_group_match_file()
  2022-04-14 18:46 ` [PATCH 06/10] vfio: Remove vfio_external_group_match_file() Jason Gunthorpe
  2022-04-15  4:02   ` Tian, Kevin
@ 2022-04-15  7:32   ` Christoph Hellwig
  1 sibling, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15  7:32 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Yi Liu

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-15  4:45   ` Christoph Hellwig
@ 2022-04-15 12:13     ` Jason Gunthorpe
  2022-04-15 14:36       ` Christoph Hellwig
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 12:13 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 06:45:34AM +0200, Christoph Hellwig wrote:
> On Thu, Apr 14, 2022 at 03:46:03PM -0300, Jason Gunthorpe wrote:
> > kvm and VFIO need to be coupled together however neither is willing to
> > tolerate a direct module dependency. Instead when kvm is given a VFIO FD
> > it uses many symbol_get()'s to access VFIO.
> > 
> > Provide a single VFIO function vfio_file_get_ops() which validates the
> > given struct file * is a VFIO file and then returns a struct of ops.
> > 
> > Following patches will redo each of the symbol_get() calls into an
> > indirection through this ops struct.
> 
> So I got anoyed at this as well a while ago and I still think this
> is the wrong way around.

What I plan to do in the future is to have different ops returned depending
on whether the file is a struct vfio_group or a struct vfio_device, so it
is not entirely pointless like this.
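
i.e. vfio_file_get_ops() could then end up looking something like this
(sketch; vfio_file_device_ops does not exist in this series and is only
shown to illustrate the idea):

	const struct vfio_file_ops *vfio_file_get_ops(struct file *filep)
	{
		if (filep->f_op == &vfio_group_fops)
			return &vfio_file_group_ops;
		if (filep->f_op == &vfio_device_fops)
			return &vfio_file_device_ops;	/* hypothetical */
		return NULL;
	}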

> I'd much rather EXPORT_SYMBOL_GPL kvm_register_device_ops and
> just let kvm_vfio_ops live in a module than all the symbol_get
> crazyness.  We'll need to be careful to deal with unload races
> or just not allow unloading, though.

This is certainly more complicated - especially considering module
unload - than a single symbol_get(). How do you see the benefit?

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs
  2022-04-15  4:47   ` Christoph Hellwig
@ 2022-04-15 12:13     ` Jason Gunthorpe
  2022-04-15 12:35       ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 12:13 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 06:47:31AM +0200, Christoph Hellwig wrote:
> On Thu, Apr 14, 2022 at 03:46:01PM -0300, Jason Gunthorpe wrote:
> > Use IS_ENABLED and static inlines instead of just ifdef'ing away all the
> > PPC code. This allows it to be compile tested on all platforms and makes
> > it easier to maintain.
> 
> That's even uglier than what we had.  I'd rather have a new vfio_ppc.c
> to implement it, then you can stubs for it in the header (or IS_ENABLED
> if you really want) but we don't need stubs for ppc-specific arch
> functionality in a consumer of it.

That seems like a good approach, I will try it

Thanks,
Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops
  2022-04-15  7:31   ` Christoph Hellwig
@ 2022-04-15 12:25     ` Jason Gunthorpe
  2022-04-15 14:37       ` Christoph Hellwig
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 12:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 09:31:25AM +0200, Christoph Hellwig wrote:
> On Thu, Apr 14, 2022 at 03:46:04PM -0300, Jason Gunthorpe wrote:
> > The only user wants to get a pointer to the struct iommu_group associated
> > with the VFIO file being used. Instead of returning the group ID then
> > searching sysfs for that string just directly return the iommu_group
> > pointer already held by the vfio_group struct.
> > 
> > It already has a safe lifetime due to the struct file kref.
> 
> Except for the strange function pointer indirection this does looks
> sensible to me, but I wonder if we want to do this change even more
> high level: The only thing that needs the IOMMU group is the PPC
> kvm_spapr_tce_attach_iommu_group / kvm_spapr_tce_release_vfio_group
> helpers.  And these use it only to find the iommu_table_group
> structure.  Why can't we just have the vfio_group point to that
> table group diretly in vfio or the vfio SPAPR backend and remove
> all the convoluted lookups?

The PPC specific iommu_table_group is PPC's "drvdata" for the common
struct iommu_group - it is obtained trivially by group->iommu_data

I think using iommu_group as the handle for it in common code is the
right thing to do.
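
For reference, the PPC side pulls it back out of the common group with
something like (a sketch of the existing pattern as I read it):

	struct iommu_table_group *table_group =
		iommu_group_get_iommudata(grp);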

What I don't entirely understand is what 'tablefd' is doing in all
this, or why the lookup of the kvmppc_spapr_tce_table is so
weird. PPC's unique iommu uapi is still a mystery to me.

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs
  2022-04-15 12:13     ` Jason Gunthorpe
@ 2022-04-15 12:35       ` Jason Gunthorpe
  2022-04-15 14:36         ` Christoph Hellwig
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 12:35 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 09:13:43AM -0300, Jason Gunthorpe wrote:
> On Fri, Apr 15, 2022 at 06:47:31AM +0200, Christoph Hellwig wrote:
> > On Thu, Apr 14, 2022 at 03:46:01PM -0300, Jason Gunthorpe wrote:
> > > Use IS_ENABLED and static inlines instead of just ifdef'ing away all the
> > > PPC code. This allows it to be compile tested on all platforms and makes
> > > it easier to maintain.
> > 
> > That's even uglier than what we had.  I'd rather have a new vfio_ppc.c
> > to implement it, then you can stubs for it in the header (or IS_ENABLED
> > if you really want) but we don't need stubs for ppc-specific arch
> > functionality in a consumer of it.
> 
> That seems like a good approach, I will try it

Actually, it defeats the whole point of this patch. I wrote it so I
could compile test all this stuff on x86 - if I shift it into a
vfio_ppc.c and make some kconfig stuff then it still won't compile
test on x86.

At that point I'd rather leave the ifdefs as-is and drop this patch.

Yes, the #ifdef is ugly, but this whole PPC thing is ugly :\

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-15 12:13     ` Jason Gunthorpe
@ 2022-04-15 14:36       ` Christoph Hellwig
  2022-04-15 15:31         ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15 14:36 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Alex Williamson, Cornelia Huck, kvm,
	Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 09:13:01AM -0300, Jason Gunthorpe wrote:
> > So I got anoyed at this as well a while ago and I still think this
> > is the wrong way around.
> 
> What I plan to do in future is to have differnt ops returned depending
> on if the file is a struct vfio_group or a struct vfio_device, so it
> is not entirely pointless like this.

Uh, I think that is a rather ugly interface.  Why would kvm pass
FDs of both kinds into the same interface?

> 
> > I'd much rather EXPORT_SYMBOL_GPL kvm_register_device_ops and
> > just let kvm_vfio_ops live in a module than all the symbol_get
> > crazyness.  We'll need to be careful to deal with unload races
> > or just not allow unloading, though.
> 
> This is certainly more complicated - especially considering module
> unload - than a single symbol_get(). How do you see the benefit?

Because that is the sensible layering - kvm already has an abstract
interface for emulated devices.  So instead of doing symbol_get magic
of some kind we should leverage it.

But I can see how that is something you might not want to do for
this series.  So maybe stick to the individual symbol_gets for now
and I'll send a separate series to clean that up?  Especially as
I have a half-finished series for that from a while ago anyway.

> 
> Jason
---end quoted text---

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs
  2022-04-15 12:35       ` Jason Gunthorpe
@ 2022-04-15 14:36         ` Christoph Hellwig
  0 siblings, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15 14:36 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Alex Williamson, Cornelia Huck, kvm,
	Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 09:35:04AM -0300, Jason Gunthorpe wrote:
> Actually, it defeats the whole point of this patch. I wrote it so I
> could compile test all this stuff on x86 - if I shift it into a
> vfio_ppc.c and make some kconfig stuff then it still won't compile
> test on x86.
> 
> At that point I'd rather leave the ifdefs as-is and drop this patch.
> 
> Yes, the #ifdef is ugly, but this whole PPC thing is ugly :\

I'd rather leave it as is then.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops
  2022-04-15 12:25     ` Jason Gunthorpe
@ 2022-04-15 14:37       ` Christoph Hellwig
  0 siblings, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2022-04-15 14:37 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Alex Williamson, Cornelia Huck, kvm,
	Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 09:25:10AM -0300, Jason Gunthorpe wrote:
> The PPC specific iommu_table_group is PPC's "drvdata" for the common
> struct iommu_group - it is obtained trivially by group->iommu_data
> 
> I think using iommu_group as the handle for it in common code is the
> right thing to do.
> 
> What I don't entirely understand is what is 'tablefd' doing in all
> this, or why the lookup of the kvmppc_spapr_tce_table is so
> weird. PPC's unique iommu uapi is still a mystery to me.

Yeah.  So I guess we should go with something like this patch for now.
Eventually as part of the actual iommufd work we'll need to unwind
the PPC stuff anyway.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-15 14:36       ` Christoph Hellwig
@ 2022-04-15 15:31         ` Jason Gunthorpe
  0 siblings, 0 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 15:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 04:36:21PM +0200, Christoph Hellwig wrote:
> On Fri, Apr 15, 2022 at 09:13:01AM -0300, Jason Gunthorpe wrote:
> > > So I got anoyed at this as well a while ago and I still think this
> > > is the wrong way around.
> > 
> > What I plan to do in future is to have differnt ops returned depending
> > on if the file is a struct vfio_group or a struct vfio_device, so it
> > is not entirely pointless like this.
> 
> Uh, I think that is a rather ugly interface.  Why would kvm pass in
> FDs to both into the same interface.

We can do it either way, but IMHO it is not very different from
passing a socket/file/pipe/etc FD to read() - the list of VFIO file
ops works identically on vfio device or vfio group FDs. The appeal of
multiplexing at the file level is that we don't need to build parallel
group/device uapi paths and parallel kAPI as well.

Ultimately none of these uses of the file care about what the file is;
these are all 'security proofs' and either FD type is fine to provide
the proof.

> Because that is the sensible layering - kvm already has an abstract
> interface for emulated devices.  So instead of doing symbol_get magic
> of some kind we should leverage it.

Hm, I don't know anything about kvm's device interface
 
> But I can see how that is something you might not want to do for
> this series.  So maybe stick to the individual symbol_gets for now
> and I'll send a separate series to clean that up?  Especially as
> I have a half-finished series for that from a while ago anyway.

Sure, I only did this because I became sad while touching all the
symbol gets - it really is ugly. It will make the diffstat much worse,
but no problem.

If you have something already then lets avoid touching it too much
here.

Thanks,
Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-15  3:57   ` Tian, Kevin
@ 2022-04-15 21:54     ` Jason Gunthorpe
  2022-04-16  0:00       ` Tian, Kevin
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 21:54 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

On Fri, Apr 15, 2022 at 03:57:14AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Friday, April 15, 2022 2:46 AM
> > 
> > kvm and VFIO need to be coupled together however neither is willing to
> > tolerate a direct module dependency. Instead when kvm is given a VFIO FD
> > it uses many symbol_get()'s to access VFIO.
> > 
> > Provide a single VFIO function vfio_file_get_ops() which validates the
> > given struct file * is a VFIO file and then returns a struct of ops.
> 
> VFIO has multiple files (container, group, and device). Here and other
> places seems to assume a VFIO file is just a group file. While it is correct
> in this external facing context, probably calling it 'VFIO group file' is
> clearer in various code comments and patch descriptions.
> 
> > 
> > Following patches will redo each of the symbol_get() calls into an
> > indirection through this ops struct.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> 
> 
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> 
> Out of curiosity, how do you envision when iommufd is introduced?
> Will we need a generic ops abstraction so both vfio and iommufd
> register their own ops to keep kvm side generic or a new protocol
> will be introduced between iommufd and kvm? 

I imagine using the vfio_device in all these contexts where the vfio
group is used, not iommufd. This keeps everything internal to vfio.

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm
  2022-04-15  4:21   ` Tian, Kevin
@ 2022-04-15 21:56     ` Jason Gunthorpe
  2022-04-16  0:42       ` Tian, Kevin
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 21:56 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

On Fri, Apr 15, 2022 at 04:21:45AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Friday, April 15, 2022 2:46 AM
> > 
> > None of the VFIO APIs take in the vfio_group anymore, so we can remove it
> > completely.
> > 
> > This has a subtle side effect on the enforced coherency tracking. The
> > vfio_group_get_external_user() was holding on to the container_users which
> > would prevent the iommu_domain and thus the enforced coherency value
> > from
> > changing while the group is registered with kvm.
> > 
> > It changes the security proof slightly into 'user must hold a group FD
> > that has a device that cannot enforce DMA coherence'. As opening the group
> > FD, not attaching the container, is the privileged operation this doesn't
> > change the security properties much.
> 
> If we allow vfio_file_enforced_coherent() to return error then the security
> proof can be sustained? In this case kvm can simply reject adding a group
> which is opened but not attached to a container. 

The issue is that the user can detach the container from the group because
kvm no longer holds a refcount on the container.

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group
  2022-04-15  3:44   ` Tian, Kevin
@ 2022-04-15 22:24     ` Jason Gunthorpe
  0 siblings, 0 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-15 22:24 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

On Fri, Apr 15, 2022 at 03:44:56AM +0000, Tian, Kevin wrote:
> > @@ -304,10 +309,10 @@ static int kvm_vfio_group_set_spapr_tce(struct
> > kvm_device *dev,
> >  		return -EBADF;
> > 
> >  	vfio_group = kvm_vfio_group_get_external_user(f.file);
> > -	fdput(f);
> > -
> > -	if (IS_ERR(vfio_group))
> > -		return PTR_ERR(vfio_group);
> > +	if (IS_ERR(vfio_group)) {
> > +		ret = PTR_ERR(vfio_group);
> > +		goto err_fdput;
> > +	}
> > 
> >  	grp = kvm_vfio_group_get_iommu_group(vfio_group);
> >  	if (WARN_ON_ONCE(!grp)) {
> 
> move above two external calls into below loop after file is
> matched...

Actually we can delete the kvm_vfio_group_get_external_user() call since
it returns the same thing as kvg->vfio_group, and we no longer need the
group to match the kvg.

> > @@ -320,7 +325,7 @@ static int kvm_vfio_group_set_spapr_tce(struct
> > kvm_device *dev,
> >  	mutex_lock(&kv->lock);
> > 
> >  	list_for_each_entry(kvg, &kv->group_list, node) {
> > -		if (kvg->vfio_group != vfio_group)
> > +		if (kvg->filp != f.file)
> >  			continue;
> 
> ... here. Though they will be removed in later patch doing so at
> this patch is slightly more reasonable.

Sure, it is nicer
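
Putting both suggestions together, a sketch of the resulting lookup might
look roughly like this. It is abbreviated and not the literal patch; the
field is called 'filp' in the hunks quoted here and is renamed to 'file'
later in the thread:

/*
 * Sketch: match on the stored struct file and use the kvm_vfio_group's
 * cached vfio_group directly, so the extra external-user call goes away.
 */
static int kvm_vfio_group_set_spapr_tce(struct kvm_device *dev,
					void __user *arg)
{
	struct kvm_vfio_spapr_tce_iommu param;
	struct kvm_vfio *kv = dev->private;
	struct kvm_vfio_group *kvg;
	struct iommu_group *grp;
	struct fd f;
	int ret = -ENOENT;

	if (copy_from_user(&param, arg, sizeof(param)))
		return -EFAULT;

	f = fdget(param.groupfd);
	if (!f.file)
		return -EBADF;

	mutex_lock(&kv->lock);
	list_for_each_entry(kvg, &kv->group_list, node) {
		if (kvg->filp != f.file)
			continue;

		grp = kvm_vfio_group_get_iommu_group(kvg->vfio_group);
		if (WARN_ON_ONCE(!grp)) {
			ret = -EIO;
			break;
		}
		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
						       param.tablefd, grp);
		iommu_group_put(grp);
		break;
	}
	mutex_unlock(&kv->lock);

	fdput(f);
	return ret;
}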

Thanks,
Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-15 21:54     ` Jason Gunthorpe
@ 2022-04-16  0:00       ` Tian, Kevin
  2022-04-16  1:33         ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-16  0:00 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, April 16, 2022 5:54 AM
> 
> On Fri, Apr 15, 2022 at 03:57:14AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Friday, April 15, 2022 2:46 AM
> > >
> > > kvm and VFIO need to be coupled together however neither is willing to
> > > tolerate a direct module dependency. Instead when kvm is given a VFIO
> FD
> > > it uses many symbol_get()'s to access VFIO.
> > >
> > > Provide a single VFIO function vfio_file_get_ops() which validates the
> > > given struct file * is a VFIO file and then returns a struct of ops.
> >
> > VFIO has multiple files (container, group, and device). Here and other
> > places seems to assume a VFIO file is just a group file. While it is correct
> > in this external facing context, probably calling it 'VFIO group file' is
> > clearer in various code comments and patch descriptions.
> >
> > >
> > > Following patches will redo each of the symbol_get() calls into an
> > > indirection through this ops struct.
> > >
> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> >
> >
> > Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> >
> > Out of curiosity, how do you envision when iommufd is introduced?
> > Will we need a generic ops abstraction so both vfio and iommufd
> > register their own ops to keep kvm side generic or a new protocol
> > will be introduced between iommufd and kvm?
> 
> I imagine using the vfio_device in all these context where the vfio
> group is used, not iommufd. This keeps everything internal to vfio.
> 

In this case, although the uAPI is called KVM_DEV_VFIO_GROUP_ADD,
Qemu will pass in a device fd, and with this series KVM doesn't care
whether it's actually a device or a group; it just uses the struct file
to call vfio_file_ops. Correct?

You probably remember there is one additional requirement when
adding ENQCMD virtualization on Intel platforms. KVM is required to
set up a guest PASID to host PASID translation table in the CPU VMCS
structure to support ENQCMD in the guest. Following the above direction,
I suppose KVM will provide a new interface that lets the user pass in
[devfd, iommufd, guest_pasid] and then call a new vfio op, e.g.
vfio_file_translate_guest_pasid(dev_file, iommufd, gpasid), to
retrieve the host pasid. This sounds correct in concept, as iommufd
only knows the host pasid and any g->h information is managed by the
vfio device driver.

Does it also make sense to you? Just want to think forward a bit
to make the whole picture clearer.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm
  2022-04-15 21:56     ` Jason Gunthorpe
@ 2022-04-16  0:42       ` Tian, Kevin
  2022-04-16  1:34         ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-16  0:42 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, April 16, 2022 5:56 AM
> 
> On Fri, Apr 15, 2022 at 04:21:45AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Friday, April 15, 2022 2:46 AM
> > >
> > > None of the VFIO APIs take in the vfio_group anymore, so we can remove
> it
> > > completely.
> > >
> > > This has a subtle side effect on the enforced coherency tracking. The
> > > vfio_group_get_external_user() was holding on to the container_users
> which
> > > would prevent the iommu_domain and thus the enforced coherency
> value
> > > from
> > > changing while the group is registered with kvm.
> > >
> > > It changes the security proof slightly into 'user must hold a group FD
> > > that has a device that cannot enforce DMA coherence'. As opening the
> group
> > > FD, not attaching the container, is the privileged operation this doesn't
> > > change the security properties much.
> >
> > If we allow vfio_file_enforced_coherent() to return error then the security
> > proof can be sustained? In this case kvm can simply reject adding a group
> > which is opened but not attached to a container.
> 
> The issue is the user can detatch the container from the group because
> kvm no longer holds a refcount on the container.
> 

I see your point. In this case the guest already loses access to the
device once the container is detached from the group, so using stale
coherency info on the KVM side is probably just fine; it becomes more of
a performance issue.

Then what about PPC? Without holding a reference to the container, is
there any impact on the spapr_tce_table which is attached to the group?
I don't know the relationship between this table and the vfio container,
or whether there is a lifetime dependency between them.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-16  0:00       ` Tian, Kevin
@ 2022-04-16  1:33         ` Jason Gunthorpe
  2022-04-18  3:56           ` Tian, Kevin
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-16  1:33 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

On Sat, Apr 16, 2022 at 12:00:12AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Saturday, April 16, 2022 5:54 AM
> > 
> > On Fri, Apr 15, 2022 at 03:57:14AM +0000, Tian, Kevin wrote:
> > > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > > Sent: Friday, April 15, 2022 2:46 AM
> > > >
> > > > kvm and VFIO need to be coupled together however neither is willing to
> > > > tolerate a direct module dependency. Instead when kvm is given a VFIO
> > FD
> > > > it uses many symbol_get()'s to access VFIO.
> > > >
> > > > Provide a single VFIO function vfio_file_get_ops() which validates the
> > > > given struct file * is a VFIO file and then returns a struct of ops.
> > >
> > > VFIO has multiple files (container, group, and device). Here and other
> > > places seems to assume a VFIO file is just a group file. While it is correct
> > > in this external facing context, probably calling it 'VFIO group file' is
> > > clearer in various code comments and patch descriptions.
> > >
> > > >
> > > > Following patches will redo each of the symbol_get() calls into an
> > > > indirection through this ops struct.
> > > >
> > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > >
> > >
> > > Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> > >
> > > Out of curiosity, how do you envision when iommufd is introduced?
> > > Will we need a generic ops abstraction so both vfio and iommufd
> > > register their own ops to keep kvm side generic or a new protocol
> > > will be introduced between iommufd and kvm?
> > 
> > I imagine using the vfio_device in all these context where the vfio
> > group is used, not iommufd. This keeps everything internal to vfio.
> > 
> 
> In this case although the uAPI is called KVM_DEV_VFIO_GROUP_ADD

Yes, down this path we'd probably alias it to KVM_DEV_VFIO_ADD_FD or
something.

> Qemu will pass in a device fd and with this series KVM doesn't care
> whether it's actually a device or group and just use struct file to call
> vfio_file_ops. correct?

Yes

> You probably remember there is one additional requirement when
> adding ENQCMD virtualization on Intel platform. KVM is required to
> setup a guest PASID to host PASID translation table in CPU vmcs
> structure to support ENQCMD in the guest. Following above direction
> I suppose KVM will provide a new interface to allow user pass in
>  [devfd, iommufd, guest_pasid] and then call a new vfio ops e.g.
> vfio_file_translate_guest_pasid(dev_file, iommufd, gpasid) to
> retrieve the host pasid. This sounds correct in concept as iommufd
> only knows host pasid and any g->h information is managed by
> vfio device driver.

I think there is no direct linkage of KVM to iommufd or VFIO for
ENQCMD.

The security nature of KVM is that the VM world should never have more
privilege than the hypervisor process running the KVM.

Therefore, when the VM does a vENQCMD it must be equivalent to a physical
ENQCMD that the KVM process could already execute anyhow. Yes, Intel
wired ENQCMD to a single PASID, but we could imagine a system call
that allowed the process to change the PASID that ENQCMD uses from an
authorized list of PASIDs that the process has access to.

So, the linkage from iommufd is indirect. When iommufd does whatever
to install a PASID in the process's ENQCMD authorization table KVM can
be instructed to link that PASID inside the ENQCMD to a vPASID in the
VM.

As long as the PASID is in the process table KVM can allow the VM to
use it.

And it explains how userspace can actually use ENQCMD in a VFIO
scenario with iommufd, where obviously it needs to be in direct
control of what PASID ENQCMD generates and not be tied only to the
PASID associated with the mm_struct.

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm
  2022-04-16  0:42       ` Tian, Kevin
@ 2022-04-16  1:34         ` Jason Gunthorpe
  2022-04-18  6:09           ` Tian, Kevin
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-16  1:34 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

On Sat, Apr 16, 2022 at 12:42:50AM +0000, Tian, Kevin wrote:

> Then what about PPC? w/o holding a reference to container is there
> any impact on spapr_tce_table which is attached to the group? 
> I don't know the relationship between this table and vfio container
> and whether there is a lifetime dependency in between. 

The table seems to have its own FD, so it should be refcounted
independently and not indirectly rely on the vfio container. It seemed
like there was enough protection there.

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-16  1:33         ` Jason Gunthorpe
@ 2022-04-18  3:56           ` Tian, Kevin
  2022-04-19 12:16             ` Jason Gunthorpe
  0 siblings, 1 reply; 47+ messages in thread
From: Tian, Kevin @ 2022-04-18  3:56 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L, Pan, Jacob jun

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, April 16, 2022 9:33 AM
> 
> On Sat, Apr 16, 2022 at 12:00:12AM +0000, Tian, Kevin wrote:
> > You probably remember there is one additional requirement when
> > adding ENQCMD virtualization on Intel platform. KVM is required to
> > setup a guest PASID to host PASID translation table in CPU vmcs
> > structure to support ENQCMD in the guest. Following above direction
> > I suppose KVM will provide a new interface to allow user pass in
> >  [devfd, iommufd, guest_pasid] and then call a new vfio ops e.g.
> > vfio_file_translate_guest_pasid(dev_file, iommufd, gpasid) to
> > retrieve the host pasid. This sounds correct in concept as iommufd
> > only knows host pasid and any g->h information is managed by
> > vfio device driver.
> 
> I think there is no direct linkage of KVM to iommufd or VFIO for
> ENQCMD.
> 
> The security nature of KVM is that the VM world should never have more
> privilege than the hypervisor process running the KVM.

Indeed.

> 
> Therefore, when the VM does a vENQCMD it must be equivalent to a physical
> ENQCMD that the KVM process could already execute anyhow. Yes, Intel
> wired ENQCMD to a single PASID, but we could imagine a system call
> that allowed the process to change the PASID that ENQCMD uses from an
> authorized list of PASIDs that the process has access to.

Yes, this makes more sense conceptually. One note: for vENQCMD the
guest changes the PASID via xsave/xrstor, which is not trapped, so we
don't need such a change-PASID syscall in practice. The kernel just needs
to maintain a list of authorized PASIDs and set up the PASID translation
structure properly in the CPU. Then the guest is allowed to access any
PASID that is authorized and translated by the CPU.

> 
> So, the linkage from iommufd is indirect. When iommufd does whatever
> to install a PASID in the process's ENQCMD authorization table KVM can
> be instructed to link that PASID inside the ENQCMD to a vPASID in the
> VM.
> 
> As long as the PASID is in the process table KVM can allow the VM to
> use it.
> 
> And it explains how userspace can actually use ENQCMD in a VFIO
> scenario with iommufd, where obviously it needs to be in direct
> control of what PASID ENQCMD generates and not be tied only to the
> PASID associated with the mm_struct.
> 

This reminds me of the previous ioasid_set concept introduced by
Jacob [1]. Let's set the implementation details aside for now, as much of
the logic there no longer holds given the progress of iommufd. But at a
very high level, concept-wise:

- Each mm is associated with a set of authorized PASIDs (ioasid_set);
- VFIO driver provides a uAPI for userspace to attach a guest virtual
  PASID (vPASID) to a hw page table in iommufd. In the uAPI:
    - a physical PASID (pPASID) is allocated and added to mm's ioasid_set;
    - the pPASID is used to actually attach to the hw page table;
    - the pPASID is returned to userspace upon successful attach;
- KVM provides a uAPI for userspace to map/unmap vPASID to pPASID
  in CPU PASID translation structure. User-provided pPASID must be
  found in mm->ioasid_set;

In this case the linkage from vfio/iommufd is indeed indirect.
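
A rough pseudo-code restatement of that flow, purely to illustrate the
division of responsibility above; none of these uAPI structures or names
exist, they are placeholders:

/*
 * Hypothetical only: placeholder structures restating the three
 * bullets above in code form.
 */

/* 1) VFIO/iommufd side: attach a guest vPASID to a hw page table.
 *    A pPASID is allocated, added to the mm's authorized set, used for
 *    the actual attach, and returned to userspace.
 */
struct vfio_attach_vpasid {	/* placeholder uAPI */
	__u32	argsz;
	__u32	iommufd;
	__u32	hwpt_id;	/* hw page table object in iommufd */
	__u32	vpasid;		/* in:  guest PASID */
	__u32	ppasid;		/* out: physical PASID */
};

/* 2) KVM side: install the vPASID -> pPASID mapping in the CPU PASID
 *    translation structure; KVM only checks that the pPASID is present
 *    in the caller's authorized set.
 */
struct kvm_vpasid_mapping {	/* placeholder uAPI */
	__u32	vpasid;
	__u32	ppasid;
};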

My earlier reply was probably based on a mistaken recollection that the
entire ioasid_set concept was dropped when the lengthy discussion
in [1] led to the debut of iommufd.

Thanks
Kevin

[1] https://lore.kernel.org/all/1614463286-97618-1-git-send-email-jacob.jun.pan@linux.intel.com/

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm
  2022-04-16  1:34         ` Jason Gunthorpe
@ 2022-04-18  6:09           ` Tian, Kevin
  0 siblings, 0 replies; 47+ messages in thread
From: Tian, Kevin @ 2022-04-18  6:09 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, April 16, 2022 9:34 AM
> 
> On Sat, Apr 16, 2022 at 12:42:50AM +0000, Tian, Kevin wrote:
> 
> > Then what about PPC? w/o holding a reference to container is there
> > any impact on spapr_tce_table which is attached to the group?
> > I don't know the relationship between this table and vfio container
> > and whether there is a lifetime dependency in between.
> 
> table seems to have its own FD so it should be refcounted
> independently and not indirectly rely on the vfio container. It seemed
> like there was enough protection there..
> 

The table in the PPC context refers to a window in the global iova address
space. From this angle the reference should be torn down when the
group is detached from the container, which represents the address
space. But given that the detach operation also prevents the user from
accessing the device, having KVM hold the reference until
KVM_DEV_VFIO_GROUP_DEL is called is probably not harmful.

In any case this is also worth an explanation similar to what you
did for cache coherency.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s
  2022-04-18  3:56           ` Tian, Kevin
@ 2022-04-19 12:16             ` Jason Gunthorpe
  0 siblings, 0 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-19 12:16 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L, Pan, Jacob jun

On Mon, Apr 18, 2022 at 03:56:01AM +0000, Tian, Kevin wrote:
 
> - Each mm is associated with a set of authorized PASIDs (ioasid_set);
> - VFIO driver provides a uAPI for userspace to attach a guest virtual
>   PASID (vPASID) to a hw page table in iommufd. In the uAPI:
>     - a physical PASID (pPASID) is allocated and added to mm's ioasid_set;
>     - the pPASID is used to actually attach to the hw page table;
>     - the pPASID is returned to userspace upon successful attach;
> - KVM provides a uAPI for userspace to map/unmap vPASID to pPASID
>   in CPU PASID translation structure. User-provided pPASID must be
>   found in mm->ioasid_set;

It is more logical to be current->ioasid_set, not mm

But yes, something like this. The mdev API may want to mirror the kvm
API and map/unmap vPASIDs to pPASIDs.

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops
  2022-04-15  4:07   ` Tian, Kevin
@ 2022-04-19 19:23     ` Jason Gunthorpe
  2022-04-20  3:05       ` Tian, Kevin
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-19 19:23 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

On Fri, Apr 15, 2022 at 04:07:51AM +0000, Tian, Kevin wrote:

> > +	struct vfio_group *group = filep->private_data;
> > +	bool ret;
> > +
> > +	/*
> > +	 * Since the coherency state is determined only once a container is
> > +	 * attached the user must do so before they can prove they have
> > +	 * permission.
> > +	 */
> > +	if (vfio_group_add_container_user(group))
> > +		return true;
> 
> I wonder whether it's better to return error here and let KVM to
> decide whether it wants to allow wbinvd in such case (though
> likely the conclusion is same) or simply rejects adding the group.

Since the new model is to present proof at add time it is OK - it just
means the user doesn't have proof to enable wbinvd.

The thing that is missing here is a notifier so kvm can track changes
in the group's assigned iommu_domain, but I don't think it is necessary.
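
For context, the KVM side consumes the answer roughly like this. It is
adapted from the existing kvm_vfio_update_coherency() in virt/kvm/vfio.c,
with the per-group query swapped for the group-file helper discussed in
this series; the field and helper names are approximations, not the
final code:

/*
 * Sketch: a 'false' answer - including the "no container attached yet"
 * case above - just marks the VM as having non-coherent DMA, i.e.
 * wbinvd stays available to the guest.
 */
static void kvm_vfio_update_coherency(struct kvm_device *dev)
{
	struct kvm_vfio *kv = dev->private;
	bool noncoherent = false;
	struct kvm_vfio_group *kvg;

	mutex_lock(&kv->lock);

	list_for_each_entry(kvg, &kv->group_list, node) {
		if (!vfio_file_enforced_coherent(kvg->file)) {
			noncoherent = true;
			break;
		}
	}

	if (noncoherent != kv->noncoherent) {
		kv->noncoherent = noncoherent;

		if (kv->noncoherent)
			kvm_arch_register_noncoherent_dma(dev->kvm);
		else
			kvm_arch_unregister_noncoherent_dma(dev->kvm);
	}

	mutex_unlock(&kv->lock);
}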

Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group
  2022-04-15  7:20   ` Christoph Hellwig
@ 2022-04-19 19:24     ` Jason Gunthorpe
  0 siblings, 0 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2022-04-19 19:24 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger, Yi Liu

On Fri, Apr 15, 2022 at 09:20:27AM +0200, Christoph Hellwig wrote:
> I'd name the field file instead of filp, but otherwise this looks good:

Done everywhere

Thanks,
Jason

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops
  2022-04-19 19:23     ` Jason Gunthorpe
@ 2022-04-20  3:05       ` Tian, Kevin
  0 siblings, 0 replies; 47+ messages in thread
From: Tian, Kevin @ 2022-04-20  3:05 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Williamson, Cornelia Huck, kvm, Paolo Bonzini, Eric Auger,
	Christoph Hellwig, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, April 20, 2022 3:24 AM
> 
> On Fri, Apr 15, 2022 at 04:07:51AM +0000, Tian, Kevin wrote:
> 
> > > +	struct vfio_group *group = filep->private_data;
> > > +	bool ret;
> > > +
> > > +	/*
> > > +	 * Since the coherency state is determined only once a container is
> > > +	 * attached the user must do so before they can prove they have
> > > +	 * permission.
> > > +	 */
> > > +	if (vfio_group_add_container_user(group))
> > > +		return true;
> >
> > I wonder whether it's better to return error here and let KVM to
> > decide whether it wants to allow wbinvd in such case (though
> > likely the conclusion is same) or simply rejects adding the group.
> 
> Since the new model is to present proof at add it is OK - it just
> means the user doesn't have a proof to enable wbinvd.

That would be OK if this interface were called vfio_file_is_wbinvd_required().

But since it is called vfio_file_enforced_coherent(), whether it serves as
proof for wbinvd is caller-side knowledge. It is clearer to stick to the
fact about cache coherency and, if it cannot be determined at this point,
simply return an error for the caller to handle.

Also, it sounds more reasonable for KVM to fail adding the group if a
container hasn't been attached, instead of moving forward assuming an
incorrect coherency state and then getting an error later.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2022-04-20  3:05 UTC | newest]

Thread overview: 47+ messages
2022-04-14 18:45 [PATCH 00/10] Remove vfio_group from the struct file facing VFIO API Jason Gunthorpe
2022-04-14 18:46 ` [PATCH 01/10] kvm/vfio: Move KVM_DEV_VFIO_GROUP_* ioctls into functions Jason Gunthorpe
2022-04-15  3:36   ` Tian, Kevin
2022-04-15  7:18   ` Christoph Hellwig
2022-04-14 18:46 ` [PATCH 02/10] kvm/vfio: Reduce the scope of PPC #ifdefs Jason Gunthorpe
2022-04-15  4:47   ` Christoph Hellwig
2022-04-15 12:13     ` Jason Gunthorpe
2022-04-15 12:35       ` Jason Gunthorpe
2022-04-15 14:36         ` Christoph Hellwig
2022-04-14 18:46 ` [PATCH 03/10] kvm/vfio: Store the struct file in the kvm_vfio_group Jason Gunthorpe
2022-04-15  3:44   ` Tian, Kevin
2022-04-15 22:24     ` Jason Gunthorpe
2022-04-15  7:20   ` Christoph Hellwig
2022-04-19 19:24     ` Jason Gunthorpe
2022-04-14 18:46 ` [PATCH 04/10] vfio: Use a struct of function pointers instead of a many symbol_get()'s Jason Gunthorpe
2022-04-15  3:57   ` Tian, Kevin
2022-04-15 21:54     ` Jason Gunthorpe
2022-04-16  0:00       ` Tian, Kevin
2022-04-16  1:33         ` Jason Gunthorpe
2022-04-18  3:56           ` Tian, Kevin
2022-04-19 12:16             ` Jason Gunthorpe
2022-04-15  4:45   ` Christoph Hellwig
2022-04-15 12:13     ` Jason Gunthorpe
2022-04-15 14:36       ` Christoph Hellwig
2022-04-15 15:31         ` Jason Gunthorpe
2022-04-14 18:46 ` [PATCH 05/10] vfio: Move vfio_external_user_iommu_id() to vfio_file_ops Jason Gunthorpe
2022-04-15  3:59   ` Tian, Kevin
2022-04-15  7:31   ` Christoph Hellwig
2022-04-15 12:25     ` Jason Gunthorpe
2022-04-15 14:37       ` Christoph Hellwig
2022-04-14 18:46 ` [PATCH 06/10] vfio: Remove vfio_external_group_match_file() Jason Gunthorpe
2022-04-15  4:02   ` Tian, Kevin
2022-04-15  7:32   ` Christoph Hellwig
2022-04-14 18:46 ` [PATCH 07/10] vfio: Move vfio_external_check_extension() to vfio_file_ops Jason Gunthorpe
2022-04-15  4:07   ` Tian, Kevin
2022-04-19 19:23     ` Jason Gunthorpe
2022-04-20  3:05       ` Tian, Kevin
2022-04-15  4:48   ` Christoph Hellwig
2022-04-14 18:46 ` [PATCH 08/10] vfio: Move vfio_group_set_kvm() into vfio_file_ops Jason Gunthorpe
2022-04-15  4:09   ` Tian, Kevin
2022-04-14 18:46 ` [PATCH 09/10] kvm/vfio: Remove vfio_group from kvm Jason Gunthorpe
2022-04-15  4:21   ` Tian, Kevin
2022-04-15 21:56     ` Jason Gunthorpe
2022-04-16  0:42       ` Tian, Kevin
2022-04-16  1:34         ` Jason Gunthorpe
2022-04-18  6:09           ` Tian, Kevin
2022-04-14 18:46 ` [PATCH 10/10] vfio/pci: Use the struct file as the handle not the vfio_group Jason Gunthorpe
