kvm.vger.kernel.org archive mirror
* [PATCH v2 13/13] vfio/type1: track iommu backed group attach
@ 2019-09-05  8:08 Liu Yi L
  2019-09-26  2:37 ` Alex Williamson
  0 siblings, 1 reply; 5+ messages in thread
From: Liu Yi L @ 2019-09-05  8:08 UTC (permalink / raw)
  To: alex.williamson, kwankhede
  Cc: kevin.tian, baolu.lu, yi.l.liu, yi.y.sun, joro, linux-kernel,
	kvm, yan.y.zhao, shaopeng.he, chenbo.xia, jun.j.tian

With the introduction of iommu-aware mdev groups, a user may wrap a PF/VF
as an mdev. Such mdevs are referred to as wrapped PF/VF mdevs in the
following. If this is done on a non-singleton iommu group, multiple
domain attaches may be issued against one iommu_device group (i.e. the
iommu backed group). The reason is that an mdev group attach ultimately
becomes an attach of the iommu_device group, and the existing
vfio_domain.group_list knows nothing about that, so duplicate attaches
would happen.

What's more, under the default domain policy, a group attach is allowed
only when the group's in-use domain equals its default domain, as the
code below shows:

static int __iommu_attach_group(struct iommu_domain *domain, ..)
{
	..
	if (group->default_domain && group->domain != group->default_domain)
		return -EBUSY;
	...
}

So in the above scenario, only the first group attach on the
non-singleton iommu group succeeds; subsequent group attaches fail.
However, this is a perfectly valid usage case if the wrapped PF/VF mdevs
and the other devices are assigned to a single VM, so we do not want to
prevent it. In other words, the subsequent group attaches should return
success without ever reaching __iommu_attach_group().

However, if the user tries to assign the wrapped PF/VF mdevs and the
other devices to different VMs, the subsequent group attaches on a single
iommu_device group should fail. This means such a subsequent group attach
should eventually call into __iommu_attach_group() and fail there.

To meet the above requirements, this patch introduces a vfio_group_object
structure to track the group attaches of an iommu_device group (a.k.a.
iommu backed group). Each vfio_domain gets a group_obj_list to record the
vfio_group_objects. Lookups in the group_obj_list use the iommu_device
group when a group is an mdev group.

	struct vfio_group_object {
		atomic_t		count;
		struct iommu_group	*iommu_group;
		struct vfio_domain	*domain;
		struct list_head	next;
	};

Each successful group attach either creates a new vfio_group_object or
increments the count of an existing vfio_group_object instance. Details
can be found in vfio_domain_attach_group_object().

Each group detach decrements the count accordingly. Please check
vfio_domain_detach_group_object().

As the vfio_domain.group_obj_list is scoped to a vfio container
(vfio_iommu), an attempt to pass through a non-singleton group to
multiple VMs fails, since the VMs use separate vfio containers. Also, if
a vIOMMU is exposed, attempts to assign multiple devices (via vfio-pci or
PF/VF wrapped mdev) to a single VM will fail as well. This is aligned
with the current vfio passthrough rules.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 drivers/vfio/vfio_iommu_type1.c | 167 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 154 insertions(+), 13 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 317430d..6a67bd6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -75,6 +75,7 @@ struct vfio_domain {
 	struct iommu_domain	*domain;
 	struct list_head	next;
 	struct list_head	group_list;
+	struct list_head	group_obj_list;
 	int			prot;		/* IOMMU_CACHE */
 	bool			fgsp;		/* Fine-grained super pages */
 };
@@ -97,6 +98,13 @@ struct vfio_group {
 	bool			mdev_group;	/* An mdev group */
 };
 
+struct vfio_group_object {
+	atomic_t		count;
+	struct iommu_group	*iommu_group;
+	struct vfio_domain	*domain;
+	struct list_head	next;
+};
+
 /*
  * Guest RAM pinning working set or DMA target
  */
@@ -1263,6 +1271,85 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
 	return NULL;
 }
 
+static struct vfio_group_object *find_iommu_group_object(
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	struct vfio_group_object *g;
+
+	list_for_each_entry(g, &domain->group_obj_list, next) {
+		if (g->iommu_group == iommu_group)
+			return g;
+	}
+
+	return NULL;
+}
+
+static void vfio_init_iommu_group_object(struct vfio_group_object *group_obj,
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	if (!group_obj || !domain || !iommu_group) {
+		WARN_ON(1);
+		return;
+	}
+	atomic_set(&group_obj->count, 1);
+	group_obj->iommu_group = iommu_group;
+	group_obj->domain = domain;
+	list_add(&group_obj->next, &domain->group_obj_list);
+}
+
+static int vfio_domain_attach_group_object(
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	struct vfio_group_object *group_obj;
+
+	group_obj = find_iommu_group_object(domain, iommu_group);
+	if (group_obj) {
+		atomic_inc(&group_obj->count);
+		return 0;
+	}
+	group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);
+	vfio_init_iommu_group_object(group_obj, domain, iommu_group);
+	return iommu_attach_group(domain->domain, iommu_group);
+}
+
+static int vfio_domain_detach_group_object(
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	struct vfio_group_object *group_obj;
+
+	group_obj = find_iommu_group_object(domain, iommu_group);
+	if (!group_obj) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (atomic_dec_if_positive(&group_obj->count) == 0) {
+		list_del(&group_obj->next);
+		kfree(group_obj);
+	}
+	iommu_detach_group(domain->domain, iommu_group);
+	return 0;
+}
+
+/*
+ * Check if an iommu backed group has been attached to a domain within
+ * a specific container (vfio_iommu). If yes, return the vfio_group_object
+ * which tracks the previous domain attach for this group. Caller of this
+ * function should hold vfio_iommu->lock.
+ */
+static struct vfio_group_object *vfio_iommu_group_object_check(
+		struct vfio_iommu *iommu, struct iommu_group *iommu_group)
+{
+	struct vfio_domain *d;
+	struct vfio_group_object *group_obj;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		group_obj = find_iommu_group_object(d, iommu_group);
+		if (group_obj)
+			return group_obj;
+	}
+	return NULL;
+}
+
 static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
 {
 	struct list_head group_resv_regions;
@@ -1310,21 +1397,23 @@ static struct device *vfio_mdev_get_iommu_device(struct device *dev)
 
 static int vfio_mdev_attach_domain(struct device *dev, void *data)
 {
-	struct iommu_domain *domain = data;
+	struct vfio_domain *domain = data;
 	struct device *iommu_device;
 	struct iommu_group *group;
 
 	iommu_device = vfio_mdev_get_iommu_device(dev);
 	if (iommu_device) {
 		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			return iommu_aux_attach_device(domain, iommu_device);
+			return iommu_aux_attach_device(domain->domain,
+							iommu_device);
 		else {
 			group = iommu_group_get(iommu_device);
 			if (!group) {
 				WARN_ON(1);
 				return -EINVAL;
 			}
-			return iommu_attach_group(domain, group);
+			return vfio_domain_attach_group_object(
+							domain, group);
 		}
 	}
 
@@ -1333,21 +1422,22 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data)
 
 static int vfio_mdev_detach_domain(struct device *dev, void *data)
 {
-	struct iommu_domain *domain = data;
+	struct vfio_domain *domain = data;
 	struct device *iommu_device;
 	struct iommu_group *group;
 
 	iommu_device = vfio_mdev_get_iommu_device(dev);
 	if (iommu_device) {
 		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			iommu_aux_detach_device(domain, iommu_device);
+			iommu_aux_detach_device(domain->domain, iommu_device);
 		else {
 			group = iommu_group_get(iommu_device);
 			if (!group) {
 				WARN_ON(1);
 				return -EINVAL;
 			}
-			iommu_detach_group(domain, group);
+			return vfio_domain_detach_group_object(
+							domain, group);
 		}
 	}
 
@@ -1359,20 +1449,27 @@ static int vfio_iommu_attach_group(struct vfio_domain *domain,
 {
 	if (group->mdev_group)
 		return iommu_group_for_each_dev(group->iommu_group,
-						domain->domain,
+						domain,
 						vfio_mdev_attach_domain);
 	else
-		return iommu_attach_group(domain->domain, group->iommu_group);
+		return vfio_domain_attach_group_object(domain,
+							group->iommu_group);
 }
 
 static void vfio_iommu_detach_group(struct vfio_domain *domain,
 				    struct vfio_group *group)
 {
+	int ret;
+
 	if (group->mdev_group)
-		iommu_group_for_each_dev(group->iommu_group, domain->domain,
+		iommu_group_for_each_dev(group->iommu_group, domain,
 					 vfio_mdev_detach_domain);
-	else
-		iommu_detach_group(domain->domain, group->iommu_group);
+	else {
+		ret = vfio_domain_detach_group_object(
+						domain, group->iommu_group);
+		if (ret)
+			pr_warn("%s, detach failed!! ret: %d", __func__, ret);
+	}
 }
 
 static bool vfio_bus_is_mdev(struct bus_type *bus)
@@ -1412,6 +1509,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	int ret;
 	bool resv_msi, msi_remap;
 	phys_addr_t resv_msi_base;
+	struct vfio_group_object *group_obj = NULL;
+	struct device *iommu_device = NULL;
+	struct iommu_group *iommu_device_group;
+
 
 	mutex_lock(&iommu->lock);
 
@@ -1438,14 +1539,20 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 
 	group->iommu_group = iommu_group;
 
+	group_obj = vfio_iommu_group_object_check(iommu, group->iommu_group);
+	if (group_obj) {
+		atomic_inc(&group_obj->count);
+		list_add(&group->next, &group_obj->domain->group_list);
+		mutex_unlock(&iommu->lock);
+		return 0;
+	}
+
 	/* Determine bus_type in order to allocate a domain */
 	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
 	if (ret)
 		goto out_free;
 
 	if (vfio_bus_is_mdev(bus)) {
-		struct device *iommu_device = NULL;
-
 		group->mdev_group = true;
 
 		/* Determine the isolation type */
@@ -1469,6 +1576,39 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		bus = iommu_device->bus;
 	}
 
+	/*
+	 * Check if iommu backed group attached to a domain within current
+	 * container. If yes, increase the count; If no, go ahead with a
+	 * new domain attach process.
+	 */
+	group_obj = NULL;
+	if (iommu_device) {
+		iommu_device_group = iommu_group_get(iommu_device);
+		if (!iommu_device_group) {
+			WARN_ON(1);
+			kfree(domain);
+			mutex_unlock(&iommu->lock);
+			return -EINVAL;
+		}
+		group_obj = vfio_iommu_group_object_check(iommu,
+							iommu_device_group);
+	} else
+		group_obj = vfio_iommu_group_object_check(iommu,
+							group->iommu_group);
+
+	if (group_obj) {
+		atomic_inc(&group_obj->count);
+		list_add(&group->next, &group_obj->domain->group_list);
+		kfree(domain);
+		mutex_unlock(&iommu->lock);
+		return 0;
+	}
+
+	/*
+	 * Now we are sure we want to initialize a new vfio_domain.
+	 * First step is to alloc an iommu_domain from iommu abstract
+	 * layer.
+	 */
 	domain->domain = iommu_domain_alloc(bus);
 	if (!domain->domain) {
 		ret = -EIO;
@@ -1484,6 +1624,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 			goto out_domain;
 	}
 
+	INIT_LIST_HEAD(&domain->group_obj_list);
 	ret = vfio_iommu_attach_group(domain, group);
 	if (ret)
 		goto out_domain;
-- 
2.7.4


* Re: [PATCH v2 13/13] vfio/type1: track iommu backed group attach
  2019-09-05  8:08 [PATCH v2 13/13] vfio/type1: track iommu backed group attach Liu Yi L
@ 2019-09-26  2:37 ` Alex Williamson
  2019-09-30 12:41   ` Liu, Yi L
  0 siblings, 1 reply; 5+ messages in thread
From: Alex Williamson @ 2019-09-26  2:37 UTC (permalink / raw)
  To: Liu Yi L
  Cc: kwankhede, kevin.tian, baolu.lu, yi.y.sun, joro, linux-kernel,
	kvm, yan.y.zhao, shaopeng.he, chenbo.xia, jun.j.tian

On Thu,  5 Sep 2019 16:08:43 +0800
Liu Yi L <yi.l.liu@intel.com> wrote:

> With the introduction of iommu aware mdev group, user may wrap a PF/VF
> as a mdev. Such mdevs will be called as wrapped PF/VF mdevs in following
> statements. If it's applied on a non-singleton iommu group, there would
> be multiple domain attach on an iommu_device group (equal to iommu backed
> group). Reason is that mdev group attaches is finally an iommu_device
> group attach in the end. And existing vfio_domain.gorup_list has no idea
> about it. Thus multiple attach would happen.
> 
> What's more, under default domain policy, group attach is allowed only
> when its in-use domain is equal to its default domain as the code below:
> 
> static int __iommu_attach_group(struct iommu_domain *domain, ..)
> {
> 	..
> 	if (group->default_domain && group->domain != group->default_domain)
> 		return -EBUSY;
> 	...
> }
> 
> So for the above scenario, only the first group attach on the
> non-singleton iommu group will be successful. Subsequent group
> attaches will be failed. However, this is a fairly valid usage case
> if the wrapped PF/VF mdevs and other devices are assigned to a single
> VM. We may want to prevent it. In other words, the subsequent group
> attaches should return success before going to __iommu_attach_group().
> 
> However, if user tries to assign the wrapped PF/VF mdevs and other
> devices to different VMs, the subsequent group attaches on a single
> iommu_device group should be failed. This means the subsequent group
> attach should finally calls into __iommu_attach_group() and be failed.
> 
> To meet the above requirements, this patch introduces vfio_group_object
> structure to track the group attach of an iommu_device group (a.ka.
> iommu backed group). Each vfio_domain will have a group_obj_list to
> record the vfio_group_objects. The search of the group_obj_list should
> use iommu_device group if a group is mdev group.
> 
> 	struct vfio_group_object {
> 		atomic_t		count;
> 		struct iommu_group	*iommu_group;
> 		struct vfio_domain	*domain;
> 		struct list_head	next;
> 	};
> 
> Each time, a successful group attach should either have a new
> vfio_group_object created or count increasing of an existing
> vfio_group_object instance. Details can be found in
> vfio_domain_attach_group_object().
> 
> For group detach, should have count decreasing. Please check
> vfio_domain_detach_group_object().
> 
> As the vfio_domain.group_obj_list is within vfio container(vfio_iommu)
> scope, if user wants to passthru a non-singleton to multiple VMs, it
> will be failed as VMs will have separate vfio containers. Also, if
> vIOMMU is exposed, it will also fail the attempts of assigning multiple
> devices (via vfio-pci or PF/VF wrapped mdev) to a single VM. This is
> aligned with current vfio passthru rules.
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Lu Baolu <baolu.lu@linux.intel.com>
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 167 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 154 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 317430d..6a67bd6 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -75,6 +75,7 @@ struct vfio_domain {
>  	struct iommu_domain	*domain;
>  	struct list_head	next;
>  	struct list_head	group_list;
> +	struct list_head	group_obj_list;
>  	int			prot;		/* IOMMU_CACHE */
>  	bool			fgsp;		/* Fine-grained super pages */
>  };
> @@ -97,6 +98,13 @@ struct vfio_group {
>  	bool			mdev_group;	/* An mdev group */
>  };
>  
> +struct vfio_group_object {
> +	atomic_t		count;
> +	struct iommu_group	*iommu_group;
> +	struct vfio_domain	*domain;
> +	struct list_head	next;
> +};
> +

So vfio_domain already has a group_list for all the groups attached to
that iommu domain.  We add a vfio_group_object list, which is also
effectively a list of groups attached to the domain, but we're tracking
something different with it.  All groups seem to get added as a
vfio_group_object, so why do we need both lists?  As I suspected when
we discussed this last, this adds complexity for something that's
currently being proposed as a sample driver.

>  /*
>   * Guest RAM pinning working set or DMA target
>   */
> @@ -1263,6 +1271,85 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
>  	return NULL;
>  }
>  
> +static struct vfio_group_object *find_iommu_group_object(
> +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> +{
> +	struct vfio_group_object *g;
> +
> +	list_for_each_entry(g, &domain->group_obj_list, next) {
> +		if (g->iommu_group == iommu_group)
> +			return g;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void vfio_init_iommu_group_object(struct vfio_group_object *group_obj,
> +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> +{
> +	if (!group_obj || !domain || !iommu_group) {
> +		WARN_ON(1);
> +		return;
> +	}

This is poor error handling, either this should never happen or we
should have an error path for it.
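If the latter, a minimal sketch (untested) would be to return an error
and let the caller unwind:

	static int vfio_init_iommu_group_object(struct vfio_group_object *group_obj,
			struct vfio_domain *domain, struct iommu_group *iommu_group)
	{
		if (WARN_ON(!group_obj || !domain || !iommu_group))
			return -EINVAL;
		atomic_set(&group_obj->count, 1);
		group_obj->iommu_group = iommu_group;
		group_obj->domain = domain;
		list_add(&group_obj->next, &domain->group_obj_list);
		return 0;
	}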

> +	atomic_set(&group_obj->count, 1);
> +	group_obj->iommu_group = iommu_group;
> +	group_obj->domain = domain;
> +	list_add(&group_obj->next, &domain->group_obj_list);
> +}
> +
> +static int vfio_domain_attach_group_object(
> +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> +{
> +	struct vfio_group_object *group_obj;
> +
> +	group_obj = find_iommu_group_object(domain, iommu_group);
> +	if (group_obj) {
> +		atomic_inc(&group_obj->count);
> +		return 0;
> +	}
> +	group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);

The group_obj test should be here, where we can return an error.
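I.e. something like (untested):

	group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);
	if (!group_obj)
		return -ENOMEM;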

> +	vfio_init_iommu_group_object(group_obj, domain, iommu_group);
> +	return iommu_attach_group(domain->domain, iommu_group);
> +}
> +
> +static int vfio_domain_detach_group_object(
> +		struct vfio_domain *domain, struct iommu_group *iommu_group)

A detach should generally return void, it cannot fail.

> +{
> +	struct vfio_group_object *group_obj;
> +
> +	group_obj = find_iommu_group_object(domain, iommu_group);
> +	if (!group_obj) {
> +		WARN_ON(1);
> +		return -EINVAL;

The WARN is probably appropriate here since this is an internal
consistency failure.

> +	}
> +	if (atomic_dec_if_positive(&group_obj->count) == 0) {
> +		list_del(&group_obj->next);
> +		kfree(group_obj);
> +	}

Like in the previous patch, I don't think this atomic is doing
everything you're intending it to do, the iommu->lock seems more like
it might be the one protecting us here.  If that's true, then we don't
need this to be an atomic.
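For instance, if every path touching the count and the list already runs
under iommu->lock, a plain integer would do (illustrative sketch only):

	struct vfio_group_object {
		int			count;	/* protected by vfio_iommu->lock */
		struct iommu_group	*iommu_group;
		struct vfio_domain	*domain;
		struct list_head	next;
	};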

> +	iommu_detach_group(domain->domain, iommu_group);

How do we get away with detaching the group regardless of the reference
count?!
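Presumably only dropping the last reference should trigger the actual
detach, e.g. (untested):

	if (atomic_dec_if_positive(&group_obj->count) == 0) {
		list_del(&group_obj->next);
		kfree(group_obj);
		iommu_detach_group(domain->domain, iommu_group);
	}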

> +	return 0;
> +}
> +
> +/*
> + * Check if an iommu backed group has been attached to a domain within
> + * a specific container (vfio_iommu). If yes, return the vfio_group_object
> + * which tracks the previous domain attach for this group. Caller of this
> + * function should hold vfio_iommu->lock.
> + */
> +static struct vfio_group_object *vfio_iommu_group_object_check(
> +		struct vfio_iommu *iommu, struct iommu_group *iommu_group)

So vfio_iommu_group_object_check() finds a vfio_group_object anywhere
in the vfio_iommu while find_iommu_group_object() only finds it within
a vfio_domain.  Maybe find_iommu_group_obj_in_domain() vs
find_iommu_group_obj_in_iommu()?

> +{
> +	struct vfio_domain *d;
> +	struct vfio_group_object *group_obj;
> +
> +	list_for_each_entry(d, &iommu->domain_list, next) {
> +		group_obj = find_iommu_group_object(d, iommu_group);
> +		if (group_obj)
> +			return group_obj;
> +	}
> +	return NULL;
> +}
> +
>  static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
>  {
>  	struct list_head group_resv_regions;
> @@ -1310,21 +1397,23 @@ static struct device *vfio_mdev_get_iommu_device(struct device *dev)
>  
>  static int vfio_mdev_attach_domain(struct device *dev, void *data)
>  {
> -	struct iommu_domain *domain = data;
> +	struct vfio_domain *domain = data;
>  	struct device *iommu_device;
>  	struct iommu_group *group;
>  
>  	iommu_device = vfio_mdev_get_iommu_device(dev);
>  	if (iommu_device) {
>  		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
> -			return iommu_aux_attach_device(domain, iommu_device);
> +			return iommu_aux_attach_device(domain->domain,
> +							iommu_device);
>  		else {
>  			group = iommu_group_get(iommu_device);
>  			if (!group) {
>  				WARN_ON(1);
>  				return -EINVAL;
>  			}
> -			return iommu_attach_group(domain, group);
> +			return vfio_domain_attach_group_object(
> +							domain, group);
>  		}
>  	}
>  
> @@ -1333,21 +1422,22 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data)
>  
>  static int vfio_mdev_detach_domain(struct device *dev, void *data)
>  {
> -	struct iommu_domain *domain = data;
> +	struct vfio_domain *domain = data;
>  	struct device *iommu_device;
>  	struct iommu_group *group;
>  
>  	iommu_device = vfio_mdev_get_iommu_device(dev);
>  	if (iommu_device) {
>  		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
> -			iommu_aux_detach_device(domain, iommu_device);
> +			iommu_aux_detach_device(domain->domain, iommu_device);
>  		else {
>  			group = iommu_group_get(iommu_device);
>  			if (!group) {
>  				WARN_ON(1);
>  				return -EINVAL;
>  			}
> -			iommu_detach_group(domain, group);
> +			return vfio_domain_detach_group_object(
> +							domain, group);
>  		}
>  	}
>  
> @@ -1359,20 +1449,27 @@ static int vfio_iommu_attach_group(struct vfio_domain *domain,
>  {
>  	if (group->mdev_group)
>  		return iommu_group_for_each_dev(group->iommu_group,
> -						domain->domain,
> +						domain,
>  						vfio_mdev_attach_domain);
>  	else
> -		return iommu_attach_group(domain->domain, group->iommu_group);
> +		return vfio_domain_attach_group_object(domain,
> +							group->iommu_group);
>  }
>  
>  static void vfio_iommu_detach_group(struct vfio_domain *domain,
>  				    struct vfio_group *group)
>  {
> +	int ret;
> +
>  	if (group->mdev_group)
> -		iommu_group_for_each_dev(group->iommu_group, domain->domain,
> +		iommu_group_for_each_dev(group->iommu_group, domain,
>  					 vfio_mdev_detach_domain);
> -	else
> -		iommu_detach_group(domain->domain, group->iommu_group);
> +	else {
> +		ret = vfio_domain_detach_group_object(
> +						domain, group->iommu_group);
> +		if (ret)
> +			pr_warn("%s, deatch failed!! ret: %d", __func__, ret);

Detach cannot fail.

> +	}
>  }
>  
>  static bool vfio_bus_is_mdev(struct bus_type *bus)
> @@ -1412,6 +1509,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  	int ret;
>  	bool resv_msi, msi_remap;
>  	phys_addr_t resv_msi_base;
> +	struct vfio_group_object *group_obj = NULL;
> +	struct device *iommu_device = NULL;
> +	struct iommu_group *iommu_device_group;
> +
>  
>  	mutex_lock(&iommu->lock);
>  
> @@ -1438,14 +1539,20 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  
>  	group->iommu_group = iommu_group;
>  
> +	group_obj = vfio_iommu_group_object_check(iommu, group->iommu_group);
> +	if (group_obj) {
> +		atomic_inc(&group_obj->count);
> +		list_add(&group->next, &group_obj->domain->group_list);
> +		mutex_unlock(&iommu->lock);
> +		return 0;

domain is leaked.
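I.e. this early-return path would need something like (untested):

		kfree(domain);
		mutex_unlock(&iommu->lock);
		return 0;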

> +	}
> +
>  	/* Determine bus_type in order to allocate a domain */
>  	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
>  	if (ret)
>  		goto out_free;
>  
>  	if (vfio_bus_is_mdev(bus)) {
> -		struct device *iommu_device = NULL;
> -
>  		group->mdev_group = true;
>  
>  		/* Determine the isolation type */
> @@ -1469,6 +1576,39 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  		bus = iommu_device->bus;
>  	}
>  
> +	/*
> +	 * Check if iommu backed group attached to a domain within current
> +	 * container. If yes, increase the count; If no, go ahead with a
> +	 * new domain attach process.
> +	 */
> +	group_obj = NULL;

How could it be otherwise?

> +	if (iommu_device) {
> +		iommu_device_group = iommu_group_get(iommu_device);
> +		if (!iommu_device_group) {
> +			WARN_ON(1);

No WARN please.

group is leaked.

> +			kfree(domain);
> +			mutex_unlock(&iommu->lock);
> +			return -EINVAL;
> +		}
> +		group_obj = vfio_iommu_group_object_check(iommu,
> +							iommu_device_group);

iommu_device_group reference is elevated.  Thanks,

Alex

> +	} else
> +		group_obj = vfio_iommu_group_object_check(iommu,
> +							group->iommu_group);
> +
> +	if (group_obj) {
> +		atomic_inc(&group_obj->count);
> +		list_add(&group->next, &group_obj->domain->group_list);
> +		kfree(domain);
> +		mutex_unlock(&iommu->lock);
> +		return 0;
> +	}
> +
> +	/*
> +	 * Now we are sure we want to initialize a new vfio_domain.
> +	 * First step is to alloc an iommu_domain from iommu abstract
> +	 * layer.
> +	 */
>  	domain->domain = iommu_domain_alloc(bus);
>  	if (!domain->domain) {
>  		ret = -EIO;
> @@ -1484,6 +1624,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  			goto out_domain;
>  	}
>  
> +	INIT_LIST_HEAD(&domain->group_obj_list);
>  	ret = vfio_iommu_attach_group(domain, group);
>  	if (ret)
>  		goto out_domain;


* RE: [PATCH v2 13/13] vfio/type1: track iommu backed group attach
  2019-09-26  2:37 ` Alex Williamson
@ 2019-09-30 12:41   ` Liu, Yi L
  2019-09-30 15:49     ` Alex Williamson
  0 siblings, 1 reply; 5+ messages in thread
From: Liu, Yi L @ 2019-09-30 12:41 UTC (permalink / raw)
  To: Alex Williamson
  Cc: kwankhede, Tian, Kevin, baolu.lu, Sun, Yi Y, joro, linux-kernel,
	kvm, Zhao, Yan Y, He, Shaopeng, Xia, Chenbo, Tian, Jun J

Hi Alex,

> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Thursday, September 26, 2019 10:37 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [PATCH v2 13/13] vfio/type1: track iommu backed group attach
> 
> On Thu,  5 Sep 2019 16:08:43 +0800
> Liu Yi L <yi.l.liu@intel.com> wrote:
> 
> > With the introduction of iommu aware mdev group, user may wrap a PF/VF
> > as a mdev. Such mdevs will be called as wrapped PF/VF mdevs in following
> > statements. If it's applied on a non-singleton iommu group, there would
> > be multiple domain attach on an iommu_device group (equal to iommu backed
> > group). Reason is that mdev group attaches is finally an iommu_device
> > group attach in the end. And existing vfio_domain.gorup_list has no idea
> > about it. Thus multiple attach would happen.
> >
> > What's more, under default domain policy, group attach is allowed only
> > when its in-use domain is equal to its default domain as the code below:
> >
> > static int __iommu_attach_group(struct iommu_domain *domain, ..)
> > {
> > 	..
> > 	if (group->default_domain && group->domain != group->default_domain)
> > 		return -EBUSY;
> > 	...
> > }
> >
> > So for the above scenario, only the first group attach on the
> > non-singleton iommu group will be successful. Subsequent group
> > attaches will be failed. However, this is a fairly valid usage case
> > if the wrapped PF/VF mdevs and other devices are assigned to a single
> > VM. We may want to prevent it. In other words, the subsequent group
> > attaches should return success before going to __iommu_attach_group().
> >
> > However, if user tries to assign the wrapped PF/VF mdevs and other
> > devices to different VMs, the subsequent group attaches on a single
> > iommu_device group should be failed. This means the subsequent group
> > attach should finally calls into __iommu_attach_group() and be failed.
> >
> > To meet the above requirements, this patch introduces vfio_group_object
> > structure to track the group attach of an iommu_device group (a.ka.
> > iommu backed group). Each vfio_domain will have a group_obj_list to
> > record the vfio_group_objects. The search of the group_obj_list should
> > use iommu_device group if a group is mdev group.
> >
> > 	struct vfio_group_object {
> > 		atomic_t		count;
> > 		struct iommu_group	*iommu_group;
> > 		struct vfio_domain	*domain;
> > 		struct list_head	next;
> > 	};
> >
> > Each time, a successful group attach should either have a new
> > vfio_group_object created or count increasing of an existing
> > vfio_group_object instance. Details can be found in
> > vfio_domain_attach_group_object().
> >
> > For group detach, should have count decreasing. Please check
> > vfio_domain_detach_group_object().
> >
> > As the vfio_domain.group_obj_list is within vfio container(vfio_iommu)
> > scope, if user wants to passthru a non-singleton to multiple VMs, it
> > will be failed as VMs will have separate vfio containers. Also, if
> > vIOMMU is exposed, it will also fail the attempts of assigning multiple
> > devices (via vfio-pci or PF/VF wrapped mdev) to a single VM. This is
> > aligned with current vfio passthru rules.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 167
> ++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 154 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index 317430d..6a67bd6 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -75,6 +75,7 @@ struct vfio_domain {
> >  	struct iommu_domain	*domain;
> >  	struct list_head	next;
> >  	struct list_head	group_list;
> > +	struct list_head	group_obj_list;
> >  	int			prot;		/* IOMMU_CACHE */
> >  	bool			fgsp;		/* Fine-grained super pages */
> >  };
> > @@ -97,6 +98,13 @@ struct vfio_group {
> >  	bool			mdev_group;	/* An mdev group */
> >  };
> >
> > +struct vfio_group_object {
> > +	atomic_t		count;
> > +	struct iommu_group	*iommu_group;
> > +	struct vfio_domain	*domain;
> > +	struct list_head	next;
> > +};
> > +
> 
> So vfio_domain already has a group_list for all the groups attached to
> that iommu domain.  We add a vfio_group_object list, which is also
> effectively a list of groups attached to the domain, but we're tracking
> something different with it.  All groups seem to get added as a
> vfio_group_object, so why do we need both lists?

yeah. It's functionally workable, but looks ugly. The key purpose of this
patch is to prevent duplicate domain attaches for a single iommu group.
I've got another idea, see if it matches what we expect. Let me explain.

The existing group_list tracks iommu group attachments regardless of
group type (mdev iommu groups and iommu backed iommu groups). For mdev
iommu groups, we actually have two sub-types: mdev iommu groups with an
iommu_device and groups w/o an iommu_device. My idea here is to make a
slight change for mdev iommu groups with an iommu_device, since that is
the case we are handling. For such iommu groups, we can detect this and
use their iommu_device group for the check in the domain->group_list.
E.g. for such a group,
     *) if its iommu_device group is found to be already attached, return;
     *) if no matching attach is found, create a vfio_group and finish
        the rest of the domain attach.
With this proposal, mdev iommu groups with an iommu_device will not have
a vfio_group added to the domain->group_list; they will be tracked via
their iommu_device group. Normal mdev iommu groups will still have their
own vfio_group added to the domain->group_list.

To achieve this, we need to detect the iommu_group type at the beginning
of vfio_iommu_type1_attach_group(), which means moving the bus_type check
to the beginning. I guess it may be a simpler change. Thoughts?
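A rough, untested sketch of the idea (the helper name below is made up
for illustration):

	/*
	 * For an mdev group backed by an iommu_device, do the duplicate
	 * attach check against the iommu_device's group rather than the
	 * mdev group itself.
	 */
	static struct iommu_group *vfio_attach_check_group(struct vfio_group *group,
						struct device *iommu_device)
	{
		if (group->mdev_group && iommu_device)
			return iommu_group_get(iommu_device); /* caller must put */
		return group->iommu_group;
	}

	/* in vfio_iommu_type1_attach_group(), after the bus_type check */
	check_group = vfio_attach_check_group(group, iommu_device);
	list_for_each_entry(d, &iommu->domain_list, next) {
		if (find_iommu_group(d, check_group)) {
			/* already attached within this container, reuse d */
			...
		}
	}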

> As I suspected when
> we discussed this last, this adds complexity for something that's
> currently being proposed as a sample driver.

yeah, I was also hesitant to do it. However, if a user wants to wrap a
PCI device as an mdev, they will more or less face usage on non-singleton
groups. If the new proposal is not that complex, I guess we can try to
make it happen.

> 
> >  /*
> >   * Guest RAM pinning working set or DMA target
> >   */
> > @@ -1263,6 +1271,85 @@ static struct vfio_group *find_iommu_group(struct
> vfio_domain *domain,
> >  	return NULL;
> >  }
> >
> > +static struct vfio_group_object *find_iommu_group_object(
> > +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> > +{
> > +	struct vfio_group_object *g;
> > +
> > +	list_for_each_entry(g, &domain->group_obj_list, next) {
> > +		if (g->iommu_group == iommu_group)
> > +			return g;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +static void vfio_init_iommu_group_object(struct vfio_group_object *group_obj,
> > +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> > +{
> > +	if (!group_obj || !domain || !iommu_group) {
> > +		WARN_ON(1);
> > +		return;
> > +	}
> 
> This is poor error handling, either this should never happen or we
> should have an error path for it.

Good lesson for me. Thanks~

> > +	atomic_set(&group_obj->count, 1);
> > +	group_obj->iommu_group = iommu_group;
> > +	group_obj->domain = domain;
> > +	list_add(&group_obj->next, &domain->group_obj_list);
> > +}
> > +
> > +static int vfio_domain_attach_group_object(
> > +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> > +{
> > +	struct vfio_group_object *group_obj;
> > +
> > +	group_obj = find_iommu_group_object(domain, iommu_group);
> > +	if (group_obj) {
> > +		atomic_inc(&group_obj->count);
> > +		return 0;
> > +	}
> > +	group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);
> 
> The group_obj test should be here, where we can return an error.

yep. thanks.

> > +	vfio_init_iommu_group_object(group_obj, domain, iommu_group);
> > +	return iommu_attach_group(domain->domain, iommu_group);
> > +}
> > +
> > +static int vfio_domain_detach_group_object(
> > +		struct vfio_domain *domain, struct iommu_group *iommu_group)
> 
> A detach should generally return void, it cannot fail.

Oops, yes it is.

> > +{
> > +	struct vfio_group_object *group_obj;
> > +
> > +	group_obj = find_iommu_group_object(domain, iommu_group);
> > +	if (!group_obj) {
> > +		WARN_ON(1);
> > +		return -EINVAL;
> 
> The WARN is probably appropriate here since this is an internal
> consistency failure.

Got it. thanks.

> > +	}
> > +	if (atomic_dec_if_positive(&group_obj->count) == 0) {
> > +		list_del(&group_obj->next);
> > +		kfree(group_obj);
> > +	}
> 
> Like in the previous patch, I don't think this atomic is doing
> everything you're intending it to do, the iommu->lock seems more like
> it might be the one protecting us here.  If that's true, then we don't
> need this to be an atomic.
> 
> > +	iommu_detach_group(domain->domain, iommu_group);
> 
> How do we get away with detaching the group regardless of the reference
> count?!

how poor am I. got it. ^_^

> > +	return 0;
> > +}
> > +
> > +/*
> > + * Check if an iommu backed group has been attached to a domain within
> > + * a specific container (vfio_iommu). If yes, return the vfio_group_object
> > + * which tracks the previous domain attach for this group. Caller of this
> > + * function should hold vfio_iommu->lock.
> > + */
> > +static struct vfio_group_object *vfio_iommu_group_object_check(
> > +		struct vfio_iommu *iommu, struct iommu_group *iommu_group)
> 
> So vfio_iommu_group_object_check() finds a vfio_group_object anywhere
> in the vfio_iommu while find_iommu_group_object() only finds it within
> a vfio_domain.  Maybe find_iommu_group_obj_in_domain() vs
> find_iommu_group_obj_in_iommu()?

yes, better than mine.

> > +{
> > +	struct vfio_domain *d;
> > +	struct vfio_group_object *group_obj;
> > +
> > +	list_for_each_entry(d, &iommu->domain_list, next) {
> > +		group_obj = find_iommu_group_object(d, iommu_group);
> > +		if (group_obj)
> > +			return group_obj;
> > +	}
> > +	return NULL;
> > +}
> > +
> >  static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t
> *base)
> >  {
> >  	struct list_head group_resv_regions;
> > @@ -1310,21 +1397,23 @@ static struct device
> *vfio_mdev_get_iommu_device(struct device *dev)
> >
> >  static int vfio_mdev_attach_domain(struct device *dev, void *data)
> >  {
> > -	struct iommu_domain *domain = data;
> > +	struct vfio_domain *domain = data;
> >  	struct device *iommu_device;
> >  	struct iommu_group *group;
> >
> >  	iommu_device = vfio_mdev_get_iommu_device(dev);
> >  	if (iommu_device) {
> >  		if (iommu_dev_feature_enabled(iommu_device,
> IOMMU_DEV_FEAT_AUX))
> > -			return iommu_aux_attach_device(domain, iommu_device);
> > +			return iommu_aux_attach_device(domain->domain,
> > +							iommu_device);
> >  		else {
> >  			group = iommu_group_get(iommu_device);
> >  			if (!group) {
> >  				WARN_ON(1);
> >  				return -EINVAL;
> >  			}
> > -			return iommu_attach_group(domain, group);
> > +			return vfio_domain_attach_group_object(
> > +							domain, group);
> >  		}
> >  	}
> >
> > @@ -1333,21 +1422,22 @@ static int vfio_mdev_attach_domain(struct device
> *dev, void *data)
> >
> >  static int vfio_mdev_detach_domain(struct device *dev, void *data)
> >  {
> > -	struct iommu_domain *domain = data;
> > +	struct vfio_domain *domain = data;
> >  	struct device *iommu_device;
> >  	struct iommu_group *group;
> >
> >  	iommu_device = vfio_mdev_get_iommu_device(dev);
> >  	if (iommu_device) {
> >  		if (iommu_dev_feature_enabled(iommu_device,
> IOMMU_DEV_FEAT_AUX))
> > -			iommu_aux_detach_device(domain, iommu_device);
> > +			iommu_aux_detach_device(domain->domain,
> iommu_device);
> >  		else {
> >  			group = iommu_group_get(iommu_device);
> >  			if (!group) {
> >  				WARN_ON(1);
> >  				return -EINVAL;
> >  			}
> > -			iommu_detach_group(domain, group);
> > +			return vfio_domain_detach_group_object(
> > +							domain, group);
> >  		}
> >  	}
> >
> > @@ -1359,20 +1449,27 @@ static int vfio_iommu_attach_group(struct
> vfio_domain *domain,
> >  {
> >  	if (group->mdev_group)
> >  		return iommu_group_for_each_dev(group->iommu_group,
> > -						domain->domain,
> > +						domain,
> >  						vfio_mdev_attach_domain);
> >  	else
> > -		return iommu_attach_group(domain->domain, group-
> >iommu_group);
> > +		return vfio_domain_attach_group_object(domain,
> > +							group->iommu_group);
> >  }
> >
> >  static void vfio_iommu_detach_group(struct vfio_domain *domain,
> >  				    struct vfio_group *group)
> >  {
> > +	int ret;
> > +
> >  	if (group->mdev_group)
> > -		iommu_group_for_each_dev(group->iommu_group, domain-
> >domain,
> > +		iommu_group_for_each_dev(group->iommu_group, domain,
> >  					 vfio_mdev_detach_domain);
> > -	else
> > -		iommu_detach_group(domain->domain, group->iommu_group);
> > +	else {
> > +		ret = vfio_domain_detach_group_object(
> > +						domain, group->iommu_group);
> > +		if (ret)
> > +			pr_warn("%s, deatch failed!! ret: %d", __func__, ret);
> 
> Detach cannot fail.

yep. let me fix it.
 
> > +	}
> >  }
> >
> >  static bool vfio_bus_is_mdev(struct bus_type *bus)
> > @@ -1412,6 +1509,10 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> >  	int ret;
> >  	bool resv_msi, msi_remap;
> >  	phys_addr_t resv_msi_base;
> > +	struct vfio_group_object *group_obj = NULL;
> > +	struct device *iommu_device = NULL;
> > +	struct iommu_group *iommu_device_group;
> > +
> >
> >  	mutex_lock(&iommu->lock);
> >
> > @@ -1438,14 +1539,20 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> >
> >  	group->iommu_group = iommu_group;
> >
> > +	group_obj = vfio_iommu_group_object_check(iommu, group-
> >iommu_group);
> > +	if (group_obj) {
> > +		atomic_inc(&group_obj->count);
> > +		list_add(&group->next, &group_obj->domain->group_list);
> > +		mutex_unlock(&iommu->lock);
> > +		return 0;
> 
> domain is leaked.

yep. should be fixed in next version, if we follow this proposal.

> > +	}
> > +
> >  	/* Determine bus_type in order to allocate a domain */
> >  	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
> >  	if (ret)
> >  		goto out_free;
> >
> >  	if (vfio_bus_is_mdev(bus)) {
> > -		struct device *iommu_device = NULL;
> > -
> >  		group->mdev_group = true;
> >
> >  		/* Determine the isolation type */
> > @@ -1469,6 +1576,39 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> >  		bus = iommu_device->bus;
> >  	}
> >
> > +	/*
> > +	 * Check if iommu backed group attached to a domain within current
> > +	 * container. If yes, increase the count; If no, go ahead with a
> > +	 * new domain attach process.
> > +	 */
> > +	group_obj = NULL;
> 
> How could it be otherwise?

Oops, this comment should be worded better. My point is that if group_obj
is not found, this vfio_iommu_type1_attach_group() call should go through
the normal domain attach. And this means the code behind the comment
below should be completed before returning.

+	/*
+	 * Now we are sure we want to initialize a new vfio_domain.
+	 * First step is to alloc an iommu_domain from iommu abstract
+	 * layer.
+	 */

> > +	if (iommu_device) {
> > +		iommu_device_group = iommu_group_get(iommu_device);
> > +		if (!iommu_device_group) {
> > +			WARN_ON(1);
> 
> No WARN please.

yep, no need here.

> group is leaked.

poor leak. let me keep it in mind.

> > +			kfree(domain);
> > +			mutex_unlock(&iommu->lock);
> > +			return -EINVAL;
> > +		}
> > +		group_obj = vfio_iommu_group_object_check(iommu,
> > +							iommu_device_group);
> 
> iommu_device_group reference is elevated.  Thanks,

yes, a leak here. I'll be careful about it in future. :-)

> Alex

Thanks a lot Alex, good lessons in the above comments. I'll address them
if we go on with the proposal in this patch. Or, if you prefer the new
proposal in my first response above, I would be pleased to get it
implemented.

Thanks,
Yi Liu

> > +	} else
> > +		group_obj = vfio_iommu_group_object_check(iommu,
> > +							group->iommu_group);
> > +
> > +	if (group_obj) {
> > +		atomic_inc(&group_obj->count);
> > +		list_add(&group->next, &group_obj->domain->group_list);
> > +		kfree(domain);
> > +		mutex_unlock(&iommu->lock);
> > +		return 0;
> > +	}
> > +
> > +	/*
> > +	 * Now we are sure we want to initialize a new vfio_domain.
> > +	 * First step is to alloc an iommu_domain from iommu abstract
> > +	 * layer.
> > +	 */
> >  	domain->domain = iommu_domain_alloc(bus);
> >  	if (!domain->domain) {
> >  		ret = -EIO;
> > @@ -1484,6 +1624,7 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> >  			goto out_domain;
> >  	}
> >
> > +	INIT_LIST_HEAD(&domain->group_obj_list);
> >  	ret = vfio_iommu_attach_group(domain, group);
> >  	if (ret)
> >  		goto out_domain;


* Re: [PATCH v2 13/13] vfio/type1: track iommu backed group attach
  2019-09-30 12:41   ` Liu, Yi L
@ 2019-09-30 15:49     ` Alex Williamson
  0 siblings, 0 replies; 5+ messages in thread
From: Alex Williamson @ 2019-09-30 15:49 UTC (permalink / raw)
  To: Liu, Yi L
  Cc: kwankhede, Tian, Kevin, baolu.lu, Sun, Yi Y, joro, linux-kernel,
	kvm, Zhao, Yan Y, He, Shaopeng, Xia, Chenbo, Tian, Jun J

On Mon, 30 Sep 2019 12:41:03 +0000
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> Hi Alex,
> 
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Thursday, September 26, 2019 10:37 AM
> > To: Liu, Yi L <yi.l.liu@intel.com>
> > Subject: Re: [PATCH v2 13/13] vfio/type1: track iommu backed group attach
> > 
> > On Thu,  5 Sep 2019 16:08:43 +0800
> > Liu Yi L <yi.l.liu@intel.com> wrote:
> >   
> > > With the introduction of iommu aware mdev group, user may wrap a PF/VF
> > > as a mdev. Such mdevs will be called as wrapped PF/VF mdevs in following
> > > statements. If it's applied on a non-singleton iommu group, there would
> > > be multiple domain attach on an iommu_device group (equal to iommu backed
> > > group). Reason is that mdev group attaches is finally an iommu_device
> > > group attach in the end. And existing vfio_domain.gorup_list has no idea
> > > about it. Thus multiple attach would happen.
> > >
> > > What's more, under default domain policy, group attach is allowed only
> > > when its in-use domain is equal to its default domain as the code below:
> > >
> > > static int __iommu_attach_group(struct iommu_domain *domain, ..)
> > > {
> > > 	..
> > > 	if (group->default_domain && group->domain != group->default_domain)
> > > 		return -EBUSY;
> > > 	...
> > > }
> > >
> > > So for the above scenario, only the first group attach on the
> > > non-singleton iommu group will be successful. Subsequent group
> > > attaches will be failed. However, this is a fairly valid usage case
> > > if the wrapped PF/VF mdevs and other devices are assigned to a single
> > > VM. We may want to prevent it. In other words, the subsequent group
> > > attaches should return success before going to __iommu_attach_group().
> > >
> > > However, if user tries to assign the wrapped PF/VF mdevs and other
> > > devices to different VMs, the subsequent group attaches on a single
> > > iommu_device group should be failed. This means the subsequent group
> > > attach should finally calls into __iommu_attach_group() and be failed.
> > >
> > > To meet the above requirements, this patch introduces vfio_group_object
> > > structure to track the group attach of an iommu_device group (a.ka.
> > > iommu backed group). Each vfio_domain will have a group_obj_list to
> > > record the vfio_group_objects. The search of the group_obj_list should
> > > use iommu_device group if a group is mdev group.
> > >
> > > 	struct vfio_group_object {
> > > 		atomic_t		count;
> > > 		struct iommu_group	*iommu_group;
> > > 		struct vfio_domain	*domain;
> > > 		struct list_head	next;
> > > 	};
> > >
> > > Each time, a successful group attach should either have a new
> > > vfio_group_object created or count increasing of an existing
> > > vfio_group_object instance. Details can be found in
> > > vfio_domain_attach_group_object().
> > >
> > > For group detach, should have count decreasing. Please check
> > > vfio_domain_detach_group_object().
> > >
> > > As the vfio_domain.group_obj_list is within vfio container(vfio_iommu)
> > > scope, if user wants to passthru a non-singleton to multiple VMs, it
> > > will be failed as VMs will have separate vfio containers. Also, if
> > > vIOMMU is exposed, it will also fail the attempts of assigning multiple
> > > devices (via vfio-pci or PF/VF wrapped mdev) to a single VM. This is
> > > aligned with current vfio passthru rules.
> > >
> > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > > Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > ---
> > >  drivers/vfio/vfio_iommu_type1.c | 167  
> > ++++++++++++++++++++++++++++++++++++----  
> > >  1 file changed, 154 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > > index 317430d..6a67bd6 100644
> > > --- a/drivers/vfio/vfio_iommu_type1.c
> > > +++ b/drivers/vfio/vfio_iommu_type1.c
> > > @@ -75,6 +75,7 @@ struct vfio_domain {
> > >  	struct iommu_domain	*domain;
> > >  	struct list_head	next;
> > >  	struct list_head	group_list;
> > > +	struct list_head	group_obj_list;
> > >  	int			prot;		/* IOMMU_CACHE */
> > >  	bool			fgsp;		/* Fine-grained super pages */
> > >  };
> > > @@ -97,6 +98,13 @@ struct vfio_group {
> > >  	bool			mdev_group;	/* An mdev group */
> > >  };
> > >
> > > +struct vfio_group_object {
> > > +	atomic_t		count;
> > > +	struct iommu_group	*iommu_group;
> > > +	struct vfio_domain	*domain;
> > > +	struct list_head	next;
> > > +};
> > > +  
> > 
> > So vfio_domain already has a group_list for all the groups attached to
> > that iommu domain.  We add a vfio_group_object list, which is also
> > effectively a list of groups attached to the domain, but we're tracking
> > something different with it.  All groups seem to get added as a
> > vfio_group_object, so why do we need both lists?  
> 
> yeah. It's functional workable. But looks ugly. The key purpose of this
> patch is to prevent duplicate domain attach for a single iommu group.
> Got another idea, see if it matches what we expect. Let me explain.
> 
> Existing group_list tracks iommu group attachment regardless of group
> type (mdev iommu group and iommu backed iommu group). For mdev
> iommu group, we actually have two sub-types. mdev iommu groups with
> iommu_device and groups w/o iommu_device. My idea here is to do a
> slight change against mdev iommu groups with iommu_device since it is
> the case we are handling. For such iommu groups, we can detect it and
> use its iommu_device group to do check in the domain->group_list. e.g.
> for such a group,
>      *) if found its iommu_device group has been attached, then return.
>      *) if failed to find a matched attach, we create a vfio_group and
>           finish the rest of the domain attach
> With this proposal, mdev iommu groups with iommu_device will not have
> vfio_group added in the domain->group_list. It will be tracked by its
> iommu_device group. For normal mdev iommu groups, it will still have its
> own vfio_group added in the domain->group_list.
> 
> To achieve it, we need to detect iommu_group type at the beginning of
> vfio_iommu_type1_attach_group (), which means to move the bus_type
> check to the beginning. I guess it may have simpler change. Thoughts?

Sounds better, but you'll have to give it a try to know for sure.  I'd
like to have a more cohesive approach, ideally reducing the complexity
even for singleton iommu-backed mdevs.
 
> > As I suspected when
> > we discussed this last, this adds complexity for something that's
> > currently being proposed as a sample driver.  
> 
> yeah, I was also hesitated to do it. However, if a user wants to wrap its
> PCI device as a mdev, it will more or less face the usage on non-singleton
> groups. If the new proposal is not that complex, I guess we can try to
> make it happen.

I suspect that iommu-backed mdevs that we actually want to support are
likely going to be in singleton iommu groups and the type1 code has
become overly complicated with mdev support already.  I'm therefore
certainly more open to approaches that try to unify group handling.

[snip]
> > > @@ -1469,6 +1576,39 @@ static int vfio_iommu_type1_attach_group(void  
> > *iommu_data,  
> > >  		bus = iommu_device->bus;
> > >  	}
> > >
> > > +	/*
> > > +	 * Check if iommu backed group attached to a domain within current
> > > +	 * container. If yes, increase the count; If no, go ahead with a
> > > +	 * new domain attach process.
> > > +	 */
> > > +	group_obj = NULL;  
> > 
> > How could it be otherwise?  
> 
> Oops, this comment should be better described. My point is if group_obj
> is not found, then this vfio_iommu_type1_attach_group() call should go
> with normal domain attach. And this means the codes behind below comment
> should be finished before return.

I think my review comment wasn't specifically a misunderstanding of the
code comment phrasing, but the fact that group_obj cannot be other than
NULL already to reach this point in the code.  IOW, I'm noting that
setting it to NULL here is redundant.  Thanks,

Alex

* [PATCH v2 13/13] vfio/type1: track iommu backed group attach
  2019-09-05  7:59 [PATCH v2 00/13] vfio_pci: wrap pci device as a mediated device Liu Yi L
@ 2019-09-05  7:59 ` Liu Yi L
  0 siblings, 0 replies; 5+ messages in thread
From: Liu Yi L @ 2019-09-05  7:59 UTC (permalink / raw)
  To: alex.williamson, kwankhede
  Cc: kevin.tian, baolu.lu, yi.l.liu, yi.y.sun, joro, linux-kernel,
	kvm, yan.y.zhao, shaopeng.he, chenbo.xia, jun.j.tian

With the introduction of iommu-aware mdev groups, a user may wrap a PF/VF
as an mdev. Such mdevs are referred to as wrapped PF/VF mdevs in the
following. If this is done on a non-singleton iommu group, multiple
domain attaches may be issued against one iommu_device group (i.e. the
iommu backed group). The reason is that an mdev group attach ultimately
becomes an attach of the iommu_device group, and the existing
vfio_domain.group_list knows nothing about that, so duplicate attaches
would happen.

What's more, under the default domain policy, a group attach is allowed
only when the group's in-use domain equals its default domain, as the
code below shows:

static int __iommu_attach_group(struct iommu_domain *domain, ..)
{
	..
	if (group->default_domain && group->domain != group->default_domain)
		return -EBUSY;
	...
}

So in the above scenario, only the first group attach on the
non-singleton iommu group succeeds; subsequent group attaches fail.
However, this is a perfectly valid usage case if the wrapped PF/VF mdevs
and the other devices are assigned to a single VM, so we do not want to
prevent it. In other words, the subsequent group attaches should return
success without ever reaching __iommu_attach_group().

However, if the user tries to assign the wrapped PF/VF mdevs and the
other devices to different VMs, the subsequent group attaches on a single
iommu_device group should fail. This means such a subsequent group attach
should eventually call into __iommu_attach_group() and fail there.

To meet the above requirements, this patch introduces a vfio_group_object
structure to track the group attaches of an iommu_device group (a.k.a.
iommu backed group). Each vfio_domain gets a group_obj_list to record the
vfio_group_objects. Lookups in the group_obj_list use the iommu_device
group when a group is an mdev group.

	struct vfio_group_object {
		atomic_t		count;
		struct iommu_group	*iommu_group;
		struct vfio_domain	*domain;
		struct list_head	next;
	};

Each successful group attach either creates a new vfio_group_object or
increments the count of an existing vfio_group_object instance. Details
can be found in vfio_domain_attach_group_object().

Each group detach decrements the count accordingly. Please check
vfio_domain_detach_group_object().

As the vfio_domain.group_obj_list is scoped to a vfio container
(vfio_iommu), an attempt to pass through a non-singleton group to
multiple VMs fails, since the VMs use separate vfio containers. Also, if
a vIOMMU is exposed, attempts to assign multiple devices (via vfio-pci or
PF/VF wrapped mdev) to a single VM will fail as well. This is aligned
with the current vfio passthrough rules.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 drivers/vfio/vfio_iommu_type1.c | 167 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 154 insertions(+), 13 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 317430d..6a67bd6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -75,6 +75,7 @@ struct vfio_domain {
 	struct iommu_domain	*domain;
 	struct list_head	next;
 	struct list_head	group_list;
+	struct list_head	group_obj_list;
 	int			prot;		/* IOMMU_CACHE */
 	bool			fgsp;		/* Fine-grained super pages */
 };
@@ -97,6 +98,13 @@ struct vfio_group {
 	bool			mdev_group;	/* An mdev group */
 };
 
+struct vfio_group_object {
+	atomic_t		count;
+	struct iommu_group	*iommu_group;
+	struct vfio_domain	*domain;
+	struct list_head	next;
+};
+
 /*
  * Guest RAM pinning working set or DMA target
  */
@@ -1263,6 +1271,85 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
 	return NULL;
 }
 
+static struct vfio_group_object *find_iommu_group_object(
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	struct vfio_group_object *g;
+
+	list_for_each_entry(g, &domain->group_obj_list, next) {
+		if (g->iommu_group == iommu_group)
+			return g;
+	}
+
+	return NULL;
+}
+
+static void vfio_init_iommu_group_object(struct vfio_group_object *group_obj,
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	if (!group_obj || !domain || !iommu_group) {
+		WARN_ON(1);
+		return;
+	}
+	atomic_set(&group_obj->count, 1);
+	group_obj->iommu_group = iommu_group;
+	group_obj->domain = domain;
+	list_add(&group_obj->next, &domain->group_obj_list);
+}
+
+static int vfio_domain_attach_group_object(
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	struct vfio_group_object *group_obj;
+
+	group_obj = find_iommu_group_object(domain, iommu_group);
+	if (group_obj) {
+		atomic_inc(&group_obj->count);
+		return 0;
+	}
+	group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);
+	vfio_init_iommu_group_object(group_obj, domain, iommu_group);
+	return iommu_attach_group(domain->domain, iommu_group);
+}
+
+static int vfio_domain_detach_group_object(
+		struct vfio_domain *domain, struct iommu_group *iommu_group)
+{
+	struct vfio_group_object *group_obj;
+
+	group_obj = find_iommu_group_object(domain, iommu_group);
+	if (!group_obj) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (atomic_dec_if_positive(&group_obj->count) == 0) {
+		list_del(&group_obj->next);
+		kfree(group_obj);
+	}
+	iommu_detach_group(domain->domain, iommu_group);
+	return 0;
+}
+
+/*
+ * Check if an iommu backed group has been attached to a domain within
+ * a specific container (vfio_iommu). If yes, return the vfio_group_object
+ * which tracks the previous domain attach for this group. Caller of this
+ * function should hold vfio_iommu->lock.
+ */
+static struct vfio_group_object *vfio_iommu_group_object_check(
+		struct vfio_iommu *iommu, struct iommu_group *iommu_group)
+{
+	struct vfio_domain *d;
+	struct vfio_group_object *group_obj;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		group_obj = find_iommu_group_object(d, iommu_group);
+		if (group_obj)
+			return group_obj;
+	}
+	return NULL;
+}
+
 static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
 {
 	struct list_head group_resv_regions;
@@ -1310,21 +1397,23 @@ static struct device *vfio_mdev_get_iommu_device(struct device *dev)
 
 static int vfio_mdev_attach_domain(struct device *dev, void *data)
 {
-	struct iommu_domain *domain = data;
+	struct vfio_domain *domain = data;
 	struct device *iommu_device;
 	struct iommu_group *group;
 
 	iommu_device = vfio_mdev_get_iommu_device(dev);
 	if (iommu_device) {
 		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			return iommu_aux_attach_device(domain, iommu_device);
+			return iommu_aux_attach_device(domain->domain,
+							iommu_device);
 		else {
 			group = iommu_group_get(iommu_device);
 			if (!group) {
 				WARN_ON(1);
 				return -EINVAL;
 			}
-			return iommu_attach_group(domain, group);
+			return vfio_domain_attach_group_object(
+							domain, group);
 		}
 	}
 
@@ -1333,21 +1422,22 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data)
 
 static int vfio_mdev_detach_domain(struct device *dev, void *data)
 {
-	struct iommu_domain *domain = data;
+	struct vfio_domain *domain = data;
 	struct device *iommu_device;
 	struct iommu_group *group;
 
 	iommu_device = vfio_mdev_get_iommu_device(dev);
 	if (iommu_device) {
 		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			iommu_aux_detach_device(domain, iommu_device);
+			iommu_aux_detach_device(domain->domain, iommu_device);
 		else {
 			group = iommu_group_get(iommu_device);
 			if (!group) {
 				WARN_ON(1);
 				return -EINVAL;
 			}
-			iommu_detach_group(domain, group);
+			return vfio_domain_detach_group_object(
+							domain, group);
 		}
 	}
 
@@ -1359,20 +1449,27 @@ static int vfio_iommu_attach_group(struct vfio_domain *domain,
 {
 	if (group->mdev_group)
 		return iommu_group_for_each_dev(group->iommu_group,
-						domain->domain,
+						domain,
 						vfio_mdev_attach_domain);
 	else
-		return iommu_attach_group(domain->domain, group->iommu_group);
+		return vfio_domain_attach_group_object(domain,
+							group->iommu_group);
 }
 
 static void vfio_iommu_detach_group(struct vfio_domain *domain,
 				    struct vfio_group *group)
 {
+	int ret;
+
 	if (group->mdev_group)
-		iommu_group_for_each_dev(group->iommu_group, domain->domain,
+		iommu_group_for_each_dev(group->iommu_group, domain,
 					 vfio_mdev_detach_domain);
-	else
-		iommu_detach_group(domain->domain, group->iommu_group);
+	else {
+		ret = vfio_domain_detach_group_object(
+						domain, group->iommu_group);
+		if (ret)
+			pr_warn("%s, detach failed!! ret: %d", __func__, ret);
+	}
 }
 
 static bool vfio_bus_is_mdev(struct bus_type *bus)
@@ -1412,6 +1509,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	int ret;
 	bool resv_msi, msi_remap;
 	phys_addr_t resv_msi_base;
+	struct vfio_group_object *group_obj = NULL;
+	struct device *iommu_device = NULL;
+	struct iommu_group *iommu_device_group;
+
 
 	mutex_lock(&iommu->lock);
 
@@ -1438,14 +1539,20 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 
 	group->iommu_group = iommu_group;
 
+	group_obj = vfio_iommu_group_object_check(iommu, group->iommu_group);
+	if (group_obj) {
+		atomic_inc(&group_obj->count);
+		list_add(&group->next, &group_obj->domain->group_list);
+		mutex_unlock(&iommu->lock);
+		return 0;
+	}
+
 	/* Determine bus_type in order to allocate a domain */
 	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
 	if (ret)
 		goto out_free;
 
 	if (vfio_bus_is_mdev(bus)) {
-		struct device *iommu_device = NULL;
-
 		group->mdev_group = true;
 
 		/* Determine the isolation type */
@@ -1469,6 +1576,39 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		bus = iommu_device->bus;
 	}
 
+	/*
+	 * Check if iommu backed group attached to a domain within current
+	 * container. If yes, increase the count; If no, go ahead with a
+	 * new domain attach process.
+	 */
+	group_obj = NULL;
+	if (iommu_device) {
+		iommu_device_group = iommu_group_get(iommu_device);
+		if (!iommu_device_group) {
+			WARN_ON(1);
+			kfree(domain);
+			mutex_unlock(&iommu->lock);
+			return -EINVAL;
+		}
+		group_obj = vfio_iommu_group_object_check(iommu,
+							iommu_device_group);
+	} else
+		group_obj = vfio_iommu_group_object_check(iommu,
+							group->iommu_group);
+
+	if (group_obj) {
+		atomic_inc(&group_obj->count);
+		list_add(&group->next, &group_obj->domain->group_list);
+		kfree(domain);
+		mutex_unlock(&iommu->lock);
+		return 0;
+	}
+
+	/*
+	 * Now we are sure we want to initialize a new vfio_domain.
+	 * First step is to alloc an iommu_domain from iommu abstract
+	 * layer.
+	 */
 	domain->domain = iommu_domain_alloc(bus);
 	if (!domain->domain) {
 		ret = -EIO;
@@ -1484,6 +1624,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 			goto out_domain;
 	}
 
+	INIT_LIST_HEAD(&domain->group_obj_list);
 	ret = vfio_iommu_attach_group(domain, group);
 	if (ret)
 		goto out_domain;
-- 
2.7.4

