intel-gfx.lists.freedesktop.org archive mirror
From: Jason Gunthorpe <jgg@nvidia.com>
To: David Airlie <airlied@linux.ie>,
	Tony Krowiak <akrowiak@linux.ibm.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>, Daniel Vetter <daniel@ffwll.ch>,
	Diana Craciun <diana.craciun@oss.nxp.com>,
	dri-devel@lists.freedesktop.org,
	Eric Auger <eric.auger@redhat.com>,
	Eric Farman <farman@linux.ibm.com>,
	Harald Freudenberger <freude@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	intel-gfx@lists.freedesktop.org,
	intel-gvt-dev@lists.freedesktop.org,
	Jani Nikula <jani.nikula@linux.intel.com>,
	Jason Herne <jjherne@linux.ibm.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	kvm@vger.kernel.org, Kirti Wankhede <kwankhede@nvidia.com>,
	linux-doc@vger.kernel.org, linux-s390@vger.kernel.org,
	Matthew Rosato <mjrosato@linux.ibm.com>,
	Peter Oberparleiter <oberpar@linux.ibm.com>,
	Halil Pasic <pasic@linux.ibm.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	Vineeth Vijayan <vneethv@linux.ibm.com>,
	Zhi Wang <zhi.a.wang@intel.com>
Cc: Max Gurtovoy <mgurtovoy@nvidia.com>,
	Yishai Hadas <yishaih@nvidia.com>,
	"Raj, Ashok" <ashok.raj@intel.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [Intel-gfx] [PATCH v3 10/14] vfio/pci: Reorganize VFIO_DEVICE_PCI_HOT_RESET to use the device set
Date: Wed, 28 Jul 2021 21:49:19 -0300	[thread overview]
Message-ID: <10-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com> (raw)
In-Reply-To: <0-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com>

Like vfio_pci_try_bus_reset(), this code wants to reset all of the devices
in the "reset group", which has the same membership as the device set.

Instead of trying to reconstruct the device set from the PCI list, go
directly from the device set's device list to execute the reset.

The same basic structure as vfio_pci_try_bus_reset() is used. The
'vfio_devices' struct is replaced with the device set's linked list, and we
simply sweep it multiple times under the lock.

This eliminates a memory allocation, the device get/put traffic, and another
improperly locked test of pci_dev_driver().
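
In outline, the new flow looks like this (a simplified sketch; the real
vfio_hot_reset_device_set() below also validates the group membership and
unwinds every held lock on contention):

	mutex_lock(&dev_set->lock);

	/* Pass 1: zap mmaps and take every device's vma_lock */
	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
		if (!vfio_pci_zap_and_vma_lock(cur, true))
			goto unwind;

	/* Pass 2: take each memory_lock, then drop that vma_lock */
	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
		if (!down_write_trylock(&cur->memory_lock))
			goto unwind;
		mutex_unlock(&cur->vma_lock);
	}

	/* User has access, do the reset */
	ret = pci_reset_bus(vdev->pdev);
	/* ... drop every memory_lock ... */
	mutex_unlock(&dev_set->lock);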

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci.c | 215 +++++++++++++++---------------------
 1 file changed, 91 insertions(+), 124 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a1ae9a83a38621..721dcc99aaa042 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -223,9 +223,11 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
 	}
 }
 
+struct vfio_pci_group_info;
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 static void vfio_pci_disable(struct vfio_pci_device *vdev);
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
+static int vfio_hot_reset_device_set(struct vfio_pci_device *vdev,
+				     struct vfio_pci_group_info *groups);
 
 /*
  * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -645,37 +647,11 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
 	return 0;
 }
 
-struct vfio_pci_group_entry {
-	struct vfio_group *group;
-	int id;
-};
-
 struct vfio_pci_group_info {
 	int count;
-	struct vfio_pci_group_entry *groups;
+	struct vfio_group **groups;
 };
 
-static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
-{
-	struct vfio_pci_group_info *info = data;
-	struct iommu_group *group;
-	int id, i;
-
-	group = iommu_group_get(&pdev->dev);
-	if (!group)
-		return -EPERM;
-
-	id = iommu_group_id(group);
-
-	for (i = 0; i < info->count; i++)
-		if (info->groups[i].id == id)
-			break;
-
-	iommu_group_put(group);
-
-	return (i == info->count) ? -EINVAL : 0;
-}
-
 static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
 {
 	for (; pdev; pdev = pdev->bus->self)
@@ -753,12 +729,6 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
 	return 0;
 }
 
-struct vfio_devices {
-	struct vfio_pci_device **devices;
-	int cur_index;
-	int max_index;
-};
-
 static long vfio_pci_ioctl(struct vfio_device *core_vdev,
 			   unsigned int cmd, unsigned long arg)
 {
@@ -1127,11 +1097,10 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
 	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
 		struct vfio_pci_hot_reset hdr;
 		int32_t *group_fds;
-		struct vfio_pci_group_entry *groups;
+		struct vfio_group **groups;
 		struct vfio_pci_group_info info;
-		struct vfio_devices devs = { .cur_index = 0 };
 		bool slot = false;
-		int i, group_idx, mem_idx = 0, count = 0, ret = 0;
+		int group_idx, count = 0, ret = 0;
 
 		minsz = offsetofend(struct vfio_pci_hot_reset, count);
 
@@ -1198,9 +1167,7 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
 				break;
 			}
 
-			groups[group_idx].group = group;
-			groups[group_idx].id =
-					vfio_external_user_iommu_id(group);
+			groups[group_idx] = group;
 		}
 
 		kfree(group_fds);
@@ -1212,64 +1179,11 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
 		info.count = hdr.count;
 		info.groups = groups;
 
-		/*
-		 * Test whether all the affected devices are contained
-		 * by the set of groups provided by the user.
-		 */
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-						    vfio_pci_validate_devs,
-						    &info, slot);
-		if (ret)
-			goto hot_reset_release;
-
-		devs.max_index = count;
-		devs.devices = kcalloc(count, sizeof(struct vfio_device *),
-				       GFP_KERNEL);
-		if (!devs.devices) {
-			ret = -ENOMEM;
-			goto hot_reset_release;
-		}
-
-		/*
-		 * We need to get memory_lock for each device, but devices
-		 * can share mmap_lock, therefore we need to zap and hold
-		 * the vma_lock for each device, and only then get each
-		 * memory_lock.
-		 */
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-					    vfio_pci_try_zap_and_vma_lock_cb,
-					    &devs, slot);
-		if (ret)
-			goto hot_reset_release;
-
-		for (; mem_idx < devs.cur_index; mem_idx++) {
-			struct vfio_pci_device *tmp = devs.devices[mem_idx];
-
-			ret = down_write_trylock(&tmp->memory_lock);
-			if (!ret) {
-				ret = -EBUSY;
-				goto hot_reset_release;
-			}
-			mutex_unlock(&tmp->vma_lock);
-		}
-
-		/* User has access, do the reset */
-		ret = pci_reset_bus(vdev->pdev);
+		ret = vfio_hot_reset_device_set(vdev, &info);
 
 hot_reset_release:
-		for (i = 0; i < devs.cur_index; i++) {
-			struct vfio_pci_device *tmp = devs.devices[i];
-
-			if (i < mem_idx)
-				up_write(&tmp->memory_lock);
-			else
-				mutex_unlock(&tmp->vma_lock);
-			vfio_device_put(&tmp->vdev);
-		}
-		kfree(devs.devices);
-
 		for (group_idx--; group_idx >= 0; group_idx--)
-			vfio_group_put_external_user(groups[group_idx].group);
+			vfio_group_put_external_user(groups[group_idx]);
 
 		kfree(groups);
 		return ret;
@@ -2148,37 +2062,15 @@ static struct pci_driver vfio_pci_driver = {
 	.err_handler		= &vfio_err_handlers,
 };
 
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
+static bool vfio_dev_in_groups(struct vfio_pci_device *vdev,
+			       struct vfio_pci_group_info *groups)
 {
-	struct vfio_devices *devs = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
-
-	if (devs->cur_index == devs->max_index)
-		return -ENOSPC;
-
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -EINVAL;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
+	unsigned int i;
 
-	/*
-	 * Locking multiple devices is prone to deadlock, runaway and
-	 * unwind if we hit contention.
-	 */
-	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	devs->devices[devs->cur_index++] = vdev;
-	return 0;
+	for (i = 0; i < groups->count; i++)
+		if (groups->groups[i] == vdev->vdev.group)
+			return true;
+	return false;
 }
 
 static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
@@ -2194,6 +2086,81 @@ static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
 	return -EBUSY;
 }
 
+/*
+ * We need to get memory_lock for each device, but devices can share mmap_lock,
+ * therefore we need to zap and hold the vma_lock for each device, and only then
+ * get each memory_lock.
+ */
+static int vfio_hot_reset_device_set(struct vfio_pci_device *vdev,
+				     struct vfio_pci_group_info *groups)
+{
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct vfio_pci_device *cur_mem;
+	struct vfio_pci_device *cur_vma;
+	struct vfio_pci_device *cur;
+	bool is_mem = true;
+	int ret;
+
+	mutex_lock(&dev_set->lock);
+	cur_mem = list_first_entry(&dev_set->device_list,
+				   struct vfio_pci_device, vdev.dev_set_list);
+
+	/* All devices in the group to be reset need VFIO devices */
+	if (vfio_pci_for_each_slot_or_bus(
+		    vdev->pdev, vfio_pci_is_device_in_set, dev_set,
+		    !pci_probe_reset_slot(vdev->pdev->slot))) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
+		/*
+		 * Test whether all the affected devices are contained by the
+		 * set of groups provided by the user.
+		 */
+		if (!vfio_dev_in_groups(cur_vma, groups)) {
+			ret = -EINVAL;
+			goto err_undo;
+		}
+
+		/*
+		 * Locking multiple devices is prone to deadlock, runaway and
+		 * unwind if we hit contention.
+		 */
+		if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {
+			ret = -EBUSY;
+			goto err_undo;
+		}
+	}
+	cur_vma = NULL;
+
+	list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {
+		if (!down_write_trylock(&cur_mem->memory_lock)) {
+			ret = -EBUSY;
+			goto err_undo;
+		}
+		mutex_unlock(&cur_mem->vma_lock);
+	}
+	cur_mem = NULL;
+
+	ret = pci_reset_bus(vdev->pdev);
+
+err_undo:
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		if (cur == cur_mem)
+			is_mem = false;
+		if (cur == cur_vma)
+			break;
+		if (is_mem)
+			up_write(&cur->memory_lock);
+		else
+			mutex_unlock(&cur->vma_lock);
+	}
+err_unlock:
+	mutex_unlock(&dev_set->lock);
+	return ret;
+}
+
 /*
  * vfio-core considers a group to be viable and will create a vfio_device even
  * if some devices are bound to drivers like pci-stub or pcieport.  Here we
-- 
2.32.0
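
For context, a minimal sketch of how userspace drives this ioctl (error
handling trimmed; the fds are hypothetical and assumed to be opened
elsewhere, and the struct layout is the one from include/uapi/linux/vfio.h):

	#include <linux/vfio.h>
	#include <sys/ioctl.h>
	#include <stdlib.h>

	/* device_fd: an open VFIO device fd; group_fds: fds for every VFIO
	 * group that owns a device in the reset group. */
	static int pci_hot_reset(int device_fd, const int *group_fds, int ngroups)
	{
		struct vfio_pci_hot_reset *reset;
		size_t sz = sizeof(*reset) + ngroups * sizeof(__s32);
		int ret, i;

		reset = calloc(1, sz);
		if (!reset)
			return -1;
		reset->argsz = sz;
		reset->count = ngroups;
		for (i = 0; i < ngroups; i++)
			reset->group_fds[i] = group_fds[i];

		/* Fails unless the supplied groups cover every device in the
		 * reset group (now checked against the device set). */
		ret = ioctl(device_fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
		free(reset);
		return ret;
	}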


Thread overview: 29+ messages
2021-07-29  0:49 [Intel-gfx] [PATCH v3 00/14] Provide core infrastructure for managing open/release Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 01/14] vfio/samples: Remove module get/put Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 02/14] vfio/mbochs: Fix missing error unwind of mbochs_used_mbytes Jason Gunthorpe
2021-07-29  7:21   ` Christoph Hellwig
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 03/14] vfio: Introduce a vfio_uninit_group_dev() API call Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 04/14] vfio: Provide better generic support for open/release vfio_device_ops Jason Gunthorpe
2021-07-29  7:26   ` Christoph Hellwig
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 05/14] vfio/samples: Delete useless open/close Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 06/14] vfio/fsl: Move to the device set infrastructure Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 07/14] vfio/platform: Use open_device() instead of open coding a refcnt scheme Jason Gunthorpe
2021-08-05 12:37   ` Eric Auger
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 08/14] vfio/pci: Move to the device set infrastructure Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 09/14] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set Jason Gunthorpe
2021-07-29  7:30   ` Christoph Hellwig
2021-08-03 16:34   ` Alex Williamson
2021-08-03 16:41     ` Jason Gunthorpe
2021-08-03 16:52       ` Alex Williamson
2021-08-05 11:47         ` Jason Gunthorpe
2021-08-05 17:33           ` Alex Williamson
2021-08-05 23:05             ` Jason Gunthorpe
2021-07-29  0:49 ` Jason Gunthorpe [this message]
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 11/14] vfio/mbochs: Fix close when multiple device FDs are open Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 12/14] vfio/ap, ccw: Fix open/close " Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 13/14] vfio/gvt: " Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] [PATCH v3 14/14] vfio: Remove struct vfio_device_ops open/release Jason Gunthorpe
2021-07-29  2:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Provide core infrastructure for managing open/release (rev7) Patchwork
2021-07-29  2:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-07-29  8:16 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2021-08-05 14:26 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for Provide core infrastructure for managing open/release (rev8) Patchwork
