All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: David Airlie <airlied@linux.ie>,
	Tony Krowiak <akrowiak@linux.ibm.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>, Daniel Vetter <daniel@ffwll.ch>,
	Diana Craciun <diana.craciun@oss.nxp.com>,
	dri-devel@lists.freedesktop.org,
	Eric Auger <eric.auger@redhat.com>,
	Eric Farman <farman@linux.ibm.com>,
	Harald Freudenberger <freude@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	intel-gfx@lists.freedesktop.org,
	intel-gvt-dev@lists.freedesktop.org,
	Jani Nikula <jani.nikula@linux.intel.com>,
	Jason Herne <jjherne@linux.ibm.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	kvm@vger.kernel.org, Kirti Wankhede <kwankhede@nvidia.com>,
	linux-doc@vger.kernel.org, linux-s390@vger.kernel.org,
	Matthew Rosato <mjrosato@linux.ibm.com>,
	Peter Oberparleiter <oberpar@linux.ibm.com>,
	Halil Pasic <pasic@linux.ibm.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	Vineeth Vijayan <vneethv@linux.ibm.com>,
	Zhi Wang <zhi.a.wang@intel.com>
Cc: "Raj, Ashok" <ashok.raj@intel.com>,
	Christoph Hellwig <hch@lst.de>,
	Leon Romanovsky <leonro@nvidia.com>,
	Max Gurtovoy <mgurtovoy@nvidia.com>,
	Yishai Hadas <yishaih@nvidia.com>,
	Zhenyu Wang <zhenyuw@linux.intel.com>
Subject: [PATCH v3 09/14] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set
Date: Wed, 28 Jul 2021 21:49:18 -0300	[thread overview]
Message-ID: <9-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com> (raw)
In-Reply-To: <0-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com>

Keep track of all the vfio_devices that have been added to the device set
and use this list in vfio_pci_try_bus_reset() instead of trying to work
backwards from the pci_device.

The dev_set->lock directly prevents devices from joining/leaving the set,
which further implies the pci_device cannot change drivers or that the
vfio_device be freed, eliminating the need for get/put's.

Completeness of the device set can be directly measured by checking if
every PCI device in the reset group is also in the device set - which
proves that VFIO drivers are attached to everything.

This restructuring corrects a call to pci_dev_driver() without holding the
device_lock() and removes a hard wiring to &vfio_pci_driver.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci.c | 148 +++++++++++++++---------------------
 1 file changed, 62 insertions(+), 86 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 5d6db93d6c680f..a1ae9a83a38621 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -404,6 +404,9 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
 	int i, bar;
 
+	/* For needs_reset */
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
+
 	/* Stop the device from further DMA */
 	pci_clear_master(pdev);
 
@@ -2145,7 +2148,7 @@ static struct pci_driver vfio_pci_driver = {
 	.err_handler		= &vfio_err_handlers,
 };
 
-static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
 {
 	struct vfio_devices *devs = data;
 	struct vfio_device *device;
@@ -2165,8 +2168,11 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 
 	vdev = container_of(device, struct vfio_pci_device, vdev);
 
-	/* Fault if the device is not unused */
-	if (device->open_count) {
+	/*
+	 * Locking multiple devices is prone to deadlock, runaway and
+	 * unwind if we hit contention.
+	 */
+	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
 		vfio_device_put(device);
 		return -EBUSY;
 	}
@@ -2175,112 +2181,82 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 	return 0;
 }
 
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
+static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
 {
-	struct vfio_devices *devs = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
+	struct vfio_device_set *dev_set = data;
+	struct vfio_device *cur;
 
-	if (devs->cur_index == devs->max_index)
-		return -ENOSPC;
+	lockdep_assert_held(&dev_set->lock);
 
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -EINVAL;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
+	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
+		if (cur->dev == &pdev->dev)
+			return 0;
+	return -EBUSY;
+}
 
-	/*
-	 * Locking multiple devices is prone to deadlock, runaway and
-	 * unwind if we hit contention.
-	 */
-	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
-		vfio_device_put(device);
-		return -EBUSY;
+/*
+ * vfio-core considers a group to be viable and will create a vfio_device even
+ * if some devices are bound to drivers like pci-stub or pcieport.  Here we
+ * require all PCI devices to be inside our dev_set since that ensures they stay
+ * put and that every driver controlling the device can co-ordinate with the
+ * device reset.
+ */
+static struct pci_dev *vfio_pci_find_reset_target(struct vfio_pci_device *vdev)
+{
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct vfio_pci_device *cur;
+	bool needs_reset = false;
+
+	/* No VFIO device has an open device FD */
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		if (cur->vdev.open_count)
+			return NULL;
+		needs_reset |= cur->needs_reset;
 	}
+	if (!needs_reset)
+		return NULL;
 
-	devs->devices[devs->cur_index++] = vdev;
-	return 0;
+	/* All PCI devices in the group to be reset need to be in our dev_set */
+	if (vfio_pci_for_each_slot_or_bus(
+		    vdev->pdev, vfio_pci_is_device_in_set, dev_set,
+		    !pci_probe_reset_slot(vdev->pdev->slot)))
+		return NULL;
+	return cur->pdev;
 }
 
 /*
  * If a bus or slot reset is available for the provided device and:
  *  - All of the devices affected by that bus or slot reset are unused
- *    (!refcnt)
  *  - At least one of the affected devices is marked dirty via
  *    needs_reset (such as by lack of FLR support)
- * Then attempt to perform that bus or slot reset.  Callers are required
- * to hold vdev->dev_set->lock, protecting the bus/slot reset group from
- * concurrent opens.  A vfio_device reference is acquired for each device
- * to prevent unbinds during the reset operation.
- *
- * NB: vfio-core considers a group to be viable even if some devices are
- * bound to drivers like pci-stub or pcieport.  Here we require all devices
- * to be bound to vfio_pci since that's the only way we can be sure they
- * stay put.
+ * Then attempt to perform that bus or slot reset.
  */
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 {
-	struct vfio_devices devs = { .cur_index = 0 };
-	int i = 0, ret = -EINVAL;
-	bool slot = false;
-	struct vfio_pci_device *tmp;
-
-	if (!pci_probe_reset_slot(vdev->pdev->slot))
-		slot = true;
-	else if (pci_probe_reset_bus(vdev->pdev->bus))
-		return;
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct pci_dev *to_reset;
+	struct vfio_pci_device *cur;
+	int ret;
 
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
-					  &i, slot) || !i)
-		return;
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
 
-	devs.max_index = i;
-	devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
-	if (!devs.devices)
+	if (pci_probe_reset_slot(vdev->pdev->slot) &&
+	    pci_probe_reset_bus(vdev->pdev->bus))
 		return;
 
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
-					  vfio_pci_get_unused_devs,
-					  &devs, slot))
-		goto put_devs;
-
-	/* Does at least one need a reset? */
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-		if (tmp->needs_reset) {
-			ret = pci_reset_bus(vdev->pdev);
-			break;
-		}
-	}
-
-put_devs:
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-
-		/*
-		 * If reset was successful, affected devices no longer need
-		 * a reset and we should return all the collateral devices
-		 * to low power.  If not successful, we either didn't reset
-		 * the bus or timed out waiting for it, so let's not touch
-		 * the power state.
-		 */
-		if (!ret) {
-			tmp->needs_reset = false;
+	to_reset = vfio_pci_find_reset_target(vdev);
+	if (!to_reset)
+		return;
 
-			if (tmp != vdev && !disable_idle_d3)
-				vfio_pci_set_power_state(tmp, PCI_D3hot);
-		}
+	ret = pci_reset_bus(to_reset);
+	if (ret)
+		return;
 
-		vfio_device_put(&tmp->vdev);
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		cur->needs_reset = false;
+		if (cur->pdev != to_reset && !disable_idle_d3)
+			vfio_pci_set_power_state(cur, PCI_D3hot);
 	}
-
-	kfree(devs.devices);
 }
 
 static void __exit vfio_pci_cleanup(void)
-- 
2.32.0


WARNING: multiple messages have this Message-ID (diff)
From: Jason Gunthorpe <jgg@nvidia.com>
To: David Airlie <airlied@linux.ie>,
	Tony Krowiak <akrowiak@linux.ibm.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>, Daniel Vetter <daniel@ffwll.ch>,
	Diana Craciun <diana.craciun@oss.nxp.com>,
	dri-devel@lists.freedesktop.org,
	Eric Auger <eric.auger@redhat.com>,
	Eric Farman <farman@linux.ibm.com>,
	Harald Freudenberger <freude@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	intel-gfx@lists.freedesktop.org,
	intel-gvt-dev@lists.freedesktop.org,
	Jani Nikula <jani.nikula@linux.intel.com>,
	Jason Herne <jjherne@linux.ibm.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	kvm@vger.kernel.org, Kirti Wankhede <kwankhede@nvidia.com>,
	linux-doc@vger.kernel.org, linux-s390@vger.kernel.org,
	Matthew Rosato <mjrosato@linux.ibm.com>,
	Peter Oberparleiter <oberpar@linux.ibm.com>,
	Halil Pasic <pasic@linux.ibm.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	Vineeth Vijayan <vneethv@linux.ibm.com>,
	Zhi Wang <zhi.a.wang@intel.com>
Cc: Max Gurtovoy <mgurtovoy@nvidia.com>,
	Yishai Hadas <yishaih@nvidia.com>,
	"Raj, Ashok" <ashok.raj@intel.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [PATCH v3 09/14] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set
Date: Wed, 28 Jul 2021 21:49:18 -0300	[thread overview]
Message-ID: <9-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com> (raw)
In-Reply-To: <0-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com>

Keep track of all the vfio_devices that have been added to the device set
and use this list in vfio_pci_try_bus_reset() instead of trying to work
backwards from the pci_device.

The dev_set->lock directly prevents devices from joining/leaving the set,
which further implies the pci_device cannot change drivers or that the
vfio_device be freed, eliminating the need for get/put's.

Completeness of the device set can be directly measured by checking if
every PCI device in the reset group is also in the device set - which
proves that VFIO drivers are attached to everything.

This restructuring corrects a call to pci_dev_driver() without holding the
device_lock() and removes a hard wiring to &vfio_pci_driver.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci.c | 148 +++++++++++++++---------------------
 1 file changed, 62 insertions(+), 86 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 5d6db93d6c680f..a1ae9a83a38621 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -404,6 +404,9 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
 	int i, bar;
 
+	/* For needs_reset */
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
+
 	/* Stop the device from further DMA */
 	pci_clear_master(pdev);
 
@@ -2145,7 +2148,7 @@ static struct pci_driver vfio_pci_driver = {
 	.err_handler		= &vfio_err_handlers,
 };
 
-static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
 {
 	struct vfio_devices *devs = data;
 	struct vfio_device *device;
@@ -2165,8 +2168,11 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 
 	vdev = container_of(device, struct vfio_pci_device, vdev);
 
-	/* Fault if the device is not unused */
-	if (device->open_count) {
+	/*
+	 * Locking multiple devices is prone to deadlock, runaway and
+	 * unwind if we hit contention.
+	 */
+	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
 		vfio_device_put(device);
 		return -EBUSY;
 	}
@@ -2175,112 +2181,82 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 	return 0;
 }
 
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
+static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
 {
-	struct vfio_devices *devs = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
+	struct vfio_device_set *dev_set = data;
+	struct vfio_device *cur;
 
-	if (devs->cur_index == devs->max_index)
-		return -ENOSPC;
+	lockdep_assert_held(&dev_set->lock);
 
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -EINVAL;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
+	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
+		if (cur->dev == &pdev->dev)
+			return 0;
+	return -EBUSY;
+}
 
-	/*
-	 * Locking multiple devices is prone to deadlock, runaway and
-	 * unwind if we hit contention.
-	 */
-	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
-		vfio_device_put(device);
-		return -EBUSY;
+/*
+ * vfio-core considers a group to be viable and will create a vfio_device even
+ * if some devices are bound to drivers like pci-stub or pcieport.  Here we
+ * require all PCI devices to be inside our dev_set since that ensures they stay
+ * put and that every driver controlling the device can co-ordinate with the
+ * device reset.
+ */
+static struct pci_dev *vfio_pci_find_reset_target(struct vfio_pci_device *vdev)
+{
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct vfio_pci_device *cur;
+	bool needs_reset = false;
+
+	/* No VFIO device has an open device FD */
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		if (cur->vdev.open_count)
+			return NULL;
+		needs_reset |= cur->needs_reset;
 	}
+	if (!needs_reset)
+		return NULL;
 
-	devs->devices[devs->cur_index++] = vdev;
-	return 0;
+	/* All PCI devices in the group to be reset need to be in our dev_set */
+	if (vfio_pci_for_each_slot_or_bus(
+		    vdev->pdev, vfio_pci_is_device_in_set, dev_set,
+		    !pci_probe_reset_slot(vdev->pdev->slot)))
+		return NULL;
+	return cur->pdev;
 }
 
 /*
  * If a bus or slot reset is available for the provided device and:
  *  - All of the devices affected by that bus or slot reset are unused
- *    (!refcnt)
  *  - At least one of the affected devices is marked dirty via
  *    needs_reset (such as by lack of FLR support)
- * Then attempt to perform that bus or slot reset.  Callers are required
- * to hold vdev->dev_set->lock, protecting the bus/slot reset group from
- * concurrent opens.  A vfio_device reference is acquired for each device
- * to prevent unbinds during the reset operation.
- *
- * NB: vfio-core considers a group to be viable even if some devices are
- * bound to drivers like pci-stub or pcieport.  Here we require all devices
- * to be bound to vfio_pci since that's the only way we can be sure they
- * stay put.
+ * Then attempt to perform that bus or slot reset.
  */
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 {
-	struct vfio_devices devs = { .cur_index = 0 };
-	int i = 0, ret = -EINVAL;
-	bool slot = false;
-	struct vfio_pci_device *tmp;
-
-	if (!pci_probe_reset_slot(vdev->pdev->slot))
-		slot = true;
-	else if (pci_probe_reset_bus(vdev->pdev->bus))
-		return;
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct pci_dev *to_reset;
+	struct vfio_pci_device *cur;
+	int ret;
 
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
-					  &i, slot) || !i)
-		return;
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
 
-	devs.max_index = i;
-	devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
-	if (!devs.devices)
+	if (pci_probe_reset_slot(vdev->pdev->slot) &&
+	    pci_probe_reset_bus(vdev->pdev->bus))
 		return;
 
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
-					  vfio_pci_get_unused_devs,
-					  &devs, slot))
-		goto put_devs;
-
-	/* Does at least one need a reset? */
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-		if (tmp->needs_reset) {
-			ret = pci_reset_bus(vdev->pdev);
-			break;
-		}
-	}
-
-put_devs:
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-
-		/*
-		 * If reset was successful, affected devices no longer need
-		 * a reset and we should return all the collateral devices
-		 * to low power.  If not successful, we either didn't reset
-		 * the bus or timed out waiting for it, so let's not touch
-		 * the power state.
-		 */
-		if (!ret) {
-			tmp->needs_reset = false;
+	to_reset = vfio_pci_find_reset_target(vdev);
+	if (!to_reset)
+		return;
 
-			if (tmp != vdev && !disable_idle_d3)
-				vfio_pci_set_power_state(tmp, PCI_D3hot);
-		}
+	ret = pci_reset_bus(to_reset);
+	if (ret)
+		return;
 
-		vfio_device_put(&tmp->vdev);
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		cur->needs_reset = false;
+		if (cur->pdev != to_reset && !disable_idle_d3)
+			vfio_pci_set_power_state(cur, PCI_D3hot);
 	}
-
-	kfree(devs.devices);
 }
 
 static void __exit vfio_pci_cleanup(void)
-- 
2.32.0


WARNING: multiple messages have this Message-ID (diff)
From: Jason Gunthorpe <jgg@nvidia.com>
To: David Airlie <airlied@linux.ie>,
	Tony Krowiak <akrowiak@linux.ibm.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>, Daniel Vetter <daniel@ffwll.ch>,
	Diana Craciun <diana.craciun@oss.nxp.com>,
	dri-devel@lists.freedesktop.org,
	Eric Auger <eric.auger@redhat.com>,
	Eric Farman <farman@linux.ibm.com>,
	Harald Freudenberger <freude@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	intel-gfx@lists.freedesktop.org,
	intel-gvt-dev@lists.freedesktop.org,
	Jani Nikula <jani.nikula@linux.intel.com>,
	Jason Herne <jjherne@linux.ibm.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	kvm@vger.kernel.org, Kirti Wankhede <kwankhede@nvidia.com>,
	linux-doc@vger.kernel.org, linux-s390@vger.kernel.org,
	Matthew Rosato <mjrosato@linux.ibm.com>,
	Peter Oberparleiter <oberpar@linux.ibm.com>,
	Halil Pasic <pasic@linux.ibm.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	Vineeth Vijayan <vneethv@linux.ibm.com>,
	Zhi Wang <zhi.a.wang@intel.com>
Cc: Max Gurtovoy <mgurtovoy@nvidia.com>,
	Yishai Hadas <yishaih@nvidia.com>,
	"Raj, Ashok" <ashok.raj@intel.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [Intel-gfx] [PATCH v3 09/14] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set
Date: Wed, 28 Jul 2021 21:49:18 -0300	[thread overview]
Message-ID: <9-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com> (raw)
In-Reply-To: <0-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com>

Keep track of all the vfio_devices that have been added to the device set
and use this list in vfio_pci_try_bus_reset() instead of trying to work
backwards from the pci_device.

The dev_set->lock directly prevents devices from joining/leaving the set,
which further implies the pci_device cannot change drivers or that the
vfio_device be freed, eliminating the need for get/put's.

Completeness of the device set can be directly measured by checking if
every PCI device in the reset group is also in the device set - which
proves that VFIO drivers are attached to everything.

This restructuring corrects a call to pci_dev_driver() without holding the
device_lock() and removes a hard wiring to &vfio_pci_driver.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci.c | 148 +++++++++++++++---------------------
 1 file changed, 62 insertions(+), 86 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 5d6db93d6c680f..a1ae9a83a38621 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -404,6 +404,9 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
 	int i, bar;
 
+	/* For needs_reset */
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
+
 	/* Stop the device from further DMA */
 	pci_clear_master(pdev);
 
@@ -2145,7 +2148,7 @@ static struct pci_driver vfio_pci_driver = {
 	.err_handler		= &vfio_err_handlers,
 };
 
-static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
 {
 	struct vfio_devices *devs = data;
 	struct vfio_device *device;
@@ -2165,8 +2168,11 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 
 	vdev = container_of(device, struct vfio_pci_device, vdev);
 
-	/* Fault if the device is not unused */
-	if (device->open_count) {
+	/*
+	 * Locking multiple devices is prone to deadlock, runaway and
+	 * unwind if we hit contention.
+	 */
+	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
 		vfio_device_put(device);
 		return -EBUSY;
 	}
@@ -2175,112 +2181,82 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
 	return 0;
 }
 
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
+static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
 {
-	struct vfio_devices *devs = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
+	struct vfio_device_set *dev_set = data;
+	struct vfio_device *cur;
 
-	if (devs->cur_index == devs->max_index)
-		return -ENOSPC;
+	lockdep_assert_held(&dev_set->lock);
 
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -EINVAL;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
+	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
+		if (cur->dev == &pdev->dev)
+			return 0;
+	return -EBUSY;
+}
 
-	/*
-	 * Locking multiple devices is prone to deadlock, runaway and
-	 * unwind if we hit contention.
-	 */
-	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
-		vfio_device_put(device);
-		return -EBUSY;
+/*
+ * vfio-core considers a group to be viable and will create a vfio_device even
+ * if some devices are bound to drivers like pci-stub or pcieport.  Here we
+ * require all PCI devices to be inside our dev_set since that ensures they stay
+ * put and that every driver controlling the device can co-ordinate with the
+ * device reset.
+ */
+static struct pci_dev *vfio_pci_find_reset_target(struct vfio_pci_device *vdev)
+{
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct vfio_pci_device *cur;
+	bool needs_reset = false;
+
+	/* No VFIO device has an open device FD */
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		if (cur->vdev.open_count)
+			return NULL;
+		needs_reset |= cur->needs_reset;
 	}
+	if (!needs_reset)
+		return NULL;
 
-	devs->devices[devs->cur_index++] = vdev;
-	return 0;
+	/* All PCI devices in the group to be reset need to be in our dev_set */
+	if (vfio_pci_for_each_slot_or_bus(
+		    vdev->pdev, vfio_pci_is_device_in_set, dev_set,
+		    !pci_probe_reset_slot(vdev->pdev->slot)))
+		return NULL;
+	return cur->pdev;
 }
 
 /*
  * If a bus or slot reset is available for the provided device and:
  *  - All of the devices affected by that bus or slot reset are unused
- *    (!refcnt)
  *  - At least one of the affected devices is marked dirty via
  *    needs_reset (such as by lack of FLR support)
- * Then attempt to perform that bus or slot reset.  Callers are required
- * to hold vdev->dev_set->lock, protecting the bus/slot reset group from
- * concurrent opens.  A vfio_device reference is acquired for each device
- * to prevent unbinds during the reset operation.
- *
- * NB: vfio-core considers a group to be viable even if some devices are
- * bound to drivers like pci-stub or pcieport.  Here we require all devices
- * to be bound to vfio_pci since that's the only way we can be sure they
- * stay put.
+ * Then attempt to perform that bus or slot reset.
  */
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 {
-	struct vfio_devices devs = { .cur_index = 0 };
-	int i = 0, ret = -EINVAL;
-	bool slot = false;
-	struct vfio_pci_device *tmp;
-
-	if (!pci_probe_reset_slot(vdev->pdev->slot))
-		slot = true;
-	else if (pci_probe_reset_bus(vdev->pdev->bus))
-		return;
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+	struct pci_dev *to_reset;
+	struct vfio_pci_device *cur;
+	int ret;
 
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
-					  &i, slot) || !i)
-		return;
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
 
-	devs.max_index = i;
-	devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
-	if (!devs.devices)
+	if (pci_probe_reset_slot(vdev->pdev->slot) &&
+	    pci_probe_reset_bus(vdev->pdev->bus))
 		return;
 
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
-					  vfio_pci_get_unused_devs,
-					  &devs, slot))
-		goto put_devs;
-
-	/* Does at least one need a reset? */
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-		if (tmp->needs_reset) {
-			ret = pci_reset_bus(vdev->pdev);
-			break;
-		}
-	}
-
-put_devs:
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-
-		/*
-		 * If reset was successful, affected devices no longer need
-		 * a reset and we should return all the collateral devices
-		 * to low power.  If not successful, we either didn't reset
-		 * the bus or timed out waiting for it, so let's not touch
-		 * the power state.
-		 */
-		if (!ret) {
-			tmp->needs_reset = false;
+	to_reset = vfio_pci_find_reset_target(vdev);
+	if (!to_reset)
+		return;
 
-			if (tmp != vdev && !disable_idle_d3)
-				vfio_pci_set_power_state(tmp, PCI_D3hot);
-		}
+	ret = pci_reset_bus(to_reset);
+	if (ret)
+		return;
 
-		vfio_device_put(&tmp->vdev);
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		cur->needs_reset = false;
+		if (cur->pdev != to_reset && !disable_idle_d3)
+			vfio_pci_set_power_state(cur, PCI_D3hot);
 	}
-
-	kfree(devs.devices);
 }
 
 static void __exit vfio_pci_cleanup(void)
-- 
2.32.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2021-07-29  0:49 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-29  0:49 [PATCH v3 00/14] Provide core infrastructure for managing open/release Jason Gunthorpe
2021-07-29  0:49 ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49 ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 01/14] vfio/samples: Remove module get/put Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 02/14] vfio/mbochs: Fix missing error unwind of mbochs_used_mbytes Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  5:59   ` kernel test robot
2021-07-29  9:38     ` Dan Carpenter
2021-07-29  9:38     ` Dan Carpenter
2021-07-29 12:09     ` Jason Gunthorpe
2021-07-29 12:09       ` Jason Gunthorpe
2021-07-29  7:21   ` Christoph Hellwig
2021-07-29  7:21     ` [Intel-gfx] " Christoph Hellwig
2021-07-29  0:49 ` [PATCH v3 03/14] vfio: Introduce a vfio_uninit_group_dev() API call Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 04/14] vfio: Provide better generic support for open/release vfio_device_ops Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  7:26   ` Christoph Hellwig
2021-07-29  7:26     ` [Intel-gfx] " Christoph Hellwig
2021-07-29  0:49 ` [PATCH v3 05/14] vfio/samples: Delete useless open/close Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 06/14] vfio/fsl: Move to the device set infrastructure Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 07/14] vfio/platform: Use open_device() instead of open coding a refcnt scheme Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-08-05 12:37   ` Eric Auger
2021-08-05 12:37     ` [Intel-gfx] " Eric Auger
2021-07-29  0:49 ` [PATCH v3 08/14] vfio/pci: Move to the device set infrastructure Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` Jason Gunthorpe [this message]
2021-07-29  0:49   ` [Intel-gfx] [PATCH v3 09/14] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  7:30   ` Christoph Hellwig
2021-07-29  7:30     ` [Intel-gfx] " Christoph Hellwig
2021-08-03 16:34   ` Alex Williamson
2021-08-03 16:34     ` [Intel-gfx] " Alex Williamson
2021-08-03 16:41     ` Jason Gunthorpe
2021-08-03 16:41       ` [Intel-gfx] " Jason Gunthorpe
2021-08-03 16:52       ` Alex Williamson
2021-08-03 16:52         ` [Intel-gfx] " Alex Williamson
2021-08-05 11:47         ` Jason Gunthorpe
2021-08-05 11:47           ` [Intel-gfx] " Jason Gunthorpe
2021-08-05 17:33           ` Alex Williamson
2021-08-05 17:33             ` [Intel-gfx] " Alex Williamson
2021-08-05 23:05             ` Jason Gunthorpe
2021-08-05 23:05               ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 10/14] vfio/pci: Reorganize VFIO_DEVICE_PCI_HOT_RESET to use the device set Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 11/14] vfio/mbochs: Fix close when multiple device FDs are open Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 12/14] vfio/ap,ccw: Fix open/close " Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] [PATCH v3 12/14] vfio/ap, ccw: " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 13/14] vfio/gvt: " Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  0:49 ` [PATCH v3 14/14] vfio: Remove struct vfio_device_ops open/release Jason Gunthorpe
2021-07-29  0:49   ` [Intel-gfx] " Jason Gunthorpe
2021-07-29  0:49   ` Jason Gunthorpe
2021-07-29  2:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Provide core infrastructure for managing open/release (rev7) Patchwork
2021-07-29  2:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-07-29  8:16 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2021-08-05 14:26 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for Provide core infrastructure for managing open/release (rev8) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9-v3-6c9e19cc7d44+15613-vfio_reflck_jgg@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=airlied@linux.ie \
    --cc=akrowiak@linux.ibm.com \
    --cc=alex.williamson@redhat.com \
    --cc=ashok.raj@intel.com \
    --cc=borntraeger@de.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=corbet@lwn.net \
    --cc=daniel@ffwll.ch \
    --cc=diana.craciun@oss.nxp.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=eric.auger@redhat.com \
    --cc=farman@linux.ibm.com \
    --cc=freude@linux.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=hch@lst.de \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=intel-gvt-dev@lists.freedesktop.org \
    --cc=jani.nikula@linux.intel.com \
    --cc=jjherne@linux.ibm.com \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=leonro@nvidia.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mgurtovoy@nvidia.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=oberpar@linux.ibm.com \
    --cc=pasic@linux.ibm.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=vneethv@linux.ibm.com \
    --cc=yishaih@nvidia.com \
    --cc=zhenyuw@linux.intel.com \
    --cc=zhi.a.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.