From: Yishai Hadas <yishaih@nvidia.com>
To: <alex.williamson@redhat.com>, <bhelgaas@google.com>,
	<jgg@nvidia.com>, <saeedm@nvidia.com>
Cc: <linux-pci@vger.kernel.org>, <kvm@vger.kernel.org>,
	<netdev@vger.kernel.org>, <kuba@kernel.org>, <leonro@nvidia.com>,
	<kwankhede@nvidia.com>, <mgurtovoy@nvidia.com>,
	<yishaih@nvidia.com>, <maorg@nvidia.com>
Subject: [PATCH V5 mlx5-next 13/13] vfio/mlx5: Use its own PCI reset_done error handler
Date: Wed, 27 Oct 2021 12:56:58 +0300
Message-ID: <20211027095658.144468-14-yishaih@nvidia.com>
In-Reply-To: <20211027095658.144468-1-yishaih@nvidia.com>

Register the driver's own pci_error_handlers.reset_done handler and update
the migration state accordingly.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/vfio/pci/mlx5/main.c | 56 ++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 467dee08ad77..a94cb9cdb82e 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -55,8 +55,11 @@ struct mlx5vf_pci_migration_info {
 struct mlx5vf_pci_core_device {
 	struct vfio_pci_core_device core_device;
 	u8 migrate_cap:1;
+	u8 deferred_reset:1;
 	/* protect migration state */
 	struct mutex state_mutex;
+	/* protect the reset_done flow */
+	spinlock_t reset_lock;
 	struct mlx5vf_pci_migration_info vmig;
 };
 
@@ -473,6 +476,49 @@ mlx5vf_pci_migration_data_rw(struct mlx5vf_pci_core_device *mvdev,
 	return count;
 }
 
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if one exists.
+ */
+static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
+{
+again:
+	spin_lock(&mvdev->reset_lock);
+	if (mvdev->deferred_reset) {
+		mvdev->deferred_reset = false;
+		spin_unlock(&mvdev->reset_lock);
+		mlx5vf_reset_mig_state(mvdev);
+		mvdev->vmig.vfio_dev_state = VFIO_DEVICE_STATE_RUNNING;
+		goto again;
+	}
+	mutex_unlock(&mvdev->state_mutex);
+	spin_unlock(&mvdev->reset_lock);
+}
+
+static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+	struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev);
+
+	if (!mvdev->migrate_cap)
+		return;
+
+	/*
+	 * As the higher VFIO layers are holding locks across reset and using
+	 * those same locks with the mm_lock, we need to prevent an ABBA
+	 * deadlock between the state_mutex and mm_lock.
+	 * If the state_mutex is already taken, we defer the cleanup work
+	 * to the unlock flow of the other running context.
+	 */
+	spin_lock(&mvdev->reset_lock);
+	mvdev->deferred_reset = true;
+	if (!mutex_trylock(&mvdev->state_mutex)) {
+		spin_unlock(&mvdev->reset_lock);
+		return;
+	}
+	spin_unlock(&mvdev->reset_lock);
+	mlx5vf_state_mutex_unlock(mvdev);
+}
+
 static ssize_t mlx5vf_pci_mig_rw(struct vfio_pci_core_device *vdev,
 				 char __user *buf, size_t count, loff_t *ppos,
 				 bool iswrite)
@@ -541,7 +587,7 @@ static ssize_t mlx5vf_pci_mig_rw(struct vfio_pci_core_device *vdev,
 	}
 
 end:
-	mutex_unlock(&mvdev->state_mutex);
+	mlx5vf_state_mutex_unlock(mvdev);
 	return ret;
 }
 
@@ -636,6 +682,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
 			if (MLX5_CAP_GEN(mdev, migration)) {
 				mvdev->migrate_cap = 1;
 				mutex_init(&mvdev->state_mutex);
+				spin_lock_init(&mvdev->reset_lock);
 			}
 			mlx5_vf_put_core_dev(mdev);
 		}
@@ -670,12 +717,17 @@ static const struct pci_device_id mlx5vf_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);
 
+static const struct pci_error_handlers mlx5vf_err_handlers = {
+	.reset_done = mlx5vf_pci_aer_reset_done,
+	.error_detected = vfio_pci_core_aer_err_detected,
+};
+
 static struct pci_driver mlx5vf_pci_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = mlx5vf_pci_table,
 	.probe = mlx5vf_pci_probe,
 	.remove = mlx5vf_pci_remove,
-	.err_handler = &vfio_pci_core_err_handlers,
+	.err_handler = &mlx5vf_err_handlers,
 };
 
 static void __exit mlx5vf_pci_cleanup(void)
-- 
2.18.1
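
To illustrate the locking scheme above outside of kernel context, here is a
minimal, stand-alone user-space sketch of the same deferred-reset pattern,
with pthread mutexes standing in for the kernel's spinlock_t and struct mutex.
The identifiers used (state_mutex_unlock(), reset_done(), reset_mig_state())
are illustrative analogues of the functions in the patch, not the patch code
itself:

/*
 * Stand-alone user-space sketch of the deferred-reset pattern above.
 * pthread mutexes stand in for the kernel's spinlock_t and struct mutex;
 * all identifiers are illustrative only.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER; /* kernel: struct mutex */
static pthread_mutex_t reset_lock = PTHREAD_MUTEX_INITIALIZER;  /* kernel: spinlock_t */
static bool deferred_reset;

static void reset_mig_state(void)
{
	printf("migration state reset to RUNNING\n");
}

/* Analogue of mlx5vf_state_mutex_unlock(): every release of state_mutex
 * re-checks whether a reset arrived while the lock was held. */
static void state_mutex_unlock(void)
{
again:
	pthread_mutex_lock(&reset_lock);
	if (deferred_reset) {
		deferred_reset = false;
		pthread_mutex_unlock(&reset_lock);
		reset_mig_state();
		goto again;
	}
	pthread_mutex_unlock(&state_mutex);
	pthread_mutex_unlock(&reset_lock);
}

/* Analogue of mlx5vf_pci_aer_reset_done(): never sleeps on state_mutex.
 * Note pthread_mutex_trylock() returns non-zero when the lock is contended,
 * the inverse of the kernel's mutex_trylock(). */
static void reset_done(void)
{
	pthread_mutex_lock(&reset_lock);
	deferred_reset = true;
	if (pthread_mutex_trylock(&state_mutex)) {
		/* Someone else owns state_mutex; they will clean up on unlock. */
		pthread_mutex_unlock(&reset_lock);
		return;
	}
	pthread_mutex_unlock(&reset_lock);
	state_mutex_unlock();
}

int main(void)
{
	pthread_mutex_lock(&state_mutex);  /* e.g. a migration region access in flight */
	reset_done();                      /* reset arrives meanwhile: only sets the flag */
	state_mutex_unlock();              /* the owner notices the flag and cleans up */
	return 0;
}

The key property is that the reset_done() path never sleeps on state_mutex,
so it cannot participate in the ABBA deadlock with mm_lock described in the
patch; it only records that a reset happened, and whichever context currently
owns state_mutex performs the cleanup when it releases the lock.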


Thread overview: 16+ messages
2021-10-27  9:56 [PATCH V5 mlx5-next 00/13] Add mlx5 live migration driver Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 01/13] PCI/IOV: Add pci_iov_vf_id() to get VF index Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 02/13] net/mlx5: Reuse exported virtfn index function call Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 03/13] net/mlx5: Disable SRIOV before PF removal Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 04/13] PCI/IOV: Add pci_iov_get_pf_drvdata() to allow VF reaching the drvdata of a PF Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 05/13] net/mlx5: Expose APIs to get/put the mlx5 core device Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 06/13] vfio: Fix VFIO_DEVICE_STATE_SET_ERROR macro Yishai Hadas
2021-10-27 15:29   ` Max Gurtovoy
2021-10-27  9:56 ` [PATCH V5 mlx5-next 07/13] vfio: Add a macro for VFIO_DEVICE_STATE_ERROR Yishai Hadas
2021-10-27 15:30   ` Max Gurtovoy
2021-10-27  9:56 ` [PATCH V5 mlx5-next 08/13] vfio/pci_core: Make the region->release() function optional Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 09/13] net/mlx5: Introduce migration bits and structures Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 10/13] vfio/mlx5: Expose migration commands over mlx5 device Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 11/13] vfio/mlx5: Implement vfio_pci driver for mlx5 devices Yishai Hadas
2021-10-27  9:56 ` [PATCH V5 mlx5-next 12/13] vfio/pci: Expose vfio_pci_core_aer_err_detected() Yishai Hadas
2021-10-27  9:56 ` Yishai Hadas [this message]
