kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Max Gurtovoy <mgurtovoy@nvidia.com>
To: <jgg@nvidia.com>, <cohuck@redhat.com>, <kvm@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <alex.williamson@redhat.com>
Cc: <liranl@nvidia.com>, <oren@nvidia.com>, <tzahio@nvidia.com>,
	<leonro@nvidia.com>, <yarong@nvidia.com>, <aviadye@nvidia.com>,
	<shahafs@nvidia.com>, <artemp@nvidia.com>, <kwankhede@nvidia.com>,
	<ACurrid@nvidia.com>, <gmataev@nvidia.com>, <cjia@nvidia.com>,
	<mjrosato@linux.ibm.com>, <yishaih@nvidia.com>, <aik@ozlabs.ru>,
	Max Gurtovoy <mgurtovoy@nvidia.com>
Subject: [PATCH 4/9] mlx5-vfio-pci: add new vfio_pci driver for mlx5 devices
Date: Mon, 1 Feb 2021 16:28:23 +0000	[thread overview]
Message-ID: <20210201162828.5938-5-mgurtovoy@nvidia.com> (raw)
In-Reply-To: <20210201162828.5938-1-mgurtovoy@nvidia.com>

This driver registers with both the PCI bus and the Auxiliary bus. If the
probes of both devices succeed, we'll have a vendor-specific VFIO PCI
device. mlx5_vfio_pci uses vfio_pci_core to register and create a VFIO
device, and uses the auxiliary_device to get the needed extensions from
the vendor device driver. If either probe() function fails, the VFIO
char device will not be created. For now, only register and bind the
auxiliary_device to the pci_device when the auxiliary_device id matches
the pci_device BDF. Later, vendor-specific features such as live
migration will be added and made available to the virtualization
software.

Note: Although we've created mlx5-vfio-pci.ko, binding to vfio-pci.ko
still works as before; it's fully backward compatible. Of course, the
extended vendor functionality will not be available if the device is
bound to the generic vfio-pci.ko.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
---
 drivers/vfio/pci/Kconfig         |  10 ++
 drivers/vfio/pci/Makefile        |   3 +
 drivers/vfio/pci/mlx5_vfio_pci.c | 253 +++++++++++++++++++++++++++++++
 include/linux/mlx5/vfio_pci.h    |  36 +++++
 4 files changed, 302 insertions(+)
 create mode 100644 drivers/vfio/pci/mlx5_vfio_pci.c
 create mode 100644 include/linux/mlx5/vfio_pci.h

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index b958a48f63a0..dcb164d7d641 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -65,3 +65,13 @@ config VFIO_PCI_ZDEV
 	  for zPCI devices passed through via VFIO on s390.
 
 	  Say Y here.
+
+config MLX5_VFIO_PCI
+	tristate "VFIO support for MLX5 PCI devices"
+	depends on VFIO_PCI_CORE && MLX5_CORE
+	select AUXILIARY_BUS
+	help
+	  This provides generic PCI support for MLX5 devices using the VFIO
+	  framework.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 3f2a27e222cd..9f67edca31c5 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
+obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5-vfio-pci.o
 
 vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
 vfio-pci-core-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
@@ -9,3 +10,5 @@ vfio-pci-core-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
 vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
 
 vfio-pci-y := vfio_pci.o
+
+mlx5-vfio-pci-y := mlx5_vfio_pci.o
diff --git a/drivers/vfio/pci/mlx5_vfio_pci.c b/drivers/vfio/pci/mlx5_vfio_pci.c
new file mode 100644
index 000000000000..4e6b256c74bf
--- /dev/null
+++ b/drivers/vfio/pci/mlx5_vfio_pci.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Max Gurtovoy <mgurtovoy@nvidia.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/sched/mm.h>
+#include <linux/mlx5/vfio_pci.h>
+
+#include "vfio_pci_core.h"
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
+#define DRIVER_DESC     "MLX5 VFIO PCI - User Level meta-driver for NVIDIA MLX5 device family"
+
+/* 16k migration data size */
+#define MLX5_MIGRATION_REGION_DATA_SIZE	SZ_16K
+/* Data section offset from migration region */
+#define MLX5_MIGRATION_REGION_DATA_OFFSET  (sizeof(struct vfio_device_migration_info))
+
+struct mlx5_vfio_pci_migration_info {
+	struct vfio_device_migration_info mig; /* migration info header, first member */
+	char data[MLX5_MIGRATION_REGION_DATA_SIZE]; /* data section after the header */
+};
+
+static LIST_HEAD(aux_devs_list); /* probed aux devices, linked through ->entry */
+static DEFINE_MUTEX(aux_devs_lock); /* held around each aux_devs_list access here */
+
+/* Look up the listed aux device whose id equals pci_dev_id(pdev), or NULL */
+static struct mlx5_vfio_pci_adev *mlx5_vfio_pci_find_adev(struct pci_dev *pdev)
+{
+	struct mlx5_vfio_pci_adev *cur, *match = NULL;
+
+	mutex_lock(&aux_devs_lock);
+	list_for_each_entry(cur, &aux_devs_list, entry) {
+		if (cur->madev.adev.id != pci_dev_id(pdev))
+			continue;
+		match = cur;
+		break;
+	}
+	mutex_unlock(&aux_devs_lock);
+	return match;
+}
+
+/*
+ * Aux bus probe: log the device and add it to aux_devs_list. Always returns 0.
+ */
+static int mlx5_vfio_pci_aux_probe(struct auxiliary_device *adev,
+		const struct auxiliary_device_id *id)
+{
+	struct mlx5_vfio_pci_adev *mvadev = adev_to_mvadev(adev);
+
+	pr_info("%s aux probing bdf %02x:%02x.%d mdev is %s\n", adev->name,
+		PCI_BUS_NUM(adev->id & 0xffff), PCI_SLOT(adev->id & 0xff),
+		PCI_FUNC(adev->id & 0xff),
+		dev_name(mvadev->madev.mdev->device));
+
+	mutex_lock(&aux_devs_lock);
+	list_add(&mvadev->entry, &aux_devs_list);
+	mutex_unlock(&aux_devs_lock);
+
+	return 0;
+}
+
+/* Aux bus remove: reset the bound PCI function (if any), then unlist the adev */
+static void mlx5_vfio_pci_aux_remove(struct auxiliary_device *adev)
+{
+	struct vfio_pci_core_device *vpdev = dev_get_drvdata(&adev->dev);
+	struct mlx5_vfio_pci_adev *mvadev = adev_to_mvadev(adev);
+
+	/* TODO: is this the right thing to do ? maybe FLR ? */
+	if (vpdev)
+		pci_reset_function(vpdev->pdev);
+	mutex_lock(&aux_devs_lock);
+	list_del(&mvadev->entry);
+	mutex_unlock(&aux_devs_lock);
+}
+
+static const struct auxiliary_device_id mlx5_vfio_pci_aux_id_table[] = {
+	{ .name = MLX5_ADEV_NAME ".vfio_pci", },
+	{}, /* empty terminating entry */
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5_vfio_pci_aux_id_table);
+
+static struct auxiliary_driver mlx5_vfio_pci_aux_driver = {
+	.name = "vfio_pci_ex",
+	.probe = mlx5_vfio_pci_aux_probe, /* adds the adev to aux_devs_list */
+	.remove = mlx5_vfio_pci_aux_remove, /* removes the adev from aux_devs_list */
+	.id_table = mlx5_vfio_pci_aux_id_table,
+};
+
+static void mlx5_vfio_pci_mig_release(struct vfio_pci_core_device *vpdev,
+		struct vfio_pci_region *region)
+{
+	kfree(region->data); /* presumably the buffer passed at registration -- confirm */
+}
+
+static size_t mlx5_vfio_pci_mig_rw(struct vfio_pci_core_device *vpdev,
+		char __user *buf, size_t count, loff_t *ppos, bool iswrite)
+{
+	/* TODO: add all migration logic here; until then every access fails */
+
+	return -EINVAL; /* NOTE(review): negative errno returned through size_t */
+}
+
+static const struct vfio_pci_regops migration_ops = {
+	.rw = mlx5_vfio_pci_mig_rw,
+	.release = mlx5_vfio_pci_mig_release,
+};
+
+/*
+ * Allocate a zeroed migration info buffer and register it as a read/write
+ * migration region of @vpdev. Returns 0, -ENOMEM or the registration error.
+ */
+static int mlx5_vfio_pci_op_init(struct vfio_pci_core_device *vpdev)
+{
+	struct mlx5_vfio_pci_migration_info *vmig;
+	int ret;
+
+	vmig = kzalloc(sizeof(*vmig), GFP_KERNEL);
+	if (!vmig)
+		return -ENOMEM;
+
+	ret = vfio_pci_register_dev_region(vpdev,
+			VFIO_REGION_TYPE_MIGRATION,
+			VFIO_REGION_SUBTYPE_MIGRATION,
+			&migration_ops, sizeof(*vmig),
+			VFIO_REGION_INFO_FLAG_READ |
+			VFIO_REGION_INFO_FLAG_WRITE, vmig);
+	if (ret)
+		kfree(vmig); /* registration failed, so the buffer is still ours */
+
+	return ret;
+}
+
+static const struct vfio_pci_device_ops mlx5_vfio_pci_ops = {
+	.name		= "mlx5-vfio-pci",
+	.module		= THIS_MODULE,
+	.init		= mlx5_vfio_pci_op_init, /* registers the migration region */
+};
+
+/* PCI probe: needs an already listed aux device with a matching id, else -ENODEV */
+static int mlx5_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct mlx5_vfio_pci_adev *mvadev = mlx5_vfio_pci_find_adev(pdev);
+	struct vfio_pci_core_device *vpdev;
+
+	if (!mvadev) {
+		pr_err("failed to find aux device for %s\n", dev_name(&pdev->dev));
+		return -ENODEV;
+	}
+
+	vpdev = vfio_create_pci_device(pdev, &mlx5_vfio_pci_ops, mvadev);
+	if (IS_ERR(vpdev))
+		return PTR_ERR(vpdev);
+
+	/* Lets mlx5_vfio_pci_aux_remove() reach the VFIO device via adev drvdata */
+	dev_set_drvdata(&mvadev->madev.adev.dev, vpdev);
+	return 0;
+}
+
+/* PCI remove: clear the adev drvdata link (if listed), then destroy the VFIO device */
+static void mlx5_vfio_pci_remove(struct pci_dev *pdev)
+{
+	struct mlx5_vfio_pci_adev *mvadev = mlx5_vfio_pci_find_adev(pdev);
+
+	if (mvadev)
+		dev_set_drvdata(&mvadev->madev.adev.dev, NULL);
+
+	vfio_destroy_pci_device(pdev);
+}
+
+#ifdef CONFIG_PCI_IOV
+static int mlx5_vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
+{
+	might_sleep();
+
+	/* TODO: do vendor specific stuff here; for now just call into the core */
+
+	return vfio_pci_core_sriov_configure(pdev, nr_virtfn);
+}
+#endif
+
+static const struct pci_error_handlers mlx5_vfio_err_handlers = {
+	.error_detected = vfio_pci_core_aer_err_detected, /* only callback set here */
+};
+
+static const struct pci_device_id mlx5_vfio_pci_table[] = {
+	{ PCI_VDEVICE(MELLANOX, 0x6001) }, /* NVMe SNAP controllers */
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1042,
+			 PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID) }, /* Virtio SNAP controllers */
+	{ 0, } /* terminating entry */
+};
+
+static struct pci_driver mlx5_vfio_pci_driver = {
+	.name			= "mlx5-vfio-pci",
+	.id_table		= mlx5_vfio_pci_table,
+	.probe			= mlx5_vfio_pci_probe, /* -ENODEV unless a matching aux device is listed */
+	.remove			= mlx5_vfio_pci_remove,
+#ifdef CONFIG_PCI_IOV
+	.sriov_configure	= mlx5_vfio_pci_sriov_configure,
+#endif
+	.err_handler		= &mlx5_vfio_err_handlers,
+};
+
+static void __exit mlx5_vfio_pci_cleanup(void)
+{
+	auxiliary_driver_unregister(&mlx5_vfio_pci_aux_driver); /* registered last in init */
+	pci_unregister_driver(&mlx5_vfio_pci_driver);
+}
+
+/*
+ * Register the PCI driver, then the aux driver. NOTE(review): PCI probes that
+ * run before any aux probe find aux_devs_list empty (-ENODEV) -- intended?
+ */
+static int __init mlx5_vfio_pci_init(void)
+{
+	int ret;
+
+	ret = pci_register_driver(&mlx5_vfio_pci_driver);
+	if (ret)
+		return ret;
+
+	ret = auxiliary_driver_register(&mlx5_vfio_pci_aux_driver);
+	if (ret)
+		pci_unregister_driver(&mlx5_vfio_pci_driver); /* roll back */
+
+	return ret;
+}
+
+module_init(mlx5_vfio_pci_init);
+module_exit(mlx5_vfio_pci_cleanup);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/linux/mlx5/vfio_pci.h b/include/linux/mlx5/vfio_pci.h
new file mode 100644
index 000000000000..c1e7b4d6da30
--- /dev/null
+++ b/include/linux/mlx5/vfio_pci.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020 NVIDIA Corporation
+ */
+
+#ifndef MLX5_VFIO_PCI_H
+#define MLX5_VFIO_PCI_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vfio_pci_adev {
+	struct mlx5_adev	madev;
+
+	/* List linkage; should not be used outside mlx5_vfio_pci.ko */
+	struct list_head	entry;
+};
+
+/* Get the mlx5_vfio_pci_adev that embeds @madev as its madev member */
+static inline struct mlx5_vfio_pci_adev *
+madev_to_mvadev(struct mlx5_adev *madev)
+{
+	return container_of(madev, struct mlx5_vfio_pci_adev, madev);
+}
+
+/* Get the mlx5_vfio_pci_adev whose madev.adev member is @adev */
+static inline struct mlx5_vfio_pci_adev *
+adev_to_mvadev(struct auxiliary_device *adev)
+{
+	return madev_to_mvadev(container_of(adev, struct mlx5_adev, adev));
+}
+
+#endif /* MLX5_VFIO_PCI_H */
-- 
2.25.4


  parent reply	other threads:[~2021-02-01 16:31 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-01 16:28 [PATCH v2 0/9] Introduce vfio-pci-core subsystem Max Gurtovoy
2021-02-01 16:28 ` [PATCH 1/9] vfio-pci: rename vfio_pci.c to vfio_pci_core.c Max Gurtovoy
2021-02-01 16:28 ` [PATCH 2/9] vfio-pci: introduce vfio_pci_core subsystem driver Max Gurtovoy
2021-02-01 16:28 ` [PATCH 3/9] vfio-pci-core: export vfio_pci_register_dev_region function Max Gurtovoy
2021-02-01 16:28 ` Max Gurtovoy [this message]
2021-02-01 16:28 ` [PATCH 5/9] vfio-pci/zdev: remove unused vdev argument Max Gurtovoy
2021-02-01 17:27   ` Matthew Rosato
2021-02-02  7:57   ` Cornelia Huck
2021-02-02 17:21     ` Alex Williamson
2021-02-01 16:28 ` [PATCH 6/9] vfio-pci/zdev: fix possible segmentation fault issue Max Gurtovoy
2021-02-01 16:52   ` Cornelia Huck
2021-02-01 17:08     ` Matthew Rosato
2021-02-01 20:47       ` Alex Williamson
2021-02-02  7:58         ` Cornelia Huck
2021-02-01 16:28 ` [PATCH 7/9] vfio/pci: use s390 naming instead of zdev Max Gurtovoy
2021-02-01 16:28 ` [PATCH 8/9] vfio/pci: use x86 naming instead of igd Max Gurtovoy
2021-02-01 17:14   ` Cornelia Huck
2021-02-01 17:49     ` Matthew Rosato
2021-02-01 18:42       ` Alex Williamson
2021-02-02 16:06         ` Cornelia Huck
2021-02-02 17:10           ` Jason Gunthorpe
2021-02-11 15:47             ` Max Gurtovoy
2021-02-11 16:29               ` Matthew Rosato
2021-02-11 17:39                 ` Cornelia Huck
2021-02-02 17:41           ` Max Gurtovoy
2021-02-02 17:54             ` Alex Williamson
2021-02-02 18:50               ` Jason Gunthorpe
2021-02-02 18:55                 ` Christoph Hellwig
2021-02-02 19:05                   ` Jason Gunthorpe
2021-02-02 19:37                 ` Alex Williamson
2021-02-02 20:44                   ` Jason Gunthorpe
2021-02-02 20:59                     ` Max Gurtovoy
2021-02-02 21:30                       ` Alex Williamson
2021-02-02 23:06                         ` Jason Gunthorpe
2021-02-02 23:59                           ` Alex Williamson
2021-02-03 13:54                             ` Jason Gunthorpe
2021-02-11  8:47                               ` Christoph Hellwig
2021-02-11 14:30                                 ` Jason Gunthorpe
2021-02-11  8:44                             ` Christoph Hellwig
2021-02-11 19:43                               ` Alex Williamson
     [not found]             ` <806c138e-685c-0955-7c15-93cb1d4fe0d9@ozlabs.ru>
2021-02-03 16:07               ` Max Gurtovoy
     [not found]                 ` <83ef0164-6291-c3d1-0ce5-2c9d6c97469e@ozlabs.ru>
2021-02-04 12:51                   ` Jason Gunthorpe
2021-02-05  0:42                     ` Alexey Kardashevskiy
2021-02-08 12:44                       ` Max Gurtovoy
2021-02-09  1:55                         ` Alexey Kardashevskiy
2021-02-08 18:13                       ` Jason Gunthorpe
2021-02-09  1:51                         ` Alexey Kardashevskiy
2021-02-04  9:12               ` Max Gurtovoy
2021-02-11  8:50                 ` Christoph Hellwig
2021-02-11 14:49                   ` Jason Gunthorpe
2021-02-01 16:28 ` [PATCH 9/9] vfio/pci: use powernv naming instead of nvlink2 Max Gurtovoy
2021-02-01 18:35   ` Jason Gunthorpe
2021-02-10  7:52 ` [PATCH v2 0/9] Introduce vfio-pci-core subsystem Tian, Kevin
2021-02-10 13:34   ` Jason Gunthorpe
2021-02-10 16:37     ` Alex Williamson
2021-02-10 17:08       ` Jason Gunthorpe
2021-02-11  8:36     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210201162828.5938-5-mgurtovoy@nvidia.com \
    --to=mgurtovoy@nvidia.com \
    --cc=ACurrid@nvidia.com \
    --cc=aik@ozlabs.ru \
    --cc=alex.williamson@redhat.com \
    --cc=artemp@nvidia.com \
    --cc=aviadye@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=cohuck@redhat.com \
    --cc=gmataev@nvidia.com \
    --cc=jgg@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=leonro@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liranl@nvidia.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=oren@nvidia.com \
    --cc=shahafs@nvidia.com \
    --cc=tzahio@nvidia.com \
    --cc=yarong@nvidia.com \
    --cc=yishaih@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).