Subject: [RFC PATCH v2 1/3] vGPU Core driver
From: Kirti Wankhede @ 2016-02-23 16:24 UTC
  To: alex.williamson, pbonzini, kraxel
  Cc: qemu-devel, kvm, kevin.tian, shuai.ruan, jike.song, zhiyuan.lv,
	Kirti Wankhede, Neo Jia

Design for vGPU Driver:
The main purpose of the vGPU driver is to provide a common interface for
vGPU management that can be used by different GPU drivers.

This module provides a generic interface to create a vGPU device, add it
to the vGPU bus, add the device to an IOMMU group, and then add it to a
VFIO group.

High Level block diagram:

+--------------+    vgpu_register_driver()+---------------+
|     __init() +------------------------->+               |
|              |                          |               |
|              +<-------------------------+    vgpu.ko    |
| vgpu_vfio.ko |   probe()/remove()       |               |
|              |                +---------+               +---------+
+--------------+                |         +-------+-------+         |
                                |                 ^                 |
                                | callback        |                 |
                                |         +-------+--------+        |
                                |         |vgpu_register_device()   |
                                |         |                |        |
                                +---^-----+-----+    +-----+------+-+
                                    | nvidia.ko |    |  i915.ko   |
                                    |           |    |            |
                                    +-----------+    +------------+

The vGPU core driver provides two types of registration interfaces:
1. Registration interface for vGPU bus driver:

/**
  * struct vgpu_driver - vGPU device driver
  * @name: driver name
  * @probe: called when a new device is created
  * @remove: called when a device is removed
  * @driver: device driver structure
  *
  **/
struct vgpu_driver {
         const char *name;
         int  (*probe)  (struct device *dev);
         void (*remove) (struct device *dev);
         struct device_driver    driver;
};

int  vgpu_register_driver(struct vgpu_driver *drv, struct module *owner);
void vgpu_unregister_driver(struct vgpu_driver *drv);

The VFIO bus driver for vGPU should use this interface to register with
the vGPU core driver. With this, the VFIO bus driver for vGPU devices is
responsible for adding vGPU devices to a VFIO group, as sketched below.
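
As an illustration only (the vgpu_vfio_* names below are placeholders
for this example, not part of this patch), a minimal vGPU bus driver
registration could look like:

#include <linux/module.h>
#include <linux/device.h>
#include <linux/vgpu.h>

static int vgpu_vfio_probe(struct device *dev)
{
        /* add the new vGPU device to a VFIO group here */
        return 0;
}

static void vgpu_vfio_remove(struct device *dev)
{
        /* remove the vGPU device from its VFIO group here */
}

static struct vgpu_driver vgpu_vfio_driver = {
        .name   = "vgpu_vfio",
        .probe  = vgpu_vfio_probe,
        .remove = vgpu_vfio_remove,
};

static int __init vgpu_vfio_init(void)
{
        return vgpu_register_driver(&vgpu_vfio_driver, THIS_MODULE);
}

static void __exit vgpu_vfio_exit(void)
{
        vgpu_unregister_driver(&vgpu_vfio_driver);
}

module_init(vgpu_vfio_init)
module_exit(vgpu_vfio_exit)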

2. GPU driver interface
The GPU driver interface provides GPU drivers a set of APIs to manage
GPU-driver-related work in their own driver. The APIs are:
- vgpu_supported_config: provide the list of configurations supported by
  the GPU.
- vgpu_create: allocate basic resources in the GPU driver for a vGPU device.
- vgpu_destroy: free resources in the GPU driver when a vGPU device is
  destroyed.
- vgpu_start: initiate the vGPU initialization process from the GPU driver
  when a VM boots, before QEMU starts.
- vgpu_shutdown: tear down vGPU resources during VM teardown.
- read: read emulation callback.
- write: write emulation callback.
- vgpu_set_irqs: send the interrupt configuration information that QEMU sets.
- vgpu_bar_info: provide BAR size and flags for the vGPU device.
- validate_map_request: validate a remap pfn request.

This registration interface should be used by GPU drivers to register
each physical device with the vGPU core driver, along the lines of the
sketch below.
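
For example (a sketch only; the my_* callbacks are hypothetical and the
real implementations are vendor specific):

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/vgpu.h>

static int my_vgpu_create(struct pci_dev *dev, uuid_le uuid,
                          uint32_t instance, char *vgpu_params)
{
        /* allocate vendor-specific resources for this vGPU instance */
        return 0;
}

static int my_vgpu_destroy(struct pci_dev *dev, uuid_le uuid,
                           uint32_t instance)
{
        /* free vendor-specific resources for this vGPU instance */
        return 0;
}

static const struct gpu_device_ops my_gpu_ops = {
        .owner        = THIS_MODULE,
        .vgpu_create  = my_vgpu_create,
        .vgpu_destroy = my_vgpu_destroy,
        /* .vgpu_start, .vgpu_shutdown, .read, .write, ... as needed */
};

/* from the GPU driver's own PCI probe path: */
ret = vgpu_register_device(pdev, &my_gpu_ops);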

This patch adds a couple more functions to the GPU driver interface that
were discussed during v1 of this RFC.

Thanks,
Kirti.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
---
 drivers/Kconfig             |    2 +
 drivers/Makefile            |    1 +
 drivers/vgpu/Kconfig        |   26 +++
 drivers/vgpu/Makefile       |    4 +
 drivers/vgpu/vgpu-core.c    |  422 +++++++++++++++++++++++++++++++++++++++++++
 drivers/vgpu/vgpu-driver.c  |  137 ++++++++++++++
 drivers/vgpu/vgpu-sysfs.c   |  366 +++++++++++++++++++++++++++++++++++++
 drivers/vgpu/vgpu_private.h |   36 ++++
 include/linux/vgpu.h        |  217 ++++++++++++++++++++++
 9 files changed, 1211 insertions(+), 0 deletions(-)
 create mode 100644 drivers/vgpu/Kconfig
 create mode 100644 drivers/vgpu/Makefile
 create mode 100644 drivers/vgpu/vgpu-core.c
 create mode 100644 drivers/vgpu/vgpu-driver.c
 create mode 100644 drivers/vgpu/vgpu-sysfs.c
 create mode 100644 drivers/vgpu/vgpu_private.h
 create mode 100644 include/linux/vgpu.h

diff --git a/drivers/Kconfig b/drivers/Kconfig
index d2ac339..5fd9eae 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -122,6 +122,8 @@ source "drivers/uio/Kconfig"
 
 source "drivers/vfio/Kconfig"
 
+source "drivers/vgpu/Kconfig"
+
 source "drivers/vlynq/Kconfig"
 
 source "drivers/virt/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 795d0ca..1c43250 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_FUSION)		+= message/
 obj-y				+= firewire/
 obj-$(CONFIG_UIO)		+= uio/
 obj-$(CONFIG_VFIO)		+= vfio/
+obj-$(CONFIG_VGPU)		+= vgpu/
 obj-y				+= cdrom/
 obj-y				+= auxdisplay/
 obj-$(CONFIG_PCCARD)		+= pcmcia/
diff --git a/drivers/vgpu/Kconfig b/drivers/vgpu/Kconfig
new file mode 100644
index 0000000..698ddf9
--- /dev/null
+++ b/drivers/vgpu/Kconfig
@@ -0,0 +1,26 @@
+
+menuconfig VGPU
+    tristate "VGPU driver framework"
+    depends on VFIO
+    select VGPU_VFIO
+    select VFIO_IOMMU_TYPE1_VGPU
+    help
+        VGPU provides a framework to virtualize GPUs without SR-IOV
+        capability. See Documentation/vgpu.txt for more details.
+
+        If you don't know what to do here, say N.
+
+config VGPU
+    tristate
+    depends on VFIO
+    default n
+
+config VGPU_VFIO
+    tristate
+    depends on VGPU
+    default n
+
+config VFIO_IOMMU_TYPE1_VGPU
+    tristate
+    depends on VGPU_VFIO
+    default n
diff --git a/drivers/vgpu/Makefile b/drivers/vgpu/Makefile
new file mode 100644
index 0000000..f5be980
--- /dev/null
+++ b/drivers/vgpu/Makefile
@@ -0,0 +1,4 @@
+
+vgpu-y := vgpu-core.o vgpu-sysfs.o vgpu-driver.o
+
+obj-$(CONFIG_VGPU)			+= vgpu.o
diff --git a/drivers/vgpu/vgpu-core.c b/drivers/vgpu/vgpu-core.c
new file mode 100644
index 0000000..7710021
--- /dev/null
+++ b/drivers/vgpu/vgpu-core.c
@@ -0,0 +1,422 @@
+/*
+ * VGPU Core Driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"NVIDIA Corporation"
+#define DRIVER_DESC	"VGPU Core Driver"
+
+/*
+ * #defines
+ */
+
+#define VGPU_CLASS_NAME		"vgpu"
+
+/*
+ * Global Structures
+ */
+
+static struct vgpu {
+	struct list_head    vgpu_devices_list;
+	struct mutex        vgpu_devices_lock;
+	struct list_head    gpu_devices_list;
+	struct mutex        gpu_devices_lock;
+} vgpu;
+
+static struct class vgpu_class;
+
+/*
+ * Functions
+ */
+
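+/*
+ * Return the vgpu_device that belongs to the given IOMMU group, or NULL
+ * if no registered vGPU device matches the group.
+ */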
+struct vgpu_device *get_vgpu_device_from_group(struct iommu_group *group)
+{
+	struct vgpu_device *vdev = NULL;
+
+	mutex_lock(&vgpu.vgpu_devices_lock);
+	list_for_each_entry(vdev, &vgpu.vgpu_devices_list, list) {
+		if (vdev->group) {
+			if (iommu_group_id(vdev->group) == iommu_group_id(group)) {
+				mutex_unlock(&vgpu.vgpu_devices_lock);
+				return vdev;
+			}
+		}
+	}
+	mutex_unlock(&vgpu.vgpu_devices_lock);
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(get_vgpu_device_from_group);
+
+static int vgpu_add_attribute_group(struct device *dev,
+			            const struct attribute_group **groups)
+{
+        return sysfs_create_groups(&dev->kobj, groups);
+}
+
+static void vgpu_remove_attribute_group(struct device *dev,
+			                const struct attribute_group **groups)
+{
+        sysfs_remove_groups(&dev->kobj, groups);
+}
+
+int vgpu_register_device(struct pci_dev *dev, const struct gpu_device_ops *ops)
+{
+	int ret = 0;
+	struct gpu_device *gpu_dev, *tmp;
+
+	if (!dev)
+		return -EINVAL;
+
+        gpu_dev = kzalloc(sizeof(*gpu_dev), GFP_KERNEL);
+        if (!gpu_dev)
+                return -ENOMEM;
+
+	gpu_dev->dev = dev;
+        gpu_dev->ops = ops;
+
+        mutex_lock(&vgpu.gpu_devices_lock);
+
+        /* Check for duplicates */
+        list_for_each_entry(tmp, &vgpu.gpu_devices_list, gpu_next) {
+                if (tmp->dev == dev) {
+			ret = -EINVAL;
+			goto add_error;
+                }
+        }
+
+	ret = vgpu_create_pci_device_files(dev);
+	if (ret)
+		goto add_error;
+
+	ret = vgpu_add_attribute_group(&dev->dev, ops->dev_attr_groups);
+	if (ret)
+		goto add_group_error;
+
+        list_add(&gpu_dev->gpu_next, &vgpu.gpu_devices_list);
+
+	printk(KERN_INFO "VGPU: Registered dev 0x%x 0x%x, class 0x%x\n",
+			 dev->vendor, dev->device, dev->class);
+        mutex_unlock(&vgpu.gpu_devices_lock);
+
+        return 0;
+
+add_group_error:
+	vgpu_remove_pci_device_files(dev);
+add_error:
+	mutex_unlock(&vgpu.gpu_devices_lock);
+	kfree(gpu_dev);
+	return ret;
+
+}
+EXPORT_SYMBOL(vgpu_register_device);
+
+void vgpu_unregister_device(struct pci_dev *dev)
+{
+        struct gpu_device *gpu_dev;
+
+        mutex_lock(&vgpu.gpu_devices_lock);
+        list_for_each_entry(gpu_dev, &vgpu.gpu_devices_list, gpu_next) {
+		struct vgpu_device *vdev = NULL;
+
+                if (gpu_dev->dev != dev)
+			continue;
+
+		printk(KERN_INFO "VGPU: Unregistered dev 0x%x 0x%x, class 0x%x\n",
+				dev->vendor, dev->device, dev->class);
+
+		list_for_each_entry(vdev, &vgpu.vgpu_devices_list, list) {
+			if (vdev->gpu_dev != gpu_dev)
+				continue;
+			destroy_vgpu_device(vdev);
+		}
+		vgpu_remove_attribute_group(&dev->dev, gpu_dev->ops->dev_attr_groups);
+		vgpu_remove_pci_device_files(dev);
+		list_del(&gpu_dev->gpu_next);
+		mutex_unlock(&vgpu.gpu_devices_lock);
+		kfree(gpu_dev);
+		return;
+        }
+        mutex_unlock(&vgpu.gpu_devices_lock);
+}
+EXPORT_SYMBOL(vgpu_unregister_device);
+
+/*
+ * Helper Functions
+ */
+
+static struct vgpu_device *vgpu_device_alloc(uuid_le uuid, int instance, char *name)
+{
+	struct vgpu_device *vgpu_dev = NULL;
+
+	vgpu_dev = kzalloc(sizeof(*vgpu_dev), GFP_KERNEL);
+	if (!vgpu_dev)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&vgpu_dev->kref);
+	memcpy(&vgpu_dev->uuid, &uuid, sizeof(uuid_le));
+	vgpu_dev->vgpu_instance = instance;
+	strcpy(vgpu_dev->dev_name, name);
+
+	mutex_lock(&vgpu.vgpu_devices_lock);
+	list_add(&vgpu_dev->list, &vgpu.vgpu_devices_list);
+	mutex_unlock(&vgpu.vgpu_devices_lock);
+
+	return vgpu_dev;
+}
+
+static void vgpu_device_free(struct vgpu_device *vgpu_dev)
+{
+	if (vgpu_dev) {
+		mutex_lock(&vgpu.vgpu_devices_lock);
+		list_del(&vgpu_dev->list);
+		mutex_unlock(&vgpu.vgpu_devices_lock);
+		kfree(vgpu_dev);
+	}
+	return;
+}
+
+struct vgpu_device *vgpu_drv_get_vgpu_device(uuid_le uuid, int instance)
+{
+	struct vgpu_device *vdev = NULL;
+
+	mutex_lock(&vgpu.vgpu_devices_lock);
+	list_for_each_entry(vdev, &vgpu.vgpu_devices_list, list) {
+		if ((uuid_le_cmp(vdev->uuid, uuid) == 0) &&
+		    (vdev->vgpu_instance == instance)) {
+			mutex_unlock(&vgpu.vgpu_devices_lock);
+			return vdev;
+		}
+	}
+	mutex_unlock(&vgpu.vgpu_devices_lock);
+	return NULL;
+}
+
+static void vgpu_device_release(struct device *dev)
+{
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	vgpu_device_free(vgpu_dev);
+}
+
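+/*
+ * Create a vGPU device on the vgpu bus and invoke the owning GPU
+ * driver's vgpu_create callback for the given physical device.
+ */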
+int create_vgpu_device(struct pci_dev *pdev, uuid_le uuid, uint32_t instance, char *vgpu_params)
+{
+	char name[64];
+	int retval = 0;
+	struct vgpu_device *vgpu_dev = NULL;
+	struct gpu_device *gpu_dev;
+
+	/* device name is "<UUID>-<instance>"; snprintf NUL-terminates */
+	snprintf(name, sizeof(name), "%pUb-%d", uuid.b, instance);
+
+	printk(KERN_INFO "VGPU: %s: device %s\n", __FUNCTION__, name);
+
+	vgpu_dev = vgpu_device_alloc(uuid, instance, name);
+	if (IS_ERR(vgpu_dev)) {
+		return PTR_ERR(vgpu_dev);
+	}
+
+	vgpu_dev->dev.parent  = NULL;
+	vgpu_dev->dev.bus     = &vgpu_bus_type;
+	vgpu_dev->dev.release = vgpu_device_release;
+	dev_set_name(&vgpu_dev->dev, "%s", name);
+
+	retval = device_register(&vgpu_dev->dev);
+	if (retval)
+		goto create_failed1;
+
+	printk(KERN_INFO "UUID %pUb \n", vgpu_dev->uuid.b);
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	list_for_each_entry(gpu_dev, &vgpu.gpu_devices_list, gpu_next) {
+		if (gpu_dev->dev != pdev)
+			continue;
+
+		vgpu_dev->gpu_dev = gpu_dev;
+		if (gpu_dev->ops->vgpu_create) {
+			retval = gpu_dev->ops->vgpu_create(pdev, vgpu_dev->uuid,
+							   instance, vgpu_params);
+			if (retval) {
+				mutex_unlock(&vgpu.gpu_devices_lock);
+				goto create_failed2;
+			}
+		}
+		break;
+	}
+	if (!vgpu_dev->gpu_dev) {
+		retval = -EINVAL;
+		mutex_unlock(&vgpu.gpu_devices_lock);
+		goto create_failed2;
+	}
+
+	mutex_unlock(&vgpu.gpu_devices_lock);
+
+	retval = vgpu_add_attribute_group(&vgpu_dev->dev, gpu_dev->ops->vgpu_attr_groups);
+	if (retval)
+		goto create_attr_error;
+
+	return retval;
+
+create_attr_error:
+	if (gpu_dev->ops->vgpu_destroy) {
+		int ret = 0;
+		ret = gpu_dev->ops->vgpu_destroy(gpu_dev->dev,
+						 vgpu_dev->uuid,
+						 vgpu_dev->vgpu_instance);
+	}
+
+create_failed2:
+	device_unregister(&vgpu_dev->dev);
+
+create_failed1:
+	vgpu_device_free(vgpu_dev);
+
+	return retval;
+}
+
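+/*
+ * Tear down a vGPU device: the GPU driver's vgpu_destroy callback runs
+ * first; a non-zero return from it aborts the teardown.
+ */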
+void destroy_vgpu_device(struct vgpu_device *vgpu_dev)
+{
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	printk(KERN_INFO "VGPU: destroying device %s\n", vgpu_dev->dev_name);
+	if (gpu_dev->ops->vgpu_destroy) {
+		int retval = 0;
+		retval = gpu_dev->ops->vgpu_destroy(gpu_dev->dev,
+						    vgpu_dev->uuid,
+						    vgpu_dev->vgpu_instance);
+		/* A non-zero return means the vendor driver doesn't support
+		 * hot-unplug, so keep the vGPU device. */
+		if (retval)
+			return;
+	}
+
+	vgpu_remove_attribute_group(&vgpu_dev->dev, gpu_dev->ops->vgpu_attr_groups);
+	device_unregister(&vgpu_dev->dev);
+}
+
+void get_vgpu_supported_types(struct device *dev, char *str)
+{
+	struct gpu_device *gpu_dev;
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	list_for_each_entry(gpu_dev, &vgpu.gpu_devices_list, gpu_next) {
+		if (&gpu_dev->dev->dev == dev) {
+			if (gpu_dev->ops->vgpu_supported_config)
+				gpu_dev->ops->vgpu_supported_config(gpu_dev->dev, str);
+			break;
+		}
+	}
+	mutex_unlock(&vgpu.gpu_devices_lock);
+}
+
+int vgpu_start_callback(struct vgpu_device *vgpu_dev)
+{
+	int ret = 0;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	if (gpu_dev->ops->vgpu_start)
+		ret = gpu_dev->ops->vgpu_start(vgpu_dev->uuid);
+	mutex_unlock(&vgpu.gpu_devices_lock);
+	return ret;
+}
+
+int vgpu_shutdown_callback(struct vgpu_device *vgpu_dev)
+{
+	int ret = 0;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	if (gpu_dev->ops->vgpu_shutdown)
+		ret = gpu_dev->ops->vgpu_shutdown(vgpu_dev->uuid);
+	mutex_unlock(&vgpu.gpu_devices_lock);
+	return ret;
+}
+
+static char *vgpu_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "vgpu/%s", dev_name(dev));
+}
+
+static void release_vgpubus_dev(struct device *dev)
+{
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	destroy_vgpu_device(vgpu_dev);
+}
+
+static struct class vgpu_class = {
+	.name		= VGPU_CLASS_NAME,
+	.owner		= THIS_MODULE,
+	.class_attrs	= vgpu_class_attrs,
+	.dev_groups	= vgpu_dev_groups,
+	.devnode	= vgpu_devnode,
+	.dev_release    = release_vgpubus_dev,
+};
+
+static int __init vgpu_init(void)
+{
+	int rc = 0;
+
+	memset(&vgpu, 0, sizeof(vgpu));
+
+	mutex_init(&vgpu.vgpu_devices_lock);
+	INIT_LIST_HEAD(&vgpu.vgpu_devices_list);
+	mutex_init(&vgpu.gpu_devices_lock);
+	INIT_LIST_HEAD(&vgpu.gpu_devices_list);
+
+	rc = class_register(&vgpu_class);
+	if (rc < 0) {
+		printk(KERN_ERR "Error: failed to register vgpu class\n");
+		goto failed1;
+	}
+
+	rc = vgpu_bus_register();
+	if (rc < 0) {
+		printk(KERN_ERR "Error: failed to register vgpu bus\n");
+		class_unregister(&vgpu_class);
+	}
+
+failed1:
+	return rc;
+}
+
+static void __exit vgpu_exit(void)
+{
+	vgpu_bus_unregister();
+	class_unregister(&vgpu_class);
+}
+
+module_init(vgpu_init)
+module_exit(vgpu_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vgpu/vgpu-driver.c b/drivers/vgpu/vgpu-driver.c
new file mode 100644
index 0000000..6b62f19
--- /dev/null
+++ b/drivers/vgpu/vgpu-driver.c
@@ -0,0 +1,137 @@
+/*
+ * VGPU driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
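+/*
+ * Allocate a fresh IOMMU group for the vGPU device and add the device
+ * to it, so that VFIO can manage the device through that group.
+ */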
+static int vgpu_device_attach_iommu(struct vgpu_device *vgpu_dev)
+{
+        int retval = 0;
+        struct iommu_group *group = NULL;
+
+        group = iommu_group_alloc();
+        if (IS_ERR(group)) {
+                printk(KERN_ERR "VGPU: failed to allocate group!\n");
+                return PTR_ERR(group);
+        }
+
+        retval = iommu_group_add_device(group, &vgpu_dev->dev);
+        if (retval) {
+                printk(KERN_ERR "VGPU: failed to add dev to group!\n");
+                iommu_group_put(group);
+                return retval;
+        }
+
+        vgpu_dev->group = group;
+
+        printk(KERN_INFO "VGPU: group_id = %d \n", iommu_group_id(group));
+        return retval;
+}
+
+static void vgpu_device_detach_iommu(struct vgpu_device *vgpu_dev)
+{
+        iommu_group_put(vgpu_dev->dev.iommu_group);
+        iommu_group_remove_device(&vgpu_dev->dev);
+        printk(KERN_INFO "VGPU: detaching iommu \n");
+}
+
+static int vgpu_device_probe(struct device *dev)
+{
+	struct vgpu_driver *drv = to_vgpu_driver(dev->driver);
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	int status = 0;
+
+	status = vgpu_device_attach_iommu(vgpu_dev);
+	if (status) {
+		printk(KERN_ERR "Failed to attach IOMMU\n");
+		return status;
+	}
+
+	if (drv && drv->probe) {
+		status = drv->probe(dev);
+	}
+
+	return status;
+}
+
+static int vgpu_device_remove(struct device *dev)
+{
+	struct vgpu_driver *drv = to_vgpu_driver(dev->driver);
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	int status = 0;
+
+	if (drv && drv->remove) {
+		drv->remove(dev);
+	}
+
+	vgpu_device_detach_iommu(vgpu_dev);
+
+	return status;
+}
+
+struct bus_type vgpu_bus_type = {
+	.name		= "vgpu",
+	.probe		= vgpu_device_probe,
+	.remove		= vgpu_device_remove,
+};
+EXPORT_SYMBOL_GPL(vgpu_bus_type);
+
+/**
+ * vgpu_register_driver - register a new vGPU driver
+ * @drv: the driver to register
+ * @owner: owner module of the driver to register
+ *
+ * Returns a negative value on error, otherwise 0.
+ */
+int vgpu_register_driver(struct vgpu_driver *drv, struct module *owner)
+{
+	/* initialize common driver fields */
+	drv->driver.name = drv->name;
+	drv->driver.bus = &vgpu_bus_type;
+	drv->driver.owner = owner;
+
+	/* register with core */
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(vgpu_register_driver);
+
+/**
+ * vgpu_unregister_driver - unregister vGPU driver
+ * @drv: the driver to unregister
+ *
+ */
+void vgpu_unregister_driver(struct vgpu_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(vgpu_unregister_driver);
+
+int vgpu_bus_register(void)
+{
+	return bus_register(&vgpu_bus_type);
+}
+
+void vgpu_bus_unregister(void)
+{
+	bus_unregister(&vgpu_bus_type);
+}
+
diff --git a/drivers/vgpu/vgpu-sysfs.c b/drivers/vgpu/vgpu-sysfs.c
new file mode 100644
index 0000000..a1b321b
--- /dev/null
+++ b/drivers/vgpu/vgpu-sysfs.c
@@ -0,0 +1,366 @@
+/*
+ * File attributes for vGPU devices
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+/* Prototypes */
+
+static ssize_t vgpu_supported_types_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf);
+static DEVICE_ATTR_RO(vgpu_supported_types);
+
+static ssize_t vgpu_create_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count);
+static DEVICE_ATTR_WO(vgpu_create);
+
+static ssize_t vgpu_destroy_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count);
+static DEVICE_ATTR_WO(vgpu_destroy);
+
+
+/* Static functions */
+
+static bool is_uuid_sep(char sep)
+{
+	if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
+		return true;
+	return false;
+}
+
+
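+/* Parse a canonically formatted UUID string (16 hex byte pairs with
+ * optional '-' or ':' separators) into a uuid_le. */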
+static int uuid_parse(const char *str, uuid_le *uuid)
+{
+	int i;
+
+	if (strlen(str) < 36)
+		return -EINVAL;
+
+	for (i = 0; i < 16; i++) {
+		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
+			printk(KERN_ERR "%s err", __FUNCTION__);
+			return -EINVAL;
+		}
+
+		uuid->b[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
+		str += 2;
+		if (is_uuid_sep(*str))
+			str++;
+	}
+
+	return 0;
+}
+
+
+/* Functions */
+static ssize_t vgpu_supported_types_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	char *str;
+	ssize_t n;
+
+	str = kzalloc(512, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	get_vgpu_supported_types(dev, str);
+
+	n = sprintf(buf, "%s\n", str);
+	kfree(str);
+
+	return n;
+}
+
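+/* Input format written to the vgpu_create attribute:
+ *   "<UUID>:<vgpu_instance>:<vgpu_params>"
+ */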
+static ssize_t vgpu_create_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	char *str, *pstr;
+	char *uuid_str, *instance_str, *vgpu_params = NULL;
+	uuid_le uuid;
+	uint32_t instance;
+	struct pci_dev *pdev;
+	int ret = -EINVAL;	/* non-PCI parent devices are not supported */
+
+	pstr = str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!str)
+		return -ENOMEM;
+
+	if ((uuid_str = strsep(&str, ":")) == NULL) {
+		printk(KERN_ERR "%s Empty UUID or string %s \n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if (!str) {
+		printk(KERN_ERR "%s vgpu instance not specified %s \n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if ((instance_str = strsep(&str, ":")) == NULL) {
+		printk(KERN_ERR "%s Empty instance or string %s \n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	instance = (unsigned int)simple_strtoul(instance_str, NULL, 0);
+
+	if (!str) {
+		printk(KERN_ERR "%s vgpu params not specified %s \n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	vgpu_params = kstrdup(str, GFP_KERNEL);
+
+	if (!vgpu_params) {
+		printk(KERN_ERR "%s vgpu params allocation failed \n",
+				 __FUNCTION__);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if (uuid_parse(uuid_str, &uuid) < 0) {
+		printk(KERN_ERR "%s UUID parse error  %s \n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if (dev_is_pci(dev)) {
+		pdev = to_pci_dev(dev);
+
+		if (create_vgpu_device(pdev, uuid, instance, vgpu_params) < 0) {
+			printk(KERN_ERR "%s vgpu create error \n", __FUNCTION__);
+			ret = -EINVAL;
+			goto create_error;
+		}
+		ret = count;
+	}
+
+create_error:
+	if (vgpu_params)
+		kfree(vgpu_params);
+
+	if (pstr)
+		kfree(pstr);
+	return ret;
+}
+
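+/* Input format written to the vgpu_destroy attribute:
+ *   "<UUID>:<vgpu_instance>"
+ */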
+static ssize_t vgpu_destroy_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	char *uuid_str, *str, *pstr;
+	uuid_le uuid;
+	unsigned int instance;
+	struct vgpu_device *vgpu_dev = NULL;
+	ssize_t ret = count;
+
+	pstr = str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!str)
+		return -ENOMEM;
+
+	if ((uuid_str = strsep(&str, ":")) == NULL) {
+		printk(KERN_ERR "%s Empty UUID or string %s \n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto destroy_error;
+	}
+
+	if (str == NULL) {
+		printk(KERN_ERR "%s instance not specified %s \n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto destroy_error;
+	}
+
+	instance = (unsigned int)simple_strtoul(str, NULL, 0);
+
+	if (uuid_parse(uuid_str, &uuid) < 0) {
+		printk(KERN_ERR "%s UUID parse error  %s \n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto destroy_error;
+	}
+
+	printk(KERN_INFO "%s UUID %pUb - %d \n", __FUNCTION__, uuid.b, instance);
+
+	vgpu_dev = vgpu_drv_get_vgpu_device(uuid, instance);
+
+	if (vgpu_dev)
+		destroy_vgpu_device(vgpu_dev);
+
+destroy_error:
+	kfree(pstr);	/* free the kstrndup'd copy on all paths */
+	return ret;
+}
+
+static ssize_t
+vgpu_uuid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct vgpu_device *drv = to_vgpu_device(dev);
+
+	if (drv)
+		return sprintf(buf, "%pUb \n", drv->uuid.b);
+
+	return sprintf(buf, " \n");
+}
+
+static DEVICE_ATTR_RO(vgpu_uuid);
+
+static ssize_t
+vgpu_group_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct vgpu_device *drv = to_vgpu_device(dev);
+
+	if (drv && drv->group)
+		return sprintf(buf, "%d \n", iommu_group_id(drv->group));
+
+	return sprintf(buf, " \n");
+}
+
+static DEVICE_ATTR_RO(vgpu_group_id);
+
+
+static struct attribute *vgpu_dev_attrs[] = {
+	&dev_attr_vgpu_uuid.attr,
+	&dev_attr_vgpu_group_id.attr,
+	NULL,
+};
+
+static const struct attribute_group vgpu_dev_group = {
+	.attrs = vgpu_dev_attrs,
+};
+
+const struct attribute_group *vgpu_dev_groups[] = {
+	&vgpu_dev_group,
+	NULL,
+};
+
+
+ssize_t vgpu_start_store(struct class *class, struct class_attribute *attr,
+			 const char *buf, size_t count)
+{
+	char *uuid_str;
+	uuid_le uuid;
+	struct vgpu_device *vgpu_dev = NULL;
+	int ret;
+
+	uuid_str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!uuid_str)
+		return -ENOMEM;
+
+	if (uuid_parse(uuid_str, &uuid) < 0) {
+		printk(KERN_ERR "%s UUID parse error  %s \n", __FUNCTION__, buf);
+		kfree(uuid_str);
+		return -EINVAL;
+	}
+
+	kfree(uuid_str);	/* only needed for parsing */
+
+	vgpu_dev = vgpu_drv_get_vgpu_device(uuid, 0);
+
+	if (vgpu_dev && dev_is_vgpu(&vgpu_dev->dev)) {
+		kobject_uevent(&vgpu_dev->dev.kobj, KOBJ_ONLINE);
+
+		ret = vgpu_start_callback(vgpu_dev);
+		if (ret < 0) {
+			printk(KERN_ERR "%s vgpu_start callback failed  %d \n",
+					 __FUNCTION__, ret);
+			return ret;
+		}
+	}
+
+	return count;
+}
+
+ssize_t vgpu_shutdown_store(struct class *class, struct class_attribute *attr,
+			    const char *buf, size_t count)
+{
+	char *uuid_str;
+	uuid_le uuid;
+	struct vgpu_device *vgpu_dev = NULL;
+	int ret;
+
+	uuid_str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!uuid_str)
+		return -ENOMEM;
+
+	if (uuid_parse(uuid_str, &uuid) < 0) {
+		printk(KERN_ERR "%s UUID parse error  %s \n", __FUNCTION__, buf);
+		kfree(uuid_str);
+		return -EINVAL;
+	}
+
+	kfree(uuid_str);	/* only needed for parsing */
+
+	vgpu_dev = vgpu_drv_get_vgpu_device(uuid, 0);
+
+	if (vgpu_dev && dev_is_vgpu(&vgpu_dev->dev)) {
+		kobject_uevent(&vgpu_dev->dev.kobj, KOBJ_OFFLINE);
+
+		ret = vgpu_shutdown_callback(vgpu_dev);
+		if (ret < 0) {
+			printk(KERN_ERR "%s vgpu_shutdown callback failed  %d \n",
+					 __FUNCTION__, ret);
+			return ret;
+		}
+	}
+
+	return count;
+}
+
+struct class_attribute vgpu_class_attrs[] = {
+	__ATTR_WO(vgpu_start),
+	__ATTR_WO(vgpu_shutdown),
+	__ATTR_NULL
+};
+
+int vgpu_create_pci_device_files(struct pci_dev *dev)
+{
+	int retval;
+
+	retval = sysfs_create_file(&dev->dev.kobj,
+				   &dev_attr_vgpu_supported_types.attr);
+	if (retval) {
+		printk(KERN_ERR "VGPU-VFIO: failed to create vgpu_supported_types sysfs entry\n");
+		return retval;
+	}
+
+	retval = sysfs_create_file(&dev->dev.kobj, &dev_attr_vgpu_create.attr);
+	if (retval) {
+		printk(KERN_ERR "VGPU-VFIO: failed to create vgpu_create sysfs entry\n");
+		return retval;
+	}
+
+	retval = sysfs_create_file(&dev->dev.kobj, &dev_attr_vgpu_destroy.attr);
+	if (retval) {
+		printk(KERN_ERR "VGPU-VFIO: failed to create vgpu_destroy sysfs entry\n");
+		return retval;
+	}
+
+	return 0;
+}
+
+
+void vgpu_remove_pci_device_files(struct pci_dev *dev)
+{
+	sysfs_remove_file(&dev->dev.kobj, &dev_attr_vgpu_supported_types.attr);
+	sysfs_remove_file(&dev->dev.kobj, &dev_attr_vgpu_create.attr);
+	sysfs_remove_file(&dev->dev.kobj, &dev_attr_vgpu_destroy.attr);
+}
+
diff --git a/drivers/vgpu/vgpu_private.h b/drivers/vgpu/vgpu_private.h
new file mode 100644
index 0000000..35158ef
--- /dev/null
+++ b/drivers/vgpu/vgpu_private.h
@@ -0,0 +1,36 @@
+/*
+ * VGPU internal definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef VGPU_PRIVATE_H
+#define VGPU_PRIVATE_H
+
+struct vgpu_device *vgpu_drv_get_vgpu_device(uuid_le uuid, int instance);
+
+int  create_vgpu_device(struct pci_dev *pdev, uuid_le uuid, uint32_t instance,
+		       char *vgpu_params);
+void destroy_vgpu_device(struct vgpu_device *vgpu_dev);
+
+int  vgpu_bus_register(void);
+void vgpu_bus_unregister(void);
+
+/* Function prototypes for vgpu_sysfs */
+
+extern struct class_attribute vgpu_class_attrs[];
+extern const struct attribute_group *vgpu_dev_groups[];
+
+int  vgpu_create_pci_device_files(struct pci_dev *dev);
+void vgpu_remove_pci_device_files(struct pci_dev *dev);
+
+void get_vgpu_supported_types(struct device *dev, char *str);
+int  vgpu_start_callback(struct vgpu_device *vgpu_dev);
+int  vgpu_shutdown_callback(struct vgpu_device *vgpu_dev);
+
+#endif /* VGPU_PRIVATE_H */
diff --git a/include/linux/vgpu.h b/include/linux/vgpu.h
new file mode 100644
index 0000000..7e1cb4e
--- /dev/null
+++ b/include/linux/vgpu.h
@@ -0,0 +1,217 @@
+/*
+ * VGPU definition
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef VGPU_H
+#define VGPU_H
+
+/* Common data structures */
+
+struct pci_bar_info {
+	uint64_t start;
+	uint64_t size;
+	uint32_t flags;
+};
+
+enum vgpu_emul_space_e {
+	vgpu_emul_space_config = 0, /*!< PCI configuration space */
+	vgpu_emul_space_io = 1,     /*!< I/O register space */
+	vgpu_emul_space_mmio = 2    /*!< Memory-mapped I/O space */
+};
+
+struct gpu_device;
+
+/*
+ * VGPU device
+ */
+struct vgpu_device {
+	struct kref		kref;
+	struct device		dev;
+	struct gpu_device	*gpu_dev;
+	struct iommu_group	*group;
+#define DEVICE_NAME_LEN		(64)
+	char			dev_name[DEVICE_NAME_LEN];
+	uuid_le			uuid;
+	uint32_t		vgpu_instance;
+	struct device_attribute	*dev_attr_vgpu_status;
+	int			vgpu_device_status;
+
+	void			*driver_data;
+
+	struct list_head	list;
+};
+
+
+/**
+ * struct gpu_device_ops - Structure to be registered for each physical GPU to
+ * register the device to vgpu module.
+ *
+ * @owner:			The module owner.
+ * @dev_attr_groups:		Default attributes of the physical device.
+ * @vgpu_attr_groups:		Default attributes of the vGPU device.
+ * @vgpu_supported_config:	Called to get information about supported vgpu types.
+ *				@dev : pci device structure of physical GPU.
+ *				@config: should return string listing supported config
+ *				Returns integer: success (0) or error (< 0)
+ * @vgpu_create:		Called to allocate basic resources in graphics
+ *				driver for a particular vgpu.
+ *				@dev: physical pci device structure on which vgpu
+ *				      should be created
+ *				@uuid: VM's uuid for which VM it is intended to
+ *				@instance: vgpu instance in that VM
+ *				@vgpu_params: extra parameters required by GPU driver.
+ *				Returns integer: success (0) or error (< 0)
+ * @vgpu_destroy:		Called to free resources in graphics driver for
+ *				a vgpu instance of that VM.
+ *				@dev: physical pci device structure to which
+ *				this vgpu points to.
+ *				@uuid: VM's uuid for which the vgpu belongs to.
+ *				@instance: vgpu instance in that VM
+ *				Returns integer: success (0) or error (< 0)
+ *				If the VM is running when vgpu_destroy is
+ *				called, the vGPU is being hot-unplugged. Return
+ *				an error if the VM is running and the graphics
+ *				driver doesn't support vGPU hot-unplug.
+ * @vgpu_start:			Called to initiate the vGPU initialization
+ *				process in the graphics driver when the VM
+ *				boots, before QEMU starts.
+ *				@uuid: VM's UUID which is booting.
+ *				Returns integer: success (0) or error (< 0)
+ * @vgpu_shutdown:		Called to teardown vGPU related resources for
+ *				the VM
+ *				@uuid: UUID of the VM that is shutting down.
+ *				Returns integer: success (0) or error (< 0)
+ * @read:			Read emulation callback
+ *				@vdev: vgpu device structure
+ *				@buf: read buffer
+ *				@count: number bytes to read
+ *				@address_space: specifies for which address space
+ *				the request is: pci_config_space, IO register
+ *				space or MMIO space.
+ *				@pos: offset from base address.
+ *				Returns the number of bytes read on success,
+ *				or an error.
+ * @write:			Write emulation callback
+ *				@vdev: vgpu device structure
+ *				@buf: write buffer
+ *				@count: number bytes to be written
+ *				@address_space: specifies for which address space
+ *				the request is: pci_config_space, IO register
+ *				space or MMIO space.
+ *				@pos: offset from base address.
+ *				Returns the number of bytes written on success,
+ *				or an error.
+ * @vgpu_set_irqs:		Called to convey the interrupt configuration
+ *				information that QEMU set.
+ *				@vdev: vgpu device structure
+ *				@flags, index, start, count and *data : same as
+ *				that of struct vfio_irq_set of
+ *				VFIO_DEVICE_SET_IRQS API.
+ * @vgpu_bar_info:		Called to get BAR size and flags of vGPU device.
+ *				@vdev: vgpu device structure
+ *				@bar_index: BAR index
+ *				@bar_info: output, returns size and flags of
+ *				requested BAR
+ *				Returns integer: success (0) or error (< 0)
+ * @validate_map_request:	Validate remap pfn request
+ *				@vdev: vgpu device structure
+ *				@virtaddr: target user address to start at
+ *				@pfn: physical address of kernel memory, GPU
+ *				driver can change if required.
+ *				@size: size of map area, GPU driver can change
+ *				the size of map area if desired.
+ *				@prot: page protection flags for this mapping,
+ *				GPU driver can change, if required.
+ *				Returns integer: success (0) or error (< 0)
+ *
+ * A physical GPU that supports vGPU should be registered with the vgpu
+ * module using the gpu_device_ops structure.
+ */
+
+struct gpu_device_ops {
+	struct module   *owner;
+	const struct attribute_group **dev_attr_groups;
+	const struct attribute_group **vgpu_attr_groups;
+
+	int	(*vgpu_supported_config)(struct pci_dev *dev, char *config);
+	int     (*vgpu_create)(struct pci_dev *dev, uuid_le uuid,
+			       uint32_t instance, char *vgpu_params);
+	int     (*vgpu_destroy)(struct pci_dev *dev, uuid_le uuid,
+			        uint32_t instance);
+
+	int     (*vgpu_start)(uuid_le uuid);
+	int     (*vgpu_shutdown)(uuid_le uuid);
+
+	ssize_t (*read) (struct vgpu_device *vdev, char *buf, size_t count,
+			 uint32_t address_space, loff_t pos);
+	ssize_t (*write)(struct vgpu_device *vdev, char *buf, size_t count,
+			 uint32_t address_space, loff_t pos);
+	int     (*vgpu_set_irqs)(struct vgpu_device *vdev, uint32_t flags,
+				 unsigned index, unsigned start, unsigned count,
+				 void *data);
+	int	(*vgpu_bar_info)(struct vgpu_device *vdev, int bar_index,
+				 struct pci_bar_info *bar_info);
+	int	(*validate_map_request)(struct vgpu_device *vdev,
+					unsigned long virtaddr,
+					unsigned long *pfn, unsigned long *size,
+					pgprot_t *prot);
+};
+
+/*
+ * Physical GPU
+ */
+struct gpu_device {
+	struct pci_dev                  *dev;
+	const struct gpu_device_ops     *ops;
+	struct list_head                gpu_next;
+};
+
+/**
+ * struct vgpu_driver - vGPU device driver
+ * @name: driver name
+ * @probe: called when a new device is created
+ * @remove: called when a device is removed
+ * @driver: device driver structure
+ *
+ **/
+struct vgpu_driver {
+	const char *name;
+	int  (*probe)  (struct device *dev);
+	void (*remove) (struct device *dev);
+	struct device_driver	driver;
+};
+
+static inline struct vgpu_driver *to_vgpu_driver(struct device_driver *drv)
+{
+	return drv ? container_of(drv, struct vgpu_driver, driver) : NULL;
+}
+
+static inline struct vgpu_device *to_vgpu_device(struct device *dev)
+{
+	return dev ? container_of(dev, struct vgpu_device, dev) : NULL;
+}
+
+extern struct bus_type vgpu_bus_type;
+
+#define dev_is_vgpu(d) ((d)->bus == &vgpu_bus_type)
+
+extern int  vgpu_register_device(struct pci_dev *dev,
+				 const struct gpu_device_ops *ops);
+extern void vgpu_unregister_device(struct pci_dev *dev);
+
+extern int  vgpu_register_driver(struct vgpu_driver *drv, struct module *owner);
+extern void vgpu_unregister_driver(struct vgpu_driver *drv);
+
+extern int vgpu_map_virtual_bar(uint64_t virt_bar_addr, uint64_t phys_bar_addr,
+				uint32_t len, uint32_t flags);
+extern int vgpu_dma_do_translate(dma_addr_t * gfn_buffer, uint32_t count);
+
+struct vgpu_device *get_vgpu_device_from_group(struct iommu_group *group);
+
+#endif /* VGPU_H */
+
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [Qemu-devel] [RFC PATCH v2 1/3] vGPU Core driver
@ 2016-02-23 16:24 ` Kirti Wankhede
  0 siblings, 0 replies; 38+ messages in thread
From: Kirti Wankhede @ 2016-02-23 16:24 UTC (permalink / raw)
  To: alex.williamson, pbonzini, kraxel
  Cc: shuai.ruan, jike.song, Neo Jia, kvm, kevin.tian, qemu-devel,
	Kirti Wankhede, zhiyuan.lv

Design for vGPU Driver:
Main purpose of vGPU driver is to provide a common interface for vGPU
management that can be used by differnt GPU drivers.

This module would provide a generic interface to create the device, add
it to vGPU bus, add device to IOMMU group and then add it to vfio group.

High Level block diagram:

+--------------+    vgpu_register_driver()+---------------+
|     __init() +------------------------->+               |
|              |                          |               |
|              +<-------------------------+    vgpu.ko    |
| vgpu_vfio.ko |   probe()/remove()       |               |
|              |                +---------+               +---------+
+--------------+                |         +-------+-------+         |
                                |                 ^                 |
                                | callback        |                 |
                                |         +-------+--------+        |
                                |         |vgpu_register_device()   |
                                |         |                |        |
                                +---^-----+-----+    +-----+------+-+
                                    | nvidia.ko |    |  i915.ko   |
                                    |           |    |            |
                                    +-----------+    +------------+

vGPU driver provides two types of registration interfaces:
1. Registration interface for vGPU bus driver:

/**
  * struct vgpu_driver - vGPU device driver
  * @name: driver name
  * @probe: called when new device created
  * @remove: called when device removed
  * @driver: device driver structure
  *
  **/
struct vgpu_driver {
         const char *name;
         int  (*probe)  (struct device *dev);
         void (*remove) (struct device *dev);
         struct device_driver    driver;
};

int  vgpu_register_driver(struct vgpu_driver *drv, struct module *owner);
void vgpu_unregister_driver(struct vgpu_driver *drv);

VFIO bus driver for vgpu, should use this interface to register with
vGPU driver. With this, VFIO bus driver for vGPU devices is responsible
to add vGPU device to VFIO group.

2. GPU driver interface
GPU driver interface provides GPU driver the set APIs to manage GPU driver
related work in their own driver. APIs are to:
- vgpu_supported_config: provide supported configuration list by the GPU.
- vgpu_create: to allocate basic resouces in GPU driver for a vGPU device.
- vgpu_destroy: to free resources in GPU driver during vGPU device destroy.
- vgpu_start: to initiate vGPU initialization process from GPU driver when VM
  boots and before QEMU starts.
- vgpu_shutdown: to teardown vGPU resources during VM teardown.
- read : read emulation callback.
- write: write emulation callback.
- vgpu_set_irqs: send interrupt configuration information that QEMU sets.
- vgpu_bar_info: to provice BAR size and its flags for the vGPU device.
- validate_map_request: to validate remap pfn request.

This registration interface should be used by GPU drivers to register
each physical device to vGPU driver.

Updated this patch with couple of more functions in GPU driver interface
which were discussed during v1 version of this RFC.

Thanks,
Kirti.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
---
 drivers/Kconfig             |    2 +
 drivers/Makefile            |    1 +
 drivers/vgpu/Kconfig        |   26 +++
 drivers/vgpu/Makefile       |    4 +
 drivers/vgpu/vgpu-core.c    |  422 +++++++++++++++++++++++++++++++++++++++++++
 drivers/vgpu/vgpu-driver.c  |  137 ++++++++++++++
 drivers/vgpu/vgpu-sysfs.c   |  366 +++++++++++++++++++++++++++++++++++++
 drivers/vgpu/vgpu_private.h |   36 ++++
 include/linux/vgpu.h        |  217 ++++++++++++++++++++++
 9 files changed, 1211 insertions(+), 0 deletions(-)
 create mode 100644 drivers/vgpu/Kconfig
 create mode 100644 drivers/vgpu/Makefile
 create mode 100644 drivers/vgpu/vgpu-core.c
 create mode 100644 drivers/vgpu/vgpu-driver.c
 create mode 100644 drivers/vgpu/vgpu-sysfs.c
 create mode 100644 drivers/vgpu/vgpu_private.h
 create mode 100644 include/linux/vgpu.h

diff --git a/drivers/Kconfig b/drivers/Kconfig
index d2ac339..5fd9eae 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -122,6 +122,8 @@ source "drivers/uio/Kconfig"
 
 source "drivers/vfio/Kconfig"
 
+source "drivers/vgpu/Kconfig"
+
 source "drivers/vlynq/Kconfig"
 
 source "drivers/virt/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 795d0ca..1c43250 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_FUSION)		+= message/
 obj-y				+= firewire/
 obj-$(CONFIG_UIO)		+= uio/
 obj-$(CONFIG_VFIO)		+= vfio/
+obj-$(CONFIG_VFIO)		+= vgpu/
 obj-y				+= cdrom/
 obj-y				+= auxdisplay/
 obj-$(CONFIG_PCCARD)		+= pcmcia/
diff --git a/drivers/vgpu/Kconfig b/drivers/vgpu/Kconfig
new file mode 100644
index 0000000..698ddf9
--- /dev/null
+++ b/drivers/vgpu/Kconfig
@@ -0,0 +1,26 @@
+
+menuconfig VGPU
+    tristate "VGPU driver framework"
+    depends on VFIO
+    select VGPU_VFIO
+    select VFIO_IOMMU_TYPE1_VGPU
+    help
+        VGPU provides a framework to virtualize GPU without SR-IOV cap
+        See Documentation/vgpu.txt for more details.
+
+        If you don't know what do here, say N.
+
+config VGPU
+    tristate
+    depends on VFIO
+    default n
+
+config VGPU_VFIO
+    tristate
+    depends on VGPU 
+    default n
+
+config VFIO_IOMMU_TYPE1_VGPU
+    tristate
+    depends on VGPU_VFIO
+    default n
diff --git a/drivers/vgpu/Makefile b/drivers/vgpu/Makefile
new file mode 100644
index 0000000..f5be980
--- /dev/null
+++ b/drivers/vgpu/Makefile
@@ -0,0 +1,4 @@
+
+vgpu-y := vgpu-core.o vgpu-sysfs.o vgpu-driver.o
+
+obj-$(CONFIG_VGPU)			+= vgpu.o
diff --git a/drivers/vgpu/vgpu-core.c b/drivers/vgpu/vgpu-core.c
new file mode 100644
index 0000000..7710021
--- /dev/null
+++ b/drivers/vgpu/vgpu-core.c
@@ -0,0 +1,422 @@
+/*
+ * VGPU Core Driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"NVIDIA Corporation"
+#define DRIVER_DESC	"VGPU Core Driver"
+
+/*
+ * #defines
+ */
+
+#define VGPU_CLASS_NAME		"vgpu"
+
+/*
+ * Global Structures
+ */
+
+static struct vgpu {
+	struct list_head    vgpu_devices_list;
+	struct mutex        vgpu_devices_lock;
+	struct list_head    gpu_devices_list;
+	struct mutex        gpu_devices_lock;
+} vgpu;
+
+static struct class vgpu_class;
+
+/*
+ * Functions
+ */
+
+struct vgpu_device *get_vgpu_device_from_group(struct iommu_group *group)
+{
+	struct vgpu_device *vdev = NULL;
+
+	mutex_lock(&vgpu.vgpu_devices_lock);
+	list_for_each_entry(vdev, &vgpu.vgpu_devices_list, list) {
+		if (vdev->group) {
+			if (iommu_group_id(vdev->group) == iommu_group_id(group)) {
+				mutex_unlock(&vgpu.vgpu_devices_lock);
+				return vdev;
+			}
+		}
+	}
+	mutex_unlock(&vgpu.vgpu_devices_lock);
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(get_vgpu_device_from_group);
+
+static int vgpu_add_attribute_group(struct device *dev,
+			            const struct attribute_group **groups)
+{
+        return sysfs_create_groups(&dev->kobj, groups);
+}
+
+static void vgpu_remove_attribute_group(struct device *dev,
+			                const struct attribute_group **groups)
+{
+        sysfs_remove_groups(&dev->kobj, groups);
+}
+
+int vgpu_register_device(struct pci_dev *dev, const struct gpu_device_ops *ops)
+{
+	int ret = 0;
+	struct gpu_device *gpu_dev, *tmp;
+
+	if (!dev)
+		return -EINVAL;
+
+        gpu_dev = kzalloc(sizeof(*gpu_dev), GFP_KERNEL);
+        if (!gpu_dev)
+                return -ENOMEM;
+
+	gpu_dev->dev = dev;
+        gpu_dev->ops = ops;
+
+        mutex_lock(&vgpu.gpu_devices_lock);
+
+        /* Check for duplicates */
+        list_for_each_entry(tmp, &vgpu.gpu_devices_list, gpu_next) {
+                if (tmp->dev == dev) {
+			ret = -EINVAL;
+			goto add_error;
+                }
+        }
+
+	ret = vgpu_create_pci_device_files(dev);
+	if (ret)
+		goto add_error;
+
+	ret = vgpu_add_attribute_group(&dev->dev, ops->dev_attr_groups);
+	if (ret)
+		goto add_group_error;
+
+        list_add(&gpu_dev->gpu_next, &vgpu.gpu_devices_list);
+
+	printk(KERN_INFO "VGPU: Registered dev 0x%x 0x%x, class 0x%x\n",
+			 dev->vendor, dev->device, dev->class);
+        mutex_unlock(&vgpu.gpu_devices_lock);
+
+        return 0;
+
+add_group_error:
+	vgpu_remove_pci_device_files(dev);
+add_error:
+	mutex_unlock(&vgpu.gpu_devices_lock);
+	kfree(gpu_dev);
+	return ret;
+
+}
+EXPORT_SYMBOL(vgpu_register_device);
+
+void vgpu_unregister_device(struct pci_dev *dev)
+{
+        struct gpu_device *gpu_dev;
+
+        mutex_lock(&vgpu.gpu_devices_lock);
+        list_for_each_entry(gpu_dev, &vgpu.gpu_devices_list, gpu_next) {
+		struct vgpu_device *vdev = NULL;
+
+                if (gpu_dev->dev != dev)
+			continue;
+
+		printk(KERN_INFO "VGPU: Unregistered dev 0x%x 0x%x, class 0x%x\n",
+				dev->vendor, dev->device, dev->class);
+
+		list_for_each_entry(vdev, &vgpu.vgpu_devices_list, list) {
+			if (vdev->gpu_dev != gpu_dev)
+				continue;
+			destroy_vgpu_device(vdev);
+		}
+		vgpu_remove_attribute_group(&dev->dev, gpu_dev->ops->dev_attr_groups);
+		vgpu_remove_pci_device_files(dev);
+		list_del(&gpu_dev->gpu_next);
+		mutex_unlock(&vgpu.gpu_devices_lock);
+		kfree(gpu_dev);
+		return;
+        }
+        mutex_unlock(&vgpu.gpu_devices_lock);
+}
+EXPORT_SYMBOL(vgpu_unregister_device);
+
+/*
+ * Helper Functions
+ */
+
+static struct vgpu_device *vgpu_device_alloc(uuid_le uuid, int instance, char *name)
+{
+	struct vgpu_device *vgpu_dev = NULL;
+
+	vgpu_dev = kzalloc(sizeof(*vgpu_dev), GFP_KERNEL);
+	if (!vgpu_dev)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&vgpu_dev->kref);
+	memcpy(&vgpu_dev->uuid, &uuid, sizeof(uuid_le));
+	vgpu_dev->vgpu_instance = instance;
+	strcpy(vgpu_dev->dev_name, name);
+
+	mutex_lock(&vgpu.vgpu_devices_lock);
+	list_add(&vgpu_dev->list, &vgpu.vgpu_devices_list);
+	mutex_unlock(&vgpu.vgpu_devices_lock);
+
+	return vgpu_dev;
+}
+
+static void vgpu_device_free(struct vgpu_device *vgpu_dev)
+{
+	if (vgpu_dev) {
+		mutex_lock(&vgpu.vgpu_devices_lock);
+		list_del(&vgpu_dev->list);
+		mutex_unlock(&vgpu.vgpu_devices_lock);
+		kfree(vgpu_dev);
+	}
+	return;
+}
+
+struct vgpu_device *vgpu_drv_get_vgpu_device(uuid_le uuid, int instance)
+{
+	struct vgpu_device *vdev = NULL;
+
+	mutex_lock(&vgpu.vgpu_devices_lock);
+	list_for_each_entry(vdev, &vgpu.vgpu_devices_list, list) {
+		if ((uuid_le_cmp(vdev->uuid, uuid) == 0) &&
+		    (vdev->vgpu_instance == instance)) {
+			mutex_unlock(&vgpu.vgpu_devices_lock);
+			return vdev;
+		}
+	}
+	mutex_unlock(&vgpu.vgpu_devices_lock);
+	return NULL;
+}
+
+static void vgpu_device_release(struct device *dev)
+{
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	vgpu_device_free(vgpu_dev);
+}
+
+int create_vgpu_device(struct pci_dev *pdev, uuid_le uuid, uint32_t instance, char *vgpu_params)
+{
+	char name[64];
+	int numChar = 0;
+	int retval = 0;
+	struct vgpu_device *vgpu_dev = NULL;
+	struct gpu_device *gpu_dev;
+
+	printk(KERN_INFO "VGPU: %s: device ", __FUNCTION__);
+
+	numChar = sprintf(name, "%pUb-%d", uuid.b, instance);
+	name[numChar] = '\0';
+
+	vgpu_dev = vgpu_device_alloc(uuid, instance, name);
+	if (IS_ERR(vgpu_dev)) {
+		return PTR_ERR(vgpu_dev);
+	}
+
+	vgpu_dev->dev.parent  = NULL;
+	vgpu_dev->dev.bus     = &vgpu_bus_type;
+	vgpu_dev->dev.release = vgpu_device_release;
+	dev_set_name(&vgpu_dev->dev, "%s", name);
+
+	retval = device_register(&vgpu_dev->dev);
+	if (retval)
+		goto create_failed1;
+
+	printk(KERN_INFO "UUID %pUb \n", vgpu_dev->uuid.b);
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	list_for_each_entry(gpu_dev, &vgpu.gpu_devices_list, gpu_next) {
+		if (gpu_dev->dev != pdev)
+			continue;
+
+		vgpu_dev->gpu_dev = gpu_dev;
+		if (gpu_dev->ops->vgpu_create) {
+			retval = gpu_dev->ops->vgpu_create(pdev, vgpu_dev->uuid,
+							   instance, vgpu_params);
+			if (retval) {
+				mutex_unlock(&vgpu.gpu_devices_lock);
+				goto create_failed2;
+			}
+		}
+		break;
+	}
+	if (!vgpu_dev->gpu_dev) {
+		retval = -EINVAL;
+		mutex_unlock(&vgpu.gpu_devices_lock);
+		goto create_failed2;
+	}
+
+	mutex_unlock(&vgpu.gpu_devices_lock);
+
+	retval = vgpu_add_attribute_group(&vgpu_dev->dev, gpu_dev->ops->vgpu_attr_groups);
+	if (retval)
+		goto create_attr_error;
+
+	return retval;
+
+create_attr_error:
+	if (gpu_dev->ops->vgpu_destroy) {
+		int ret = 0;
+		ret = gpu_dev->ops->vgpu_destroy(gpu_dev->dev,
+						 vgpu_dev->uuid,
+						 vgpu_dev->vgpu_instance);
+	}
+
+create_failed2:
+	device_unregister(&vgpu_dev->dev);
+
+create_failed1:
+	vgpu_device_free(vgpu_dev);
+
+	return retval;
+}
+
+void destroy_vgpu_device(struct vgpu_device *vgpu_dev)
+{
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	printk(KERN_INFO "VGPU: destroying device %s ", vgpu_dev->dev_name);
+	if (gpu_dev->ops->vgpu_destroy) {
+		int retval = 0;
+		retval = gpu_dev->ops->vgpu_destroy(gpu_dev->dev,
+						    vgpu_dev->uuid,
+						    vgpu_dev->vgpu_instance);
+	/* if vendor driver doesn't return success that means vendor driver doesn't
+	 * support hot-unplug */
+		if (retval)
+			return;
+	}
+
+	vgpu_remove_attribute_group(&vgpu_dev->dev, gpu_dev->ops->vgpu_attr_groups);
+	device_unregister(&vgpu_dev->dev);
+}
+
+void get_vgpu_supported_types(struct device *dev, char *str)
+{
+	struct gpu_device *gpu_dev;
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	list_for_each_entry(gpu_dev, &vgpu.gpu_devices_list, gpu_next) {
+		if (&gpu_dev->dev->dev == dev) {
+			if (gpu_dev->ops->vgpu_supported_config)
+				gpu_dev->ops->vgpu_supported_config(gpu_dev->dev, str);
+			break;
+		}
+	}
+	mutex_unlock(&vgpu.gpu_devices_lock);
+}
+
+int vgpu_start_callback(struct vgpu_device *vgpu_dev)
+{
+	int ret = 0;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	if (gpu_dev->ops->vgpu_start)
+		ret = gpu_dev->ops->vgpu_start(vgpu_dev->uuid);
+	mutex_unlock(&vgpu.gpu_devices_lock);
+	return ret;
+}
+
+int vgpu_shutdown_callback(struct vgpu_device *vgpu_dev)
+{
+	int ret = 0;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	mutex_lock(&vgpu.gpu_devices_lock);
+	if (gpu_dev->ops->vgpu_shutdown)
+		ret = gpu_dev->ops->vgpu_shutdown(vgpu_dev->uuid);
+	mutex_unlock(&vgpu.gpu_devices_lock);
+	return ret;
+}
+
+char *vgpu_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "vgpu/%s", dev_name(dev));
+}
+
+static void release_vgpubus_dev(struct device *dev)
+{
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	destroy_vgpu_device(vgpu_dev);
+}
+
+static struct class vgpu_class = {
+	.name		= VGPU_CLASS_NAME,
+	.owner		= THIS_MODULE,
+	.class_attrs	= vgpu_class_attrs,
+	.dev_groups	= vgpu_dev_groups,
+	.devnode	= vgpu_devnode,
+	.dev_release    = release_vgpubus_dev,
+};
+
+static int __init vgpu_init(void)
+{
+	int rc = 0;
+
+	memset(&vgpu, 0 , sizeof(vgpu));
+
+	mutex_init(&vgpu.vgpu_devices_lock);
+	INIT_LIST_HEAD(&vgpu.vgpu_devices_list);
+	mutex_init(&vgpu.gpu_devices_lock);
+	INIT_LIST_HEAD(&vgpu.gpu_devices_list);
+
+	rc = class_register(&vgpu_class);
+	if (rc < 0) {
+		printk(KERN_ERR "Error: failed to register vgpu class\n");
+		goto failed1;
+	}
+
+	rc = vgpu_bus_register();
+	if (rc < 0) {
+		printk(KERN_ERR "Error: failed to register vgpu bus\n");
+		class_unregister(&vgpu_class);
+	}
+
+failed1:
+	return rc;
+}
+
+static void __exit vgpu_exit(void)
+{
+	vgpu_bus_unregister();
+	class_unregister(&vgpu_class);
+}
+
+module_init(vgpu_init)
+module_exit(vgpu_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vgpu/vgpu-driver.c b/drivers/vgpu/vgpu-driver.c
new file mode 100644
index 0000000..6b62f19
--- /dev/null
+++ b/drivers/vgpu/vgpu-driver.c
@@ -0,0 +1,137 @@
+/*
+ * VGPU driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+static int vgpu_device_attach_iommu(struct vgpu_device *vgpu_dev)
+{
+        int retval = 0;
+        struct iommu_group *group = NULL;
+
+        group = iommu_group_alloc();
+        if (IS_ERR(group)) {
+                printk(KERN_ERR "VGPU: failed to allocate group!\n");
+                return PTR_ERR(group);
+        }
+
+        retval = iommu_group_add_device(group, &vgpu_dev->dev);
+        if (retval) {
+                printk(KERN_ERR "VGPU: failed to add dev to group!\n");
+                iommu_group_put(group);
+                return retval;
+        }
+
+        vgpu_dev->group = group;
+
+        printk(KERN_INFO "VGPU: group_id = %d \n", iommu_group_id(group));
+        return retval;
+}
+
+static void vgpu_device_detach_iommu(struct vgpu_device *vgpu_dev)
+{
+        iommu_group_put(vgpu_dev->dev.iommu_group);
+        iommu_group_remove_device(&vgpu_dev->dev);
+        printk(KERN_INFO "VGPU: detaching iommu \n");
+}
+
+static int vgpu_device_probe(struct device *dev)
+{
+	struct vgpu_driver *drv = to_vgpu_driver(dev->driver);
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	int status = 0;
+
+	status = vgpu_device_attach_iommu(vgpu_dev);
+	if (status) {
+		printk(KERN_ERR "Failed to attach IOMMU\n");
+		return status;
+	}
+
+	if (drv && drv->probe) {
+		status = drv->probe(dev);
+		if (status)
+			vgpu_device_detach_iommu(vgpu_dev);
+	}
+
+	return status;
+}
+
+static int vgpu_device_remove(struct device *dev)
+{
+	struct vgpu_driver *drv = to_vgpu_driver(dev->driver);
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	int status = 0;
+
+	if (drv && drv->remove) {
+		drv->remove(dev);
+	}
+
+	vgpu_device_detach_iommu(vgpu_dev);
+
+	return status;
+}
+
+struct bus_type vgpu_bus_type = {
+	.name		= "vgpu",
+	.probe		= vgpu_device_probe,
+	.remove		= vgpu_device_remove,
+};
+EXPORT_SYMBOL_GPL(vgpu_bus_type);
+
+/**
+ * vgpu_register_driver - register a new vGPU driver
+ * @drv: the driver to register
+ * @owner: owner module of the driver to register
+ *
+ * Returns a negative value on error, otherwise 0.
+ */
+int vgpu_register_driver(struct vgpu_driver *drv, struct module *owner)
+{
+	/* initialize common driver fields */
+	drv->driver.name = drv->name;
+	drv->driver.bus = &vgpu_bus_type;
+	drv->driver.owner = owner;
+
+	/* register with core */
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(vgpu_register_driver);
+
+/**
+ * vgpu_unregister_driver - unregister vGPU driver
+ * @drv: the driver to unregister
+ *
+ */
+void vgpu_unregister_driver(struct vgpu_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(vgpu_unregister_driver);
+
+int vgpu_bus_register(void)
+{
+	return bus_register(&vgpu_bus_type);
+}
+
+void vgpu_bus_unregister(void)
+{
+	bus_unregister(&vgpu_bus_type);
+}
+
diff --git a/drivers/vgpu/vgpu-sysfs.c b/drivers/vgpu/vgpu-sysfs.c
new file mode 100644
index 0000000..a1b321b
--- /dev/null
+++ b/drivers/vgpu/vgpu-sysfs.c
@@ -0,0 +1,366 @@
+/*
+ * File attributes for vGPU devices
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+/* Prototypes */
+
+static ssize_t vgpu_supported_types_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf);
+static DEVICE_ATTR_RO(vgpu_supported_types);
+
+static ssize_t vgpu_create_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count);
+static DEVICE_ATTR_WO(vgpu_create);
+
+static ssize_t vgpu_destroy_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count);
+static DEVICE_ATTR_WO(vgpu_destroy);
+
+
+/* Static functions */
+
+static bool is_uuid_sep(char sep)
+{
+	if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
+		return true;
+	return false;
+}
+
+
+static int uuid_parse(const char *str, uuid_le *uuid)
+{
+	int i;
+
+	if (strlen(str) < 36)
+		return -EINVAL;
+
+	for (i = 0; i < 16; i++) {
+		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
+			printk(KERN_ERR "%s: invalid UUID string\n",
+			       __FUNCTION__);
+			return -EINVAL;
+		}
+
+		uuid->b[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
+		str += 2;
+		if (is_uuid_sep(*str))
+			str++;
+	}
+
+	return 0;
+}
+
+
+/* Functions */
+static ssize_t vgpu_supported_types_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	char *str;
+	ssize_t n;
+
+	str = kzalloc(512, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	get_vgpu_supported_types(dev, str);
+
+	n = sprintf(buf, "%s\n", str);
+	kfree(str);
+
+	return n;
+}
+
+static ssize_t vgpu_create_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	char *str, *pstr;
+	char *uuid_str, *instance_str, *vgpu_params = NULL;
+	uuid_le uuid;
+	uint32_t instance;
+	struct pci_dev *pdev;
+	int ret = 0;
+
+	pstr = str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!str)
+		return -ENOMEM;
+
+	if ((uuid_str = strsep(&str, ":")) == NULL) {
+		printk(KERN_ERR "%s Empty UUID or string %s\n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if (!str) {
+		printk(KERN_ERR "%s vgpu instance not specified %s\n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if ((instance_str = strsep(&str, ":")) == NULL) {
+		printk(KERN_ERR "%s Empty instance or string %s\n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	instance = (unsigned int)simple_strtoul(instance_str, NULL, 0);
+
+	if (!str) {
+		printk(KERN_ERR "%s vgpu params not specified %s\n",
+				 __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	vgpu_params = kstrdup(str, GFP_KERNEL);
+
+	if (!vgpu_params) {
+		printk(KERN_ERR "%s vgpu params allocation failed\n",
+				 __FUNCTION__);
+		ret = -ENOMEM;
+		goto create_error;
+	}
+
+	if (uuid_parse(uuid_str, &uuid) < 0) {
+		printk(KERN_ERR "%s UUID parse error %s\n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto create_error;
+	}
+
+	if (dev_is_pci(dev)) {
+		pdev = to_pci_dev(dev);
+
+		if (create_vgpu_device(pdev, uuid, instance, vgpu_params) < 0) {
+			printk(KERN_ERR "%s vgpu create error\n", __FUNCTION__);
+			ret = -EINVAL;
+			goto create_error;
+		}
+		ret = count;
+	} else
+		ret = -EINVAL;
+
+create_error:
+	kfree(vgpu_params);
+	kfree(pstr);
+	return ret;
+}
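+
+/*
+ * Example (illustrative only; the sysfs path depends on the PCI address of
+ * the physical GPU):
+ *
+ *	echo "<uuid>:<instance>:<vgpu_params>" > \
+ *		/sys/bus/pci/devices/0000:86:00.0/vgpu_create
+ */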
+
+static ssize_t vgpu_destroy_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	char *uuid_str, *str, *pstr;
+	uuid_le uuid;
+	unsigned int instance;
+	struct vgpu_device *vgpu_dev = NULL;
+	ssize_t ret = count;
+
+	pstr = str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!str)
+		return -ENOMEM;
+
+	if ((uuid_str = strsep(&str, ":")) == NULL) {
+		printk(KERN_ERR "%s Empty UUID or string %s\n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto destroy_exit;
+	}
+
+	if (str == NULL) {
+		printk(KERN_ERR "%s instance not specified %s\n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto destroy_exit;
+	}
+
+	instance = (unsigned int)simple_strtoul(str, NULL, 0);
+
+	if (uuid_parse(uuid_str, &uuid) < 0) {
+		printk(KERN_ERR "%s UUID parse error %s\n", __FUNCTION__, buf);
+		ret = -EINVAL;
+		goto destroy_exit;
+	}
+
+	printk(KERN_INFO "%s UUID %pUb - %d\n", __FUNCTION__, uuid.b, instance);
+
+	vgpu_dev = vgpu_drv_get_vgpu_device(uuid, instance);
+
+	if (vgpu_dev)
+		destroy_vgpu_device(vgpu_dev);
+
+destroy_exit:
+	kfree(pstr);
+	return ret;
+}
+
+static ssize_t
+vgpu_uuid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+
+	if (vgpu_dev)
+		return sprintf(buf, "%pUb\n", vgpu_dev->uuid.b);
+
+	return sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR_RO(vgpu_uuid);
+
+static ssize_t
+vgpu_group_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+
+	if (vgpu_dev && vgpu_dev->group)
+		return sprintf(buf, "%d\n", iommu_group_id(vgpu_dev->group));
+
+	return sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR_RO(vgpu_group_id);
+
+
+static struct attribute *vgpu_dev_attrs[] = {
+	&dev_attr_vgpu_uuid.attr,
+	&dev_attr_vgpu_group_id.attr,
+	NULL,
+};
+
+static const struct attribute_group vgpu_dev_group = {
+	.attrs = vgpu_dev_attrs,
+};
+
+const struct attribute_group *vgpu_dev_groups[] = {
+	&vgpu_dev_group,
+	NULL,
+};
+
+
+ssize_t vgpu_start_store(struct class *class, struct class_attribute *attr,
+			 const char *buf, size_t count)
+{
+	char *uuid_str;
+	uuid_le uuid;
+	struct vgpu_device *vgpu_dev = NULL;
+	int ret;
+
+	uuid_str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!uuid_str)
+		return -ENOMEM;
+
+	ret = uuid_parse(uuid_str, &uuid);
+	kfree(uuid_str);
+	if (ret < 0) {
+		printk(KERN_ERR "%s UUID parse error %s\n", __FUNCTION__, buf);
+		return -EINVAL;
+	}
+
+	vgpu_dev = vgpu_drv_get_vgpu_device(uuid, 0);
+
+	if (vgpu_dev && dev_is_vgpu(&vgpu_dev->dev)) {
+		kobject_uevent(&vgpu_dev->dev.kobj, KOBJ_ONLINE);
+
+		ret = vgpu_start_callback(vgpu_dev);
+		if (ret < 0) {
+			printk(KERN_ERR "%s vgpu_start callback failed %d\n",
+					 __FUNCTION__, ret);
+			return ret;
+		}
+	}
+
+	return count;
+}
+
+ssize_t vgpu_shutdown_store(struct class *class, struct class_attribute *attr,
+			    const char *buf, size_t count)
+{
+	char *uuid_str;
+	uuid_le uuid;
+	struct vgpu_device *vgpu_dev = NULL;
+	int ret;
+
+	uuid_str = kstrndup(buf, count, GFP_KERNEL);
+
+	if (!uuid_str)
+		return -ENOMEM;
+
+	ret = uuid_parse(uuid_str, &uuid);
+	kfree(uuid_str);
+	if (ret < 0) {
+		printk(KERN_ERR "%s UUID parse error %s\n", __FUNCTION__, buf);
+		return -EINVAL;
+	}
+
+	vgpu_dev = vgpu_drv_get_vgpu_device(uuid, 0);
+
+	if (vgpu_dev && dev_is_vgpu(&vgpu_dev->dev)) {
+		kobject_uevent(&vgpu_dev->dev.kobj, KOBJ_OFFLINE);
+
+		ret = vgpu_shutdown_callback(vgpu_dev);
+		if (ret < 0) {
+			printk(KERN_ERR "%s vgpu_shutdown callback failed %d\n",
+					 __FUNCTION__, ret);
+			return ret;
+		}
+	}
+
+	return count;
+}
+
+struct class_attribute vgpu_class_attrs[] = {
+	__ATTR_WO(vgpu_start),
+	__ATTR_WO(vgpu_shutdown),
+	__ATTR_NULL
+};
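+
+/*
+ * Example (illustrative; assumes VGPU_CLASS_NAME is "vgpu"):
+ *
+ *	echo "<uuid>" > /sys/class/vgpu/vgpu_start
+ *	echo "<uuid>" > /sys/class/vgpu/vgpu_shutdown
+ */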
+
+int vgpu_create_pci_device_files(struct pci_dev *dev)
+{
+	int retval;
+
+	retval = sysfs_create_file(&dev->dev.kobj,
+				   &dev_attr_vgpu_supported_types.attr);
+	if (retval) {
+		printk(KERN_ERR "VGPU-VFIO: failed to create vgpu_supported_types sysfs entry\n");
+		return retval;
+	}
+
+	retval = sysfs_create_file(&dev->dev.kobj, &dev_attr_vgpu_create.attr);
+	if (retval) {
+		printk(KERN_ERR "VGPU-VFIO: failed to create vgpu_create sysfs entry\n");
+		return retval;
+	}
+
+	retval = sysfs_create_file(&dev->dev.kobj, &dev_attr_vgpu_destroy.attr);
+	if (retval) {
+		printk(KERN_ERR "VGPU-VFIO: failed to create vgpu_destroy sysfs entry\n");
+		return retval;
+	}
+
+	return 0;
+}
+
+
+void vgpu_remove_pci_device_files(struct pci_dev *dev)
+{
+	sysfs_remove_file(&dev->dev.kobj, &dev_attr_vgpu_supported_types.attr);
+	sysfs_remove_file(&dev->dev.kobj, &dev_attr_vgpu_create.attr);
+	sysfs_remove_file(&dev->dev.kobj, &dev_attr_vgpu_destroy.attr);
+}
+
diff --git a/drivers/vgpu/vgpu_private.h b/drivers/vgpu/vgpu_private.h
new file mode 100644
index 0000000..35158ef
--- /dev/null
+++ b/drivers/vgpu/vgpu_private.h
@@ -0,0 +1,36 @@
+/*
+ * VGPU internal definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef VGPU_PRIVATE_H
+#define VGPU_PRIVATE_H
+
+struct vgpu_device *vgpu_drv_get_vgpu_device(uuid_le uuid, int instance);
+
+int  create_vgpu_device(struct pci_dev *pdev, uuid_le uuid, uint32_t instance,
+		       char *vgpu_params);
+void destroy_vgpu_device(struct vgpu_device *vgpu_dev);
+
+int  vgpu_bus_register(void);
+void vgpu_bus_unregister(void);
+
+/* Function prototypes for vgpu_sysfs */
+
+extern struct class_attribute vgpu_class_attrs[];
+extern const struct attribute_group *vgpu_dev_groups[];
+
+int  vgpu_create_pci_device_files(struct pci_dev *dev);
+void vgpu_remove_pci_device_files(struct pci_dev *dev);
+
+void get_vgpu_supported_types(struct device *dev, char *str);
+int  vgpu_start_callback(struct vgpu_device *vgpu_dev);
+int  vgpu_shutdown_callback(struct vgpu_device *vgpu_dev);
+
+#endif /* VGPU_PRIVATE_H */
diff --git a/include/linux/vgpu.h b/include/linux/vgpu.h
new file mode 100644
index 0000000..7e1cb4e
--- /dev/null
+++ b/include/linux/vgpu.h
@@ -0,0 +1,217 @@
+/*
+ * VGPU definition
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef VGPU_H
+#define VGPU_H
+
+/* Common data structures */
+
+struct pci_bar_info {
+	uint64_t start;
+	uint64_t size;
+	uint32_t flags;
+};
+
+enum vgpu_emul_space_e {
+	vgpu_emul_space_config = 0, /*!< PCI configuration space */
+	vgpu_emul_space_io = 1,     /*!< I/O register space */
+	vgpu_emul_space_mmio = 2    /*!< Memory-mapped I/O space */
+};
+
+struct gpu_device;
+
+/*
+ * VGPU device
+ */
+struct vgpu_device {
+	struct kref		kref;
+	struct device		dev;
+	struct gpu_device	*gpu_dev;
+	struct iommu_group	*group;
+#define DEVICE_NAME_LEN		(64)
+	char			dev_name[DEVICE_NAME_LEN];
+	uuid_le			uuid;
+	uint32_t		vgpu_instance;
+	struct device_attribute	*dev_attr_vgpu_status;
+	int			vgpu_device_status;
+
+	void			*driver_data;
+
+	struct list_head	list;
+};
+
+
+/**
+ * struct gpu_device_ops - Structure to be registered for each physical GPU to
+ * register the device to vgpu module.
+ *
+ * @owner:			The module owner.
+ * @dev_attr_groups:		Default attributes of the physical device.
+ * @vgpu_attr_groups:		Default attributes of the vGPU device.
+ * @vgpu_supported_config:	Called to get information about supported
+ *				vgpu types.
+ *				@dev : pci device structure of physical GPU.
+ *				@config: should return string listing supported
+ *				configurations.
+ *				Returns integer: success (0) or error (< 0)
+ * @vgpu_create:		Called to allocate basic resources in the
+ *				graphics driver for a particular vgpu.
+ *				@dev: physical pci device structure on which
+ *				      the vgpu should be created
+ *				@uuid: uuid of the VM for which this vgpu is
+ *				intended
+ *				@instance: vgpu instance in that VM
+ *				@vgpu_params: extra parameters required by the
+ *				GPU driver.
+ *				Returns integer: success (0) or error (< 0)
+ * @vgpu_destroy:		Called to free resources in the graphics driver
+ *				for a vgpu instance of that VM.
+ *				@dev: physical pci device structure to which
+ *				this vgpu points.
+ *				@uuid: uuid of the VM to which this vgpu belongs.
+ *				@instance: vgpu instance in that VM
+ *				Returns integer: success (0) or error (< 0)
+ *				If the VM is running when vgpu_destroy is
+ *				called, the vGPU is being hot-unplugged. Return
+ *				an error if the VM is running and the graphics
+ *				driver doesn't support vgpu hotplug.
+ * @vgpu_start:			Called to initiate the vGPU initialization
+ *				process in the graphics driver when the VM
+ *				boots, before qemu starts.
+ *				@uuid: UUID of the VM which is booting.
+ *				Returns integer: success (0) or error (< 0)
+ * @vgpu_shutdown:		Called to tear down vGPU-related resources for
+ *				the VM.
+ *				@uuid: UUID of the VM which is shutting down.
+ *				Returns integer: success (0) or error (< 0)
+ * @read:			Read emulation callback.
+ *				@vdev: vgpu device structure
+ *				@buf: read buffer
+ *				@count: number of bytes to read
+ *				@address_space: specifies for which address
+ *				space the request is: pci_config_space, IO
+ *				register space or MMIO space.
+ *				@pos: offset from base address.
+ *				Returns number of bytes read on success, or an
+ *				error.
+ * @write:			Write emulation callback.
+ *				@vdev: vgpu device structure
+ *				@buf: write buffer
+ *				@count: number of bytes to be written
+ *				@address_space: specifies for which address
+ *				space the request is: pci_config_space, IO
+ *				register space or MMIO space.
+ *				@pos: offset from base address.
+ *				Returns number of bytes written on success, or
+ *				an error.
+ * @vgpu_set_irqs:		Called to send the interrupt configuration
+ *				information that qemu set.
+ *				@vdev: vgpu device structure
+ *				@flags, index, start, count and *data : same as
+ *				those of struct vfio_irq_set of the
+ *				VFIO_DEVICE_SET_IRQS API.
+ * @vgpu_bar_info:		Called to get BAR size and flags of the vGPU
+ *				device.
+ *				@vdev: vgpu device structure
+ *				@bar_index: BAR index
+ *				@bar_info: output, returns size and flags of
+ *				the requested BAR
+ *				Returns integer: success (0) or error (< 0)
+ * @validate_map_request:	Validate a remap pfn request.
+ *				@vdev: vgpu device structure
+ *				@virtaddr: target user address to start at
+ *				@pfn: physical address of kernel memory; the
+ *				GPU driver can change it if required.
+ *				@size: size of the map area; the GPU driver
+ *				can change the size of the map area if desired.
+ *				@prot: page protection flags for this mapping;
+ *				the GPU driver can change them if required.
+ *				Returns integer: success (0) or error (< 0)
+ *
+ * A physical GPU that supports vGPU should be registered with the vgpu module
+ * with a gpu_device_ops structure.
+ */
+
+struct gpu_device_ops {
+	struct module   *owner;
+	const struct attribute_group **dev_attr_groups;
+	const struct attribute_group **vgpu_attr_groups;
+
+	int	(*vgpu_supported_config)(struct pci_dev *dev, char *config);
+	int     (*vgpu_create)(struct pci_dev *dev, uuid_le uuid,
+			       uint32_t instance, char *vgpu_params);
+	int     (*vgpu_destroy)(struct pci_dev *dev, uuid_le uuid,
+			        uint32_t instance);
+
+	int     (*vgpu_start)(uuid_le uuid);
+	int     (*vgpu_shutdown)(uuid_le uuid);
+
+	ssize_t (*read) (struct vgpu_device *vdev, char *buf, size_t count,
+			 uint32_t address_space, loff_t pos);
+	ssize_t (*write)(struct vgpu_device *vdev, char *buf, size_t count,
+			 uint32_t address_space, loff_t pos);
+	int     (*vgpu_set_irqs)(struct vgpu_device *vdev, uint32_t flags,
+				 unsigned index, unsigned start, unsigned count,
+				 void *data);
+	int	(*vgpu_bar_info)(struct vgpu_device *vdev, int bar_index,
+				 struct pci_bar_info *bar_info);
+	int	(*validate_map_request)(struct vgpu_device *vdev,
+					unsigned long virtaddr,
+					unsigned long *pfn, unsigned long *size,
+					pgprot_t *prot);
+};
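+
+/*
+ * Example (hypothetical GPU driver; names such as my_vgpu_create() are
+ * illustrative only):
+ *
+ *	static const struct gpu_device_ops my_gpu_ops = {
+ *		.owner			= THIS_MODULE,
+ *		.vgpu_supported_config	= my_vgpu_supported_config,
+ *		.vgpu_create		= my_vgpu_create,
+ *		.vgpu_destroy		= my_vgpu_destroy,
+ *		.vgpu_start		= my_vgpu_start,
+ *		.vgpu_shutdown		= my_vgpu_shutdown,
+ *		.read			= my_vgpu_read,
+ *		.write			= my_vgpu_write,
+ *		.vgpu_set_irqs		= my_vgpu_set_irqs,
+ *		.vgpu_bar_info		= my_vgpu_bar_info,
+ *		.validate_map_request	= my_validate_map_request,
+ *	};
+ *
+ * In its PCI probe routine the GPU driver would then call:
+ *
+ *	vgpu_register_device(pdev, &my_gpu_ops);
+ */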
+
+/*
+ * Physical GPU
+ */
+struct gpu_device {
+	struct pci_dev                  *dev;
+	const struct gpu_device_ops     *ops;
+	struct list_head                gpu_next;
+};
+
+/**
+ * struct vgpu_driver - vGPU device driver
+ * @name: driver name
+ * @probe: called when new device created
+ * @remove: called when device removed
+ * @driver: device driver structure
+ *
+ **/
+struct vgpu_driver {
+	const char *name;
+	int  (*probe)  (struct device *dev);
+	void (*remove) (struct device *dev);
+	struct device_driver	driver;
+};
+
+static inline struct vgpu_driver *to_vgpu_driver(struct device_driver *drv)
+{
+	return drv ? container_of(drv, struct vgpu_driver, driver) : NULL;
+}
+
+static inline struct vgpu_device *to_vgpu_device(struct device *dev)
+{
+	return dev ? container_of(dev, struct vgpu_device, dev) : NULL;
+}
+
+extern struct bus_type vgpu_bus_type;
+
+#define dev_is_vgpu(d) ((d)->bus == &vgpu_bus_type)
+
+extern int  vgpu_register_device(struct pci_dev *dev,
+				 const struct gpu_device_ops *ops);
+extern void vgpu_unregister_device(struct pci_dev *dev);
+
+extern int  vgpu_register_driver(struct vgpu_driver *drv, struct module *owner);
+extern void vgpu_unregister_driver(struct vgpu_driver *drv);
+
+extern int vgpu_map_virtual_bar(uint64_t virt_bar_addr, uint64_t phys_bar_addr,
+				uint32_t len, uint32_t flags);
+extern int vgpu_dma_do_translate(dma_addr_t * gfn_buffer, uint32_t count);
+
+struct vgpu_device *get_vgpu_device_from_group(struct iommu_group *group);
+
+#endif /* VGPU_H */
+
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [RFC PATCH v2 2/3] VFIO driver for vGPU device
  2016-02-23 16:24 ` [Qemu-devel] " Kirti Wankhede
@ 2016-02-23 16:24   ` Kirti Wankhede
  -1 siblings, 0 replies; 38+ messages in thread
From: Kirti Wankhede @ 2016-02-23 16:24 UTC (permalink / raw)
  To: alex.williamson, pbonzini, kraxel
  Cc: qemu-devel, kvm, kevin.tian, shuai.ruan, jike.song, zhiyuan.lv,
	Kirti Wankhede, Neo Jia

The VFIO driver registers with the vGPU core driver. The vGPU core driver
creates the vGPU device and calls the probe routine of the vGPU VFIO driver.
This vGPU VFIO driver then adds the vGPU device to the VFIO core module.
The main aim of this module is to manage all VFIO APIs for each vGPU device.
Those are:
- get region information from the GPU driver.
- trap and emulate PCI config space and BAR regions.
- send interrupt configuration information to the GPU driver.
- mmap mappable regions with invalidated mappings and fault on access to remap
  pfns, as sketched from the userspace side below.
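
A minimal userspace sketch of the flow (illustrative only, not part of this
patch; device_fd, buf and config_region_offset are assumed to exist, with
device_fd obtained via VFIO_GROUP_GET_DEVICE_FD):

	struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
	struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };

	/* discover how many regions and IRQs the vGPU device exposes */
	ioctl(device_fd, VFIO_DEVICE_GET_INFO, &dev_info);

	/* query BAR0; the file offset encodes the region index */
	reg_info.index = VFIO_PCI_BAR0_REGION_INDEX;
	ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);

	/* config space accesses are trapped and emulated via read()/write() */
	pread(device_fd, buf, 4, config_region_offset + PCI_VENDOR_ID);

	/* mappable BAR regions are mmap'ed; access faults remap pfns */
	mmap(NULL, reg_info.size, PROT_READ | PROT_WRITE, MAP_SHARED,
	     device_fd, reg_info.offset);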

Thanks,
Kirti.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
---
 drivers/vgpu/Makefile    |    1 +
 drivers/vgpu/vgpu_vfio.c |  664 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 665 insertions(+), 0 deletions(-)
 create mode 100644 drivers/vgpu/vgpu_vfio.c

diff --git a/drivers/vgpu/Makefile b/drivers/vgpu/Makefile
index f5be980..a0a2655 100644
--- a/drivers/vgpu/Makefile
+++ b/drivers/vgpu/Makefile
@@ -2,3 +2,4 @@
 vgpu-y := vgpu-core.o vgpu-sysfs.o vgpu-driver.o
 
 obj-$(CONFIG_VGPU)			+= vgpu.o
+obj-$(CONFIG_VGPU_VFIO)                 += vgpu_vfio.o
diff --git a/drivers/vgpu/vgpu_vfio.c b/drivers/vgpu/vgpu_vfio.c
new file mode 100644
index 0000000..dc19630
--- /dev/null
+++ b/drivers/vgpu/vgpu_vfio.c
@@ -0,0 +1,664 @@
+/*
+ * VGPU VFIO device
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "NVIDIA Corporation"
+#define DRIVER_DESC     "VGPU VFIO Driver"
+
+#define VFIO_PCI_OFFSET_SHIFT   40
+
+#define VFIO_PCI_OFFSET_TO_INDEX(off)	(off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
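+
+/*
+ * Example: a read at BAR1 offset 0x100 arrives at file offset
+ * VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR1_REGION_INDEX) + 0x100,
+ * i.e. (1ULL << 40) | 0x100.
+ */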
+
+struct vfio_vgpu_device {
+	struct iommu_group *group;
+	struct vgpu_device *vgpu_dev;
+	int		    refcnt;
+	struct pci_bar_info bar_info[VFIO_PCI_NUM_REGIONS];
+	u8		    *vconfig;
+};
+
+static DEFINE_MUTEX(vfio_vgpu_lock);
+
+static int get_virtual_bar_info(struct vgpu_device *vgpu_dev,
+				struct pci_bar_info *bar_info,
+				int index)
+{
+	int ret = -EINVAL;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+
+	if (gpu_dev->ops->vgpu_bar_info)
+		ret = gpu_dev->ops->vgpu_bar_info(vgpu_dev, index, bar_info);
+	return ret;
+}
+
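+/*
+ * Snoop the BAR base addresses the guest programmed into the virtual config
+ * space so that subsequent BAR accesses can be forwarded at the right offset.
+ */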
+static int vdev_read_base(struct vfio_vgpu_device *vdev)
+{
+	int index, pos;
+	u32 start_lo, start_hi;
+	u32 mem_type;
+
+	pos = PCI_BASE_ADDRESS_0;
+
+	for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {
+
+		if (!vdev->bar_info[index].size)
+			continue;
+
+		start_lo = (*(u32 *)(vdev->vconfig + pos)) &
+					PCI_BASE_ADDRESS_MEM_MASK;
+		mem_type = (*(u32 *)(vdev->vconfig + pos)) &
+					PCI_BASE_ADDRESS_MEM_TYPE_MASK;
+
+		switch (mem_type) {
+		case PCI_BASE_ADDRESS_MEM_TYPE_64:
+			start_hi = (*(u32 *)(vdev->vconfig + pos + 4));
+			pos += 4;
+			break;
+		case PCI_BASE_ADDRESS_MEM_TYPE_32:
+		case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+			/* 1M mem BAR treated as 32-bit BAR */
+		default:
+			/* mem unknown type treated as 32-bit BAR */
+			start_hi = 0;
+			break;
+		}
+		pos += 4;
+		vdev->bar_info[index].start = ((u64)start_hi << 32) | start_lo;
+	}
+	return 0;
+}
+
+static int vgpu_dev_open(void *device_data)
+{
+	int ret = 0;
+	struct vfio_vgpu_device *vdev = device_data;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&vfio_vgpu_lock);
+
+	if (!vdev->refcnt) {
+		u8 *vconfig;
+		int vconfig_size, index;
+
+		for (index = 0; index < VFIO_PCI_NUM_REGIONS; index++) {
+			ret = get_virtual_bar_info(vdev->vgpu_dev,
+						   &vdev->bar_info[index],
+						   index);
+			if (ret)
+				goto open_error;
+		}
+		vconfig_size = vdev->bar_info[VFIO_PCI_CONFIG_REGION_INDEX].size;
+		if (!vconfig_size) {
+			ret = -EINVAL;
+			goto open_error;
+		}
+
+		vconfig = kzalloc(vconfig_size, GFP_KERNEL);
+		if (!vconfig) {
+			ret = -ENOMEM;
+			goto open_error;
+		}
+
+		vdev->vconfig = vconfig;
+	}
+
+	vdev->refcnt++;
+open_error:
+
+	mutex_unlock(&vfio_vgpu_lock);
+
+	if (ret)
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static void vgpu_dev_close(void *device_data)
+{
+	struct vfio_vgpu_device *vdev = device_data;
+
+	mutex_lock(&vfio_vgpu_lock);
+
+	vdev->refcnt--;
+	if (!vdev->refcnt) {
+		memset(&vdev->bar_info, 0, sizeof(vdev->bar_info));
+		kfree(vdev->vconfig);
+		vdev->vconfig = NULL;
+	}
+
+	mutex_unlock(&vfio_vgpu_lock);
+
+	module_put(THIS_MODULE);
+}
+
+static int vgpu_get_irq_count(struct vfio_vgpu_device *vdev, int irq_type)
+{
+	return 1;
+}
+
+static long vgpu_dev_unlocked_ioctl(void *device_data,
+		unsigned int cmd, unsigned long arg)
+{
+	int ret = 0;
+	struct vfio_vgpu_device *vdev = device_data;
+	unsigned long minsz;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		printk(KERN_INFO "%s VFIO_DEVICE_GET_INFO cmd\n", __FUNCTION__);
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		info.flags = VFIO_DEVICE_FLAGS_PCI;
+		info.num_regions = VFIO_PCI_NUM_REGIONS;
+		info.num_irqs = VFIO_PCI_NUM_IRQS;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		printk(KERN_INFO "%s VFIO_DEVICE_GET_REGION_INFO cmd for region_index %d\n",
+		       __FUNCTION__, info.index);
+		switch (info.index) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+		case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = vdev->bar_info[info.index].size;
+			if (!info.size) {
+				info.flags = 0;
+				break;
+			}
+
+			info.flags = vdev->bar_info[info.index].flags;
+			break;
+		case VFIO_PCI_VGA_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = 0xc0000;
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+			break;
+
+		case VFIO_PCI_ROM_REGION_INDEX:
+		default:
+			return -EINVAL;
+		}
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		printk(KERN_INFO "%s VFIO_DEVICE_GET_IRQ_INFO cmd\n", __FUNCTION__);
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
+			return -EINVAL;
+
+		switch (info.index) {
+		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+		case VFIO_PCI_REQ_IRQ_INDEX:
+			break;
+			/* pass thru to return error */
+		default:
+			return -EINVAL;
+		}
+
+		info.flags = VFIO_IRQ_INFO_EVENTFD;
+		info.count = vgpu_get_irq_count(vdev, info.index);
+
+		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
+			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
+					VFIO_IRQ_INFO_AUTOMASKED);
+		else
+			info.flags |= VFIO_IRQ_INFO_NORESIZE;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		struct gpu_device *gpu_dev = vdev->vgpu_dev->gpu_dev;
+		u8 *data = NULL;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS ||
+		    hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+		    VFIO_IRQ_SET_ACTION_TYPE_MASK))
+			return -EINVAL;
+
+		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
+			size_t size;
+			int max = vgpu_get_irq_count(vdev, hdr.index);
+
+			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
+				size = sizeof(uint8_t);
+			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
+				size = sizeof(int32_t);
+			else
+				return -EINVAL;
+
+			if (hdr.argsz - minsz < hdr.count * size ||
+			    hdr.start >= max || hdr.start + hdr.count > max)
+				return -EINVAL;
+
+			data = memdup_user((void __user *)(arg + minsz),
+					   hdr.count * size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}
+
+		if (gpu_dev->ops->vgpu_set_irqs) {
+			ret = gpu_dev->ops->vgpu_set_irqs(vdev->vgpu_dev,
+							  hdr.flags,
+							  hdr.index, hdr.start,
+							  hdr.count, data);
+		}
+		kfree(data);
+		return ret;
+	}
+
+	default:
+		return -EINVAL;
+	}
+	return ret;
+}
+
+ssize_t vgpu_dev_config_rw(struct vfio_vgpu_device *vdev, char __user *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	struct vgpu_device *vgpu_dev = vdev->vgpu_dev;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+	int cfg_size = vdev->bar_info[VFIO_PCI_CONFIG_REGION_INDEX].size;
+	int ret = 0;
+	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+	if (pos >= cfg_size || pos + count > cfg_size) {
+		printk(KERN_ERR "%s pos 0x%llx out of range\n", __FUNCTION__, pos);
+		ret = -EFAULT;
+		goto config_rw_exit;
+	}
+
+	if (iswrite) {
+		char *user_data = kmalloc(count, GFP_KERNEL);
+
+		if (user_data == NULL) {
+			ret = -ENOMEM;
+			goto config_rw_exit;
+		}
+
+		if (copy_from_user(user_data, buf, count)) {
+			ret = -EFAULT;
+			kfree(user_data);
+			goto config_rw_exit;
+		}
+
+		if (gpu_dev->ops->write) {
+			ret = gpu_dev->ops->write(vgpu_dev,
+						  user_data,
+						  count,
+						  vgpu_emul_space_config,
+						  pos);
+		}
+
+		memcpy((void *)(vdev->vconfig + pos), (void *)user_data, count);
+		kfree(user_data);
+	} else {
+		char *ret_data = kzalloc(count, GFP_KERNEL);
+
+		if (ret_data == NULL) {
+			ret = -ENOMEM;
+			goto config_rw_exit;
+		}
+
+		if (gpu_dev->ops->read) {
+			ret = gpu_dev->ops->read(vgpu_dev,
+						 ret_data,
+						 count,
+						 vgpu_emul_space_config,
+						 pos);
+		}
+
+		if (ret > 0) {
+			if (copy_to_user(buf, ret_data, ret)) {
+				ret = -EFAULT;
+				kfree(ret_data);
+				goto config_rw_exit;
+			}
+
+			memcpy((void *)(vdev->vconfig + pos), (void *)ret_data, count);
+		}
+		kfree(ret_data);
+	}
+config_rw_exit:
+	return ret;
+}
+
+ssize_t vgpu_dev_bar_rw(struct vfio_vgpu_device *vdev, char __user *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	struct vgpu_device *vgpu_dev = vdev->vgpu_dev;
+	struct gpu_device *gpu_dev = vgpu_dev->gpu_dev;
+	loff_t offset = *ppos & VFIO_PCI_OFFSET_MASK;
+	loff_t pos;
+	int bar_index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	int ret = 0;
+
+	if (!vdev->bar_info[bar_index].start) {
+		ret = vdev_read_base(vdev);
+		if (ret)
+			goto bar_rw_exit;
+	}
+
+	if (offset >= vdev->bar_info[bar_index].size ||
+	    count > vdev->bar_info[bar_index].size - offset) {
+		ret = -EINVAL;
+		goto bar_rw_exit;
+	}
+
+	pos = vdev->bar_info[bar_index].start + offset;
+	if (iswrite) {
+		char *user_data = kmalloc(count, GFP_KERNEL);
+
+		if (user_data == NULL) {
+			ret = -ENOMEM;
+			goto bar_rw_exit;
+		}
+
+		if (copy_from_user(user_data, buf, count)) {
+			ret = -EFAULT;
+			kfree(user_data);
+			goto bar_rw_exit;
+		}
+
+		if (gpu_dev->ops->write) {
+			ret = gpu_dev->ops->write(vgpu_dev,
+						  user_data,
+						  count,
+						  vgpu_emul_space_mmio,
+						  pos);
+		}
+
+		kfree(user_data);
+	} else {
+		char *ret_data = kzalloc(count, GFP_KERNEL);
+
+		if (ret_data == NULL) {
+			ret = -ENOMEM;
+			goto bar_rw_exit;
+		}
+
+		if (gpu_dev->ops->read) {
+			ret = gpu_dev->ops->read(vgpu_dev,
+						 ret_data,
+						 count,
+						 vgpu_emul_space_mmio,
+						 pos);
+		}
+
+		if (ret > 0) {
+			if (copy_to_user(buf, ret_data, ret)) {
+				ret = -EFAULT;
+			}
+		}
+		kfree(ret_data);
+	}
+
+bar_rw_exit:
+	return ret;
+}
+
+
+static ssize_t vgpu_dev_rw(void *device_data, char __user *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	struct vfio_vgpu_device *vdev = device_data;
+
+	if (index >= VFIO_PCI_NUM_REGIONS)
+		return -EINVAL;
+
+	switch (index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		return vgpu_dev_config_rw(vdev, buf, count, ppos, iswrite);
+
+	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+		return vgpu_dev_bar_rw(vdev, buf, count, ppos, iswrite);
+
+	case VFIO_PCI_ROM_REGION_INDEX:
+	case VFIO_PCI_VGA_REGION_INDEX:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+
+static ssize_t vgpu_dev_read(void *device_data, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	int ret = 0;
+
+	if (count)
+		ret = vgpu_dev_rw(device_data, buf, count, ppos, false);
+
+	return ret;
+}
+
+static ssize_t vgpu_dev_write(void *device_data, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	int ret = 0;
+
+	if (count)
+		ret = vgpu_dev_rw(device_data, (char __user *)buf, count,
+				  ppos, true);
+
+	return ret;
+}
+
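+/*
+ * Fault handler for mmap'ed BAR regions: let the GPU driver adjust the
+ * pfn/size/protection via validate_map_request(), if provided, and then
+ * establish the mapping with remap_pfn_range().
+ */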
+static int vgpu_dev_mmio_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	int ret = 0;
+	struct vfio_vgpu_device *vdev = vma->vm_private_data;
+	struct vgpu_device *vgpu_dev;
+	struct gpu_device *gpu_dev;
+	u64 virtaddr = (u64)vmf->virtual_address;
+	u64 offset, phyaddr;
+	unsigned long req_size, pgoff;
+	pgprot_t pg_prot;
+
+	if (!vdev || !vdev->vgpu_dev)
+		return -EINVAL;
+
+	vgpu_dev = vdev->vgpu_dev;
+	gpu_dev  = vgpu_dev->gpu_dev;
+
+	offset   = vma->vm_pgoff << PAGE_SHIFT;
+	phyaddr  = virtaddr - vma->vm_start + offset;
+	pgoff    = phyaddr >> PAGE_SHIFT;
+	req_size = vma->vm_end - virtaddr;
+	pg_prot  = vma->vm_page_prot;
+
+	if (gpu_dev->ops->validate_map_request) {
+		ret = gpu_dev->ops->validate_map_request(vgpu_dev, virtaddr, &pgoff,
+							 &req_size, &pg_prot);
+		if (ret)
+			return ret;
+
+		if (!req_size)
+			return -EINVAL;
+	}
+
+	ret = remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
+	if (ret)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct vgpu_dev_mmio_ops = {
+	.fault = vgpu_dev_mmio_fault,
+};
+
+
+static int vgpu_dev_mmap(void *device_data, struct vm_area_struct *vma)
+{
+	unsigned int index;
+	struct vfio_vgpu_device *vdev = device_data;
+	struct vgpu_device *vgpu_dev = vdev->vgpu_dev;
+	struct pci_dev *pdev = vgpu_dev->gpu_dev->dev;
+	unsigned long pgoff;
+
+	loff_t offset = vma->vm_pgoff << PAGE_SHIFT;
+
+	index = VFIO_PCI_OFFSET_TO_INDEX(offset);
+
+	if (index >= VFIO_PCI_ROM_REGION_INDEX)
+		return -EINVAL;
+
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+
+	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
+
+	vma->vm_private_data = vdev;
+	vma->vm_ops = &vgpu_dev_mmio_ops;
+
+	return 0;
+}
+
+static const struct vfio_device_ops vgpu_vfio_dev_ops = {
+	.name		= "vfio-vgpu",
+	.open		= vgpu_dev_open,
+	.release	= vgpu_dev_close,
+	.ioctl		= vgpu_dev_unlocked_ioctl,
+	.read		= vgpu_dev_read,
+	.write		= vgpu_dev_write,
+	.mmap		= vgpu_dev_mmap,
+};
+
+int vgpu_vfio_probe(struct device *dev)
+{
+	struct vfio_vgpu_device *vdev;
+	struct vgpu_device *vgpu_dev = to_vgpu_device(dev);
+	int ret = 0;
+
+	if (vgpu_dev == NULL)
+		return -EINVAL;
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+
+	vdev->vgpu_dev = vgpu_dev;
+	vdev->group = vgpu_dev->group;
+
+	ret = vfio_add_group_dev(dev, &vgpu_vfio_dev_ops, vdev);
+	if (ret)
+		kfree(vdev);
+
+	printk(KERN_INFO "%s ret = %d\n", __FUNCTION__, ret);
+	return ret;
+}
+
+void vgpu_vfio_remove(struct device *dev)
+{
+	struct vfio_vgpu_device *vdev;
+
+	printk(KERN_INFO "%s\n", __FUNCTION__);
+	vdev = vfio_del_group_dev(dev);
+	if (vdev) {
+		printk(KERN_INFO "%s vdev being freed\n", __FUNCTION__);
+		kfree(vdev);
+	}
+}
+
+static struct vgpu_driver vgpu_vfio_driver = {
+	.name	= "vgpu-vfio",
+	.probe	= vgpu_vfio_probe,
+	.remove	= vgpu_vfio_remove,
+};
+
+static int __init vgpu_vfio_init(void)
+{
+	printk(KERN_INFO "%s\n", __FUNCTION__);
+	return vgpu_register_driver(&vgpu_vfio_driver, THIS_MODULE);
+}
+
+static void __exit vgpu_vfio_exit(void)
+{
+	printk(KERN_INFO "%s\n", __FUNCTION__);
+	vgpu_unregister_driver(&vgpu_vfio_driver);
+}
+
+module_init(vgpu_vfio_init)
+module_exit(vgpu_vfio_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-02-23 16:24 ` [Qemu-devel] " Kirti Wankhede
@ 2016-02-23 16:24   ` Kirti Wankhede
  -1 siblings, 0 replies; 38+ messages in thread
From: Kirti Wankhede @ 2016-02-23 16:24 UTC (permalink / raw)
  To: alex.williamson, pbonzini, kraxel
  Cc: qemu-devel, kvm, kevin.tian, shuai.ruan, jike.song, zhiyuan.lv,
	Kirti Wankhede, Neo Jia

The aim of this module is to pin and unpin guest memory.
This module provides an interface that a GPU driver can use to map guest
physical memory into its kernel-space driver.
Currently this module duplicates code from vfio_iommu_type1.c.
We are working on refining these functions to reuse the existing code in
vfio_iommu_type1.c, and with that will add an API to unpin pages.
This is for reference, to review the overall design of vGPU.
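
A hedged sketch (illustrative only) of how a GPU driver would consume this
interface; it assumes the translated, pinned addresses are returned in place
through the gfn_buffer argument, as the in-place parameter of
vgpu_dma_do_translate() suggests (guest_pfn is a placeholder):

	dma_addr_t gfns[1];

	gfns[0] = guest_pfn;	/* guest page frame number to translate */

	/* pins the guest page and translates the guest pfn */
	if (vgpu_dma_do_translate(gfns, 1))
		return -EFAULT;

	/* gfns[0] can now be used by the kernel-space GPU driver */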

Thanks,
Kirti.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
---
 drivers/vgpu/Makefile                |    1 +
 drivers/vgpu/vfio_iommu_type1_vgpu.c |  423 ++++++++++++++++++++++++++++++++++
 2 files changed, 424 insertions(+), 0 deletions(-)
 create mode 100644 drivers/vgpu/vfio_iommu_type1_vgpu.c

diff --git a/drivers/vgpu/Makefile b/drivers/vgpu/Makefile
index a0a2655..8ace18d 100644
--- a/drivers/vgpu/Makefile
+++ b/drivers/vgpu/Makefile
@@ -3,3 +3,4 @@ vgpu-y := vgpu-core.o vgpu-sysfs.o vgpu-driver.o
 
 obj-$(CONFIG_VGPU)			+= vgpu.o
 obj-$(CONFIG_VGPU_VFIO)                 += vgpu_vfio.o
+obj-$(CONFIG_VFIO_IOMMU_TYPE1_VGPU)     += vfio_iommu_type1_vgpu.o
diff --git a/drivers/vgpu/vfio_iommu_type1_vgpu.c b/drivers/vgpu/vfio_iommu_type1_vgpu.c
new file mode 100644
index 0000000..0b36ae5
--- /dev/null
+++ b/drivers/vgpu/vfio_iommu_type1_vgpu.c
@@ -0,0 +1,423 @@
+/*
+ * VGPU : IOMMU DMA mapping support for VGPU
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/compat.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/vgpu.h>
+
+#include "vgpu_private.h"
+
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"NVIDIA Corporation"
+#define DRIVER_DESC     "VGPU Type1 IOMMU driver for VFIO"
+
+/* VFIO structures */
+
+struct vfio_iommu_vgpu {
+	struct mutex lock;
+	struct iommu_group *group;
+	struct vgpu_device *vgpu_dev;
+	struct rb_root dma_list;
+	struct mm_struct *vm_mm;
+};
+
+struct vgpu_vfio_dma {
+	struct rb_node node;
+	dma_addr_t iova;
+	unsigned long vaddr;
+	size_t size;
+	int prot;
+};
+
+/*
+ * VGPU VFIO FOPs definition
+ *
+ */
+
+/*
+ * Duplicated from vfio_link_dma, just quick hack ... should
+ * reuse code later
+ */
+
+static void vgpu_link_dma(struct vfio_iommu_vgpu *iommu,
+			  struct vgpu_vfio_dma *new)
+{
+	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
+	struct vgpu_vfio_dma *dma;
+
+	while (*link) {
+		parent = *link;
+		dma = rb_entry(parent, struct vgpu_vfio_dma, node);
+
+		if (new->iova + new->size <= dma->iova)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &iommu->dma_list);
+}
+
+static struct vgpu_vfio_dma *vgpu_find_dma(struct vfio_iommu_vgpu *iommu,
+					   dma_addr_t start, size_t size)
+{
+	struct rb_node *node = iommu->dma_list.rb_node;
+
+	while (node) {
+		struct vgpu_vfio_dma *dma = rb_entry(node, struct vgpu_vfio_dma, node);
+
+		if (start + size <= dma->iova)
+			node = node->rb_left;
+		else if (start >= dma->iova + dma->size)
+			node = node->rb_right;
+		else
+			return dma;
+	}
+
+	return NULL;
+}
+
+static void vgpu_unlink_dma(struct vfio_iommu_vgpu *iommu, struct vgpu_vfio_dma *old)
+{
+	rb_erase(&old->node, &iommu->dma_list);
+}
+
+static void vgpu_dump_dma(struct vfio_iommu_vgpu *iommu)
+{
+	struct vgpu_vfio_dma *c, *n;
+	uint32_t i = 0;
+
+	rbtree_postorder_for_each_entry_safe(c, n, &iommu->dma_list, node)
+		printk(KERN_INFO "%s: dma[%d] iova:0x%llx, vaddr:0x%lx, size:0x%lx\n",
+		       __FUNCTION__, i++, c->iova, c->vaddr, c->size);
+}
+
+static int vgpu_dma_do_track(struct vfio_iommu_vgpu *vgpu_iommu,
+	struct vfio_iommu_type1_dma_map *map)
+{
+	dma_addr_t iova = map->iova;
+	unsigned long vaddr = map->vaddr;
+	int ret = 0, prot = 0;
+	struct vgpu_vfio_dma *vgpu_dma;
+
+	mutex_lock(&vgpu_iommu->lock);
+
+	if (vgpu_find_dma(vgpu_iommu, map->iova, map->size)) {
+		mutex_unlock(&vgpu_iommu->lock);
+		return -EEXIST;
+	}
+
+	vgpu_dma = kzalloc(sizeof(*vgpu_dma), GFP_KERNEL);
+
+	if (!vgpu_dma) {
+		mutex_unlock(&vgpu_iommu->lock);
+		return -ENOMEM;
+	}
+
+	vgpu_dma->iova = iova;
+	vgpu_dma->vaddr = vaddr;
+	vgpu_dma->prot = prot;
+	vgpu_dma->size = map->size;
+
+	vgpu_link_dma(vgpu_iommu, vgpu_dma);
+
+	mutex_unlock(&vgpu_iommu->lock);
+	return ret;
+}
+
+static int vgpu_dma_do_untrack(struct vfio_iommu_vgpu *vgpu_iommu,
+	struct vfio_iommu_type1_dma_unmap *unmap)
+{
+	struct vgpu_vfio_dma *vgpu_dma;
+	size_t unmapped = 0;
+	int ret = 0;
+
+	mutex_lock(&vgpu_iommu->lock);
+
+	vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova, 0);
+	if (vgpu_dma && vgpu_dma->iova != unmap->iova) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova + unmap->size - 1, 0);
+	if (vgpu_dma && vgpu_dma->iova + vgpu_dma->size != unmap->iova + unmap->size) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	while ((vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova, unmap->size))) {
+		unmapped += vgpu_dma->size;
+		vgpu_unlink_dma(vgpu_iommu, vgpu_dma);
+	}
+
+unlock:
+	mutex_unlock(&vgpu_iommu->lock);
+	unmap->size = unmapped;
+
+	return ret;
+}
+
+/* Ugly hack to quickly test single device ... */
+
+static struct vfio_iommu_vgpu *_local_iommu = NULL;
+
+int vgpu_dma_do_translate(dma_addr_t *gfn_buffer, uint32_t count)
+{
+	int i = 0, ret = 0, prot = 0;
+	unsigned long remote_vaddr = 0, pfn = 0;
+	struct vfio_iommu_vgpu *vgpu_iommu = _local_iommu;
+	struct vgpu_vfio_dma *vgpu_dma;
+	struct page *page[1];
+	// unsigned long * addr = NULL;
+	struct mm_struct *mm = vgpu_iommu->vm_mm;
+
+	prot = IOMMU_READ | IOMMU_WRITE;
+
+	printk(KERN_INFO "%s: >>>>\n", __FUNCTION__);
+
+	mutex_lock(&vgpu_iommu->lock);
+
+	vgpu_dump_dma(vgpu_iommu);
+
+	for (i = 0; i < count; i++) {
+		dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
+		vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);
+
+		if (!vgpu_dma) {
+			printk(KERN_INFO "%s: fail locate iova[%d]:0x%llx\n", __FUNCTION__, i, iova);
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
+		printk(KERN_INFO "%s: find dma iova[%d]:0x%llx, vaddr:0x%lx, size:0x%lx, remote_vaddr:0x%lx\n",
+			__FUNCTION__, i, vgpu_dma->iova,
+			vgpu_dma->vaddr, vgpu_dma->size, remote_vaddr);
+
+		if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, page) == 1) {
+			pfn = page_to_pfn(page[0]);
+			printk(KERN_INFO "%s: pfn[%d]:0x%lx\n", __FUNCTION__, i, pfn);
+			// addr = vmap(page, 1, VM_MAP, PAGE_KERNEL);
+		}
+		else {
+			printk(KERN_INFO "%s: fail to pin pfn[%d]\n", __FUNCTION__, i);
+			ret = -ENOMEM;
+			goto unlock;
+		}
+
+		gfn_buffer[i] = pfn;
+		// vunmap(addr);
+
+	}
+
+unlock:
+	mutex_unlock(&vgpu_iommu->lock);
+	printk(KERN_INFO "%s: <<<<\n", __FUNCTION__);
+	return ret;
+}
+EXPORT_SYMBOL(vgpu_dma_do_translate);
+
+static void *vfio_iommu_vgpu_open(unsigned long arg)
+{
+	struct vfio_iommu_vgpu *iommu;
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+
+	if (!iommu)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&iommu->lock);
+
+	printk(KERN_INFO "%s", __FUNCTION__);
+
+	/* TODO: Keep track the v2 vs. v1, for now only assume
+	 * we are v2 due to QEMU code */
+	_local_iommu = iommu;
+	return iommu;
+}
+
+static void vfio_iommu_vgpu_release(void *iommu_data)
+{
+	struct vfio_iommu_vgpu *iommu = iommu_data;
+	kfree(iommu);
+	printk(KERN_INFO "%s", __FUNCTION__);
+}
+
+static long vfio_iommu_vgpu_ioctl(void *iommu_data,
+		unsigned int cmd, unsigned long arg)
+{
+	int ret = 0;
+	unsigned long minsz;
+	struct vfio_iommu_vgpu *vgpu_iommu = iommu_data;
+
+	switch (cmd) {
+	case VFIO_CHECK_EXTENSION:
+	{
+		if ((arg == VFIO_TYPE1_IOMMU) || (arg == VFIO_TYPE1v2_IOMMU))
+			return 1;
+		else
+			return 0;
+	}
+
+	case VFIO_IOMMU_GET_INFO:
+	{
+		struct vfio_iommu_type1_info info;
+		minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		info.flags = 0;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+	}
+	case VFIO_IOMMU_MAP_DMA:
+	{
+		// TODO
+		struct vfio_iommu_type1_dma_map map;
+		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
+
+		if (copy_from_user(&map, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (map.argsz < minsz)
+			return -EINVAL;
+
+		printk(KERN_INFO "VGPU-IOMMU:MAP_DMA flags:%d, vaddr:0x%llx, iova:0x%llx, size:0x%llx\n",
+			map.flags, map.vaddr, map.iova, map.size);
+
+		/*
+		 * TODO: Tracking code is mostly duplicated from TYPE1 IOMMU, ideally,
+		 * this should be merged into one single file and reuse data
+		 * structure
+		 *
+		 */
+		ret = vgpu_dma_do_track(vgpu_iommu, &map);
+		break;
+	}
+	case VFIO_IOMMU_UNMAP_DMA:
+	{
+		// TODO
+		struct vfio_iommu_type1_dma_unmap unmap;
+
+		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
+
+		if (copy_from_user(&unmap, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (unmap.argsz < minsz)
+			return -EINVAL;
+
+		ret = vgpu_dma_do_untrack(vgpu_iommu, &unmap);
+		break;
+	}
+	default:
+	{
+		printk(KERN_INFO "%s cmd default ", __FUNCTION__);
+		ret = -ENOTTY;
+		break;
+	}
+	}
+
+	return ret;
+}
+
+
+static int vfio_iommu_vgpu_attach_group(void *iommu_data,
+		                        struct iommu_group *iommu_group)
+{
+	struct vfio_iommu_vgpu *iommu = iommu_data;
+	struct vgpu_device *vgpu_dev = NULL;
+
+	printk(KERN_INFO "%s", __FUNCTION__);
+
+	vgpu_dev = get_vgpu_device_from_group(iommu_group);
+	if (vgpu_dev) {
+		iommu->vgpu_dev = vgpu_dev;
+		iommu->group = iommu_group;
+
+		/* IOMMU shares the same life cycle as VM MM */
+		iommu->vm_mm = current->mm;
+
+		return 0;
+	}
+	iommu->group = iommu_group;
+	return 1;
+}
+
+static void vfio_iommu_vgpu_detach_group(void *iommu_data,
+		struct iommu_group *iommu_group)
+{
+	struct vfio_iommu_vgpu *iommu = iommu_data;
+
+	printk(KERN_INFO "%s", __FUNCTION__);
+	iommu->vm_mm = NULL;
+	iommu->group = NULL;
+
+	return;
+}
+
+
+static const struct vfio_iommu_driver_ops vfio_iommu_vgpu_driver_ops = {
+	.name           = "vgpu_vfio",
+	.owner          = THIS_MODULE,
+	.open           = vfio_iommu_vgpu_open,
+	.release        = vfio_iommu_vgpu_release,
+	.ioctl          = vfio_iommu_vgpu_ioctl,
+	.attach_group   = vfio_iommu_vgpu_attach_group,
+	.detach_group   = vfio_iommu_vgpu_detach_group,
+};
+
+
+int vgpu_vfio_iommu_init(void)
+{
+	int rc = vfio_register_iommu_driver(&vfio_iommu_vgpu_driver_ops);
+
+	printk(KERN_INFO "%s\n", __FUNCTION__);
+	if (rc < 0) {
+		printk(KERN_ERR "Error: failed to register vfio iommu, err:%d\n", rc);
+	}
+
+	return rc;
+}
+
+void vgpu_vfio_iommu_exit(void)
+{
+	// unregister vgpu_vfio driver
+	vfio_unregister_iommu_driver(&vfio_iommu_vgpu_driver_ops);
+	printk(KERN_INFO "%s\n", __FUNCTION__);
+}
+
+
+module_init(vgpu_vfio_iommu_init);
+module_exit(vgpu_vfio_iommu_exit);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
-- 
1.7.1
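
For reference, this IOMMU backend expects the standard type1 userspace
sequence; a minimal sketch (container_fd, guest_ram and guest_ram_size
are illustrative names, not taken from this series):

	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (__u64)(uintptr_t)guest_ram,   /* HVA backing guest RAM */
		.iova  = 0,                             /* guest physical address */
		.size  = guest_ram_size,
	};

	if (ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map))
		perror("VFIO_IOMMU_MAP_DMA");

Note that vgpu_dma_do_track() above stores prot == 0 and does not yet
honor map.flags; the standard type1 backend derives IOMMU_READ and
IOMMU_WRITE from these flags.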



* RE: [RFC PATCH v2 1/3] vGPU Core driver
  2016-02-23 16:24 ` [Qemu-devel] " Kirti Wankhede
@ 2016-02-29  5:39   ` Tian, Kevin
  -1 siblings, 0 replies; 38+ messages in thread
From: Tian, Kevin @ 2016-02-29  5:39 UTC (permalink / raw)
  To: Kirti Wankhede, alex.williamson, pbonzini, kraxel
  Cc: qemu-devel, kvm, Ruan, Shuai, Song, Jike, Lv, Zhiyuan, Neo Jia

> From: Kirti Wankhede
> Sent: Wednesday, February 24, 2016 12:24 AM
> 
> Design for vGPU Driver:
> Main purpose of vGPU driver is to provide a common interface for vGPU
> management that can be used by differnt GPU drivers.
> 
> This module would provide a generic interface to create the device, add
> it to vGPU bus, add device to IOMMU group and then add it to vfio group.
> 
> High Level block diagram:
> 
> +--------------+    vgpu_register_driver()+---------------+
> |     __init() +------------------------->+               |
> |              |                          |               |
> |              +<-------------------------+    vgpu.ko    |
> | vgpu_vfio.ko |   probe()/remove()       |               |
> |              |                +---------+               +---------+
> +--------------+                |         +-------+-------+         |
>                                 |                 ^                 |
>                                 | callback        |                 |
>                                 |         +-------+--------+        |
>                                 |         |vgpu_register_device()   |
>                                 |         |                |        |
>                                 +---^-----+-----+    +-----+------+-+
>                                     | nvidia.ko |    |  i915.ko   |
>                                     |           |    |            |
>                                     +-----------+    +------------+
> 
> vGPU driver provides two types of registration interfaces:
> 1. Registration interface for vGPU bus driver:
> 
> /**
>   * struct vgpu_driver - vGPU device driver
>   * @name: driver name
>   * @probe: called when new device created
>   * @remove: called when device removed
>   * @driver: device driver structure
>   *
>   **/
> struct vgpu_driver {
>          const char *name;
>          int  (*probe)  (struct device *dev);
>          void (*remove) (struct device *dev);
>          struct device_driver    driver;
> };
> 
> int  vgpu_register_driver(struct vgpu_driver *drv, struct module *owner);
> void vgpu_unregister_driver(struct vgpu_driver *drv);
> 
> VFIO bus driver for vgpu, should use this interface to register with
> vGPU driver. With this, VFIO bus driver for vGPU devices is responsible
> to add vGPU device to VFIO group.
> 
> 2. GPU driver interface
> GPU driver interface provides GPU driver the set APIs to manage GPU driver
> related work in their own driver. APIs are to:
> - vgpu_supported_config: provide supported configuration list by the GPU.
> - vgpu_create: to allocate basic resouces in GPU driver for a vGPU device.
> - vgpu_destroy: to free resources in GPU driver during vGPU device destroy.
> - vgpu_start: to initiate vGPU initialization process from GPU driver when VM
>   boots and before QEMU starts.
> - vgpu_shutdown: to teardown vGPU resources during VM teardown.
> - read : read emulation callback.
> - write: write emulation callback.
> - vgpu_set_irqs: send interrupt configuration information that QEMU sets.
> - vgpu_bar_info: to provice BAR size and its flags for the vGPU device.
> - validate_map_request: to validate remap pfn request.
> 
> This registration interface should be used by GPU drivers to register
> each physical device to vGPU driver.
> 
> Updated this patch with couple of more functions in GPU driver interface
> which were discussed during v1 version of this RFC.
> 
> Thanks,
> Kirti.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>

Hi, Kirti/Neo,

Thanks a lot for your updated version. Having not yet looked into the
code in detail, here are some high-level comments first.

First, at a glance the majority of the code (possibly >95%) is device
agnostic, though we call it vgpu today. Thinking about the
extensibility and usability of this framework, would it be better to
name it in a way that any other type of I/O device can fit into it? I
don't have a good name in mind now, but a simple idea is to replace
vgpu with vdev (vdev-core, vfio-vdev, vfio-iommu-type1-vdev, etc.), so
that the underlying GPU drivers become just one category of users of
this general vdev framework. In the future it could easily be extended
to support other I/O virtualization based on a similar concept.

Second, are these 3 patches already working with an nvidia device, or
are they a conceptual implementation without actual testing completed
yet? We'll start moving our implementation in this direction too, so
it would be good to know the current status and how we can further
cooperate to move forward. Based on that we can start giving more
comments on the next level of detail.

Thanks
Kevin


* Re: [RFC PATCH v2 1/3] vGPU Core driver
  2016-02-29  5:39   ` [Qemu-devel] " Tian, Kevin
@ 2016-02-29 23:17     ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-02-29 23:17 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Ruan, Shuai, Song, Jike, alex.williamson, kvm, qemu-devel,
	Kirti Wankhede, kraxel, pbonzini, Lv, Zhiyuan

On Mon, Feb 29, 2016 at 05:39:02AM +0000, Tian, Kevin wrote:
> > From: Kirti Wankhede
> > Sent: Wednesday, February 24, 2016 12:24 AM
> > 
> > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > Signed-off-by: Neo Jia <cjia@nvidia.com>
> 
> Hi, Kirti/Neo,
> 
> Thanks a lot for your updated version. Having not yet looked into the
> code in detail, here are some high-level comments first.
> 
> First, at a glance the majority of the code (possibly >95%) is device
> agnostic, though we call it vgpu today. Thinking about the
> extensibility and usability of this framework, would it be better to
> name it in a way that any other type of I/O device can fit into it? I
> don't have a good name in mind now, but a simple idea is to replace
> vgpu with vdev (vdev-core, vfio-vdev, vfio-iommu-type1-vdev, etc.), so
> that the underlying GPU drivers become just one category of users of
> this general vdev framework. In the future it could easily be extended
> to support other I/O virtualization based on a similar concept.
> 
> Second, are these 3 patches already working with an nvidia device, or
> are they a conceptual implementation without actual testing completed
> yet? We'll start moving our implementation in this direction too, so
> it would be good to know the current status and how we can further
> cooperate to move forward. Based on that we can start giving more
> comments on the next level of detail.
> 

Hi Kevin,

Yes, we do have an engineering prototype up and running with this set of kernel
patches we have posted.

Please let us know if you have any questions while integrating your vgpu solution
within this framework.

Thanks,
Neo

> Thanks
> Kevin


* Re: [RFC PATCH v2 1/3] vGPU Core driver
  2016-02-29 23:17     ` [Qemu-devel] " Neo Jia
@ 2016-03-01  3:10       ` Jike Song
  -1 siblings, 0 replies; 38+ messages in thread
From: Jike Song @ 2016-03-01  3:10 UTC (permalink / raw)
  To: Neo Jia
  Cc: Tian, Kevin, Kirti Wankhede, alex.williamson, pbonzini, kraxel,
	qemu-devel, kvm, Ruan, Shuai, Lv, Zhiyuan

On 03/01/2016 07:17 AM, Neo Jia wrote:
> On Mon, Feb 29, 2016 at 05:39:02AM +0000, Tian, Kevin wrote:
>>> From: Kirti Wankhede
>>> Sent: Wednesday, February 24, 2016 12:24 AM
>>>
>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>>> Signed-off-by: Neo Jia <cjia@nvidia.com>
>>
>> Hi, Kirti/Neo,
>>
>> Thanks a lot for your updated version. Having not yet looked into the
>> code in detail, here are some high-level comments first.
>>
>> First, at a glance the majority of the code (possibly >95%) is device
>> agnostic, though we call it vgpu today. Thinking about the
>> extensibility and usability of this framework, would it be better to
>> name it in a way that any other type of I/O device can fit into it? I
>> don't have a good name in mind now, but a simple idea is to replace
>> vgpu with vdev (vdev-core, vfio-vdev, vfio-iommu-type1-vdev, etc.), so
>> that the underlying GPU drivers become just one category of users of
>> this general vdev framework. In the future it could easily be extended
>> to support other I/O virtualization based on a similar concept.
>>
>> Second, are these 3 patches already working with an nvidia device, or
>> are they a conceptual implementation without actual testing completed
>> yet? We'll start moving our implementation in this direction too, so
>> it would be good to know the current status and how we can further
>> cooperate to move forward. Based on that we can start giving more
>> comments on the next level of detail.
>>
> 
> Hi Kevin,
> 
> Yes, we do have an engineering prototype up and running with this set of kernel
> patches we have posted.
> 

Good to know that :)

> Please let us know if you have any questions while integrating your vgpu solution
> within this framework.

Thanks for your work. We are evaluating the integration of the framework
with our vgpu implementation, and will make/propose changes to this.

> 
> Thanks,
> Neo
> 
--
Thanks,
Jike



* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-02-23 16:24   ` [Qemu-devel] " Kirti Wankhede
@ 2016-03-02  8:38     ` Jike Song
  -1 siblings, 0 replies; 38+ messages in thread
From: Jike Song @ 2016-03-02  8:38 UTC (permalink / raw)
  To: Kirti Wankhede
  Cc: alex.williamson, pbonzini, kraxel, qemu-devel, kvm, kevin.tian,
	shuai.ruan, zhiyuan.lv, Neo Jia

On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
> [...]
> +static int vgpu_dma_do_track(struct vfio_iommu_vgpu * vgpu_iommu,
> +	struct vfio_iommu_type1_dma_map *map)
> +{
> +	dma_addr_t iova = map->iova;
> +	unsigned long vaddr = map->vaddr;
> +	int ret = 0, prot = 0;
> +	struct vgpu_vfio_dma *vgpu_dma;
> +
> +	mutex_lock(&vgpu_iommu->lock);
> +
> +	if (vgpu_find_dma(vgpu_iommu, map->iova, map->size)) {
> +		mutex_unlock(&vgpu_iommu->lock);
> +		return -EEXIST;
> +	}
> +
> +	vgpu_dma = kzalloc(sizeof(*vgpu_dma), GFP_KERNEL);
> +
> +	if (!vgpu_dma) {
> +		mutex_unlock(&vgpu_iommu->lock);
> +		return -ENOMEM;
> +	}
> +
> +	vgpu_dma->iova = iova;
> +	vgpu_dma->vaddr = vaddr;
> +	vgpu_dma->prot = prot;
> +	vgpu_dma->size = map->size;
> +
> +	vgpu_link_dma(vgpu_iommu, vgpu_dma);

Hi Kirti & Neo,

It seems that no one actually sets up mappings for the IOMMU here?
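
For comparison, a minimal sketch of what the type1 path does at this
point, assuming an attached iommu_domain and prot flags are at hand
(illustrative only, not tested against this series):

	/* after pinning the page, install the iova->phys translation */
	phys_addr_t paddr = (phys_addr_t)page_to_pfn(page) << PAGE_SHIFT;
	int r = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
	if (r)
		return r;	/* caller unpins and unwinds earlier mappings */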

> [...]

--
Thanks,
Jike



* Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
@ 2016-03-02  8:38     ` Jike Song
  0 siblings, 0 replies; 38+ messages in thread
From: Jike Song @ 2016-03-02  8:38 UTC (permalink / raw)
  To: Kirti Wankhede
  Cc: shuai.ruan, kevin.tian, Neo Jia, kvm, qemu-devel,
	alex.williamson, kraxel, pbonzini, zhiyuan.lv

On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
> Aim of this module is to pin and unpin guest memory.
> This module provides interface to GPU driver that can be used to map guest
> physical memory into its kernel space driver.
> Currently this module has duplicate code from vfio_iommu_type1.c
> Working on refining functions to reuse existing code in vfio_iommu_type1.c and
> with that will add API to unpin pages.
> This is for the reference to review the overall design of vGPU.
> 
> Thanks,
> Kirti.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>
> ---
>  drivers/vgpu/Makefile                |    1 +
>  drivers/vgpu/vfio_iommu_type1_vgpu.c |  423 ++++++++++++++++++++++++++++++++++
>  2 files changed, 424 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/vgpu/vfio_iommu_type1_vgpu.c
> 
> diff --git a/drivers/vgpu/Makefile b/drivers/vgpu/Makefile
> index a0a2655..8ace18d 100644
> --- a/drivers/vgpu/Makefile
> +++ b/drivers/vgpu/Makefile
> @@ -3,3 +3,4 @@ vgpu-y := vgpu-core.o vgpu-sysfs.o vgpu-driver.o
>  
>  obj-$(CONFIG_VGPU)			+= vgpu.o
>  obj-$(CONFIG_VGPU_VFIO)                 += vgpu_vfio.o
> +obj-$(CONFIG_VFIO_IOMMU_TYPE1_VGPU)     += vfio_iommu_type1_vgpu.o
> diff --git a/drivers/vgpu/vfio_iommu_type1_vgpu.c b/drivers/vgpu/vfio_iommu_type1_vgpu.c
> new file mode 100644
> index 0000000..0b36ae5
> --- /dev/null
> +++ b/drivers/vgpu/vfio_iommu_type1_vgpu.c
> @@ -0,0 +1,423 @@
> +/*
> + * VGPU : IOMMU DMA mapping support for VGPU
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/compat.h>
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/fs.h>
> +#include <linux/miscdevice.h>
> +#include <linux/sched.h>
> +#include <linux/wait.h>
> +#include <linux/uuid.h>
> +#include <linux/vfio.h>
> +#include <linux/iommu.h>
> +#include <linux/vgpu.h>
> +
> +#include "vgpu_private.h"
> +
> +#define DRIVER_VERSION	"0.1"
> +#define DRIVER_AUTHOR	"NVIDIA Corporation"
> +#define DRIVER_DESC     "VGPU Type1 IOMMU driver for VFIO"
> +
> +// VFIO structures
> +
> +struct vfio_iommu_vgpu {
> +	struct mutex lock;
> +	struct iommu_group *group;
> +	struct vgpu_device *vgpu_dev;
> +	struct rb_root dma_list;
> +	struct mm_struct * vm_mm;
> +};
> +
> +struct vgpu_vfio_dma {
> +	struct rb_node node;
> +	dma_addr_t iova;
> +	unsigned long vaddr;
> +	size_t size;
> +	int prot;
> +};
> +
> +/*
> + * VGPU VFIO FOPs definition
> + *
> + */
> +
> +/*
> + * Duplicated from vfio_link_dma, just quick hack ... should
> + * reuse code later
> + */
> +
> +static void vgpu_link_dma(struct vfio_iommu_vgpu *iommu,
> +			  struct vgpu_vfio_dma *new)
> +{
> +	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
> +	struct vgpu_vfio_dma *dma;
> +
> +	while (*link) {
> +		parent = *link;
> +		dma = rb_entry(parent, struct vgpu_vfio_dma, node);
> +
> +		if (new->iova + new->size <= dma->iova)
> +			link = &(*link)->rb_left;
> +		else
> +			link = &(*link)->rb_right;
> +	}
> +
> +	rb_link_node(&new->node, parent, link);
> +	rb_insert_color(&new->node, &iommu->dma_list);
> +}
> +
> +static struct vgpu_vfio_dma *vgpu_find_dma(struct vfio_iommu_vgpu *iommu,
> +					   dma_addr_t start, size_t size)
> +{
> +	struct rb_node *node = iommu->dma_list.rb_node;
> +
> +	while (node) {
> +		struct vgpu_vfio_dma *dma = rb_entry(node, struct vgpu_vfio_dma, node);
> +
> +		if (start + size <= dma->iova)
> +			node = node->rb_left;
> +		else if (start >= dma->iova + dma->size)
> +			node = node->rb_right;
> +		else
> +			return dma;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void vgpu_unlink_dma(struct vfio_iommu_vgpu *iommu, struct vgpu_vfio_dma *old)
> +{
> +	rb_erase(&old->node, &iommu->dma_list);
> +}
> +
> +static void vgpu_dump_dma(struct vfio_iommu_vgpu *iommu)
> +{
> +	struct vgpu_vfio_dma *c, *n;
> +	uint32_t i = 0;
> +
> +	rbtree_postorder_for_each_entry_safe(c, n, &iommu->dma_list, node)
> +		printk(KERN_INFO "%s: dma[%d] iova:0x%llx, vaddr:0x%lx, size:0x%lx\n",
> +		       __FUNCTION__, i++, c->iova, c->vaddr, c->size);
> +}
> +
> +static int vgpu_dma_do_track(struct vfio_iommu_vgpu *vgpu_iommu,
> +	struct vfio_iommu_type1_dma_map *map)
> +{
> +	dma_addr_t iova = map->iova;
> +	unsigned long vaddr = map->vaddr;
> +	int ret = 0, prot = 0;
> +
> +	/* Derive protection flags from the request instead of leaving
> +	 * prot permanently 0 */
> +	if (map->flags & VFIO_DMA_MAP_FLAG_READ)
> +		prot |= IOMMU_READ;
> +	if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
> +		prot |= IOMMU_WRITE;
> +	struct vgpu_vfio_dma *vgpu_dma;
> +
> +	mutex_lock(&vgpu_iommu->lock);
> +
> +	if (vgpu_find_dma(vgpu_iommu, map->iova, map->size)) {
> +		mutex_unlock(&vgpu_iommu->lock);
> +		return -EEXIST;
> +	}
> +
> +	vgpu_dma = kzalloc(sizeof(*vgpu_dma), GFP_KERNEL);
> +
> +	if (!vgpu_dma) {
> +		mutex_unlock(&vgpu_iommu->lock);
> +		return -ENOMEM;
> +	}
> +
> +	vgpu_dma->iova = iova;
> +	vgpu_dma->vaddr = vaddr;
> +	vgpu_dma->prot = prot;
> +	vgpu_dma->size = map->size;
> +
> +	vgpu_link_dma(vgpu_iommu, vgpu_dma);

Hi Kirti & Neo,

seems that no one actually setup mappings for IOMMU here?

> +
> +	mutex_unlock(&vgpu_iommu->lock);
> +	return ret;
> +}
> +
> +static int vgpu_dma_do_untrack(struct vfio_iommu_vgpu *vgpu_iommu,
> +	struct vfio_iommu_type1_dma_unmap *unmap)
> +{
> +	struct vgpu_vfio_dma *vgpu_dma;
> +	size_t unmapped = 0;
> +	int ret = 0;
> +
> +	mutex_lock(&vgpu_iommu->lock);
> +
> +	vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova, 0);
> +	if (vgpu_dma && vgpu_dma->iova != unmap->iova) {
> +		ret = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova + unmap->size - 1, 0);
> +	if (vgpu_dma && vgpu_dma->iova + vgpu_dma->size != unmap->iova + unmap->size) {
> +		ret = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	while ((vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova, unmap->size))) {
> +		unmapped += vgpu_dma->size;
> +		vgpu_unlink_dma(vgpu_iommu, vgpu_dma);
> +	}
> +
> +unlock:
> +	mutex_unlock(&vgpu_iommu->lock);
> +	unmap->size = unmapped;
> +
> +	return ret;
> +}
> +
> +/* Ugly hack to quickly test a single device ... */
> +
> +static struct vfio_iommu_vgpu *_local_iommu = NULL;
> +
> +int vgpu_dma_do_translate(dma_addr_t *gfn_buffer, uint32_t count)
> +{
> +	int i = 0, ret = 0, prot = 0;
> +	unsigned long remote_vaddr = 0, pfn = 0;
> +	struct vfio_iommu_vgpu *vgpu_iommu = _local_iommu;
> +	struct vgpu_vfio_dma *vgpu_dma;
> +	struct page *page[1];
> +	struct mm_struct *mm = vgpu_iommu->vm_mm;
> +
> +	prot = IOMMU_READ | IOMMU_WRITE;
> +
> +	printk(KERN_INFO "%s: >>>>\n", __FUNCTION__);
> +
> +	mutex_lock(&vgpu_iommu->lock);
> +
> +	vgpu_dump_dma(vgpu_iommu);
> +
> +	for (i = 0; i < count; i++) {
> +		dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
> +		vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);
> +
> +		if (!vgpu_dma) {
> +			printk(KERN_INFO "%s: fail locate iova[%d]:0x%llx\n", __FUNCTION__, i, iova);
> +			ret = -EINVAL;
> +			goto unlock;
> +		}
> +
> +		remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
> +		printk(KERN_INFO "%s: find dma iova[%d]:0x%llx, vaddr:0x%lx, size:0x%lx, remote_vaddr:0x%lx\n",
> +			__FUNCTION__, i, vgpu_dma->iova,
> +			vgpu_dma->vaddr, vgpu_dma->size, remote_vaddr);
> +
> +		if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, page) == 1) {
> +			pfn = page_to_pfn(page[0]);
> +			printk(KERN_INFO "%s: pfn[%d]:0x%lx\n", __FUNCTION__, i, pfn);
> +		}
> +		else {
> +			printk(KERN_INFO "%s: fail to pin pfn[%d]\n", __FUNCTION__, i);
> +			ret = -ENOMEM;
> +			goto unlock;
> +		}
> +
> +		gfn_buffer[i] = pfn;
> +	}
> +
> +unlock:
> +	mutex_unlock(&vgpu_iommu->lock);
> +	printk(KERN_INFO "%s: <<<<\n", __FUNCTION__);
> +	return ret;
> +}
> +EXPORT_SYMBOL(vgpu_dma_do_translate);
> +
> +static void *vfio_iommu_vgpu_open(unsigned long arg)
> +{
> +	struct vfio_iommu_vgpu *iommu;
> +
> +	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
> +
> +	if (!iommu)
> +		return ERR_PTR(-ENOMEM);
> +
> +	mutex_init(&iommu->lock);
> +
> +	printk(KERN_INFO "%s", __FUNCTION__);
> +
> +	/* TODO: keep track of v2 vs. v1; for now assume v2,
> +	 * matching the QEMU code */
> +	_local_iommu = iommu;
> +	return iommu;
> +}
> +
> +static void vfio_iommu_vgpu_release(void *iommu_data)
> +{
> +	struct vfio_iommu_vgpu *iommu = iommu_data;
> +	struct vgpu_vfio_dma *c, *n;
> +
> +	/* Free any tracking entries still in the rb-tree so they are
> +	 * not leaked along with the iommu object */
> +	rbtree_postorder_for_each_entry_safe(c, n, &iommu->dma_list, node)
> +		kfree(c);
> +
> +	kfree(iommu);
> +	printk(KERN_INFO "%s\n", __FUNCTION__);
> +}
> +
> +static long vfio_iommu_vgpu_ioctl(void *iommu_data,
> +		unsigned int cmd, unsigned long arg)
> +{
> +	int ret = 0;
> +	unsigned long minsz;
> +	struct vfio_iommu_vgpu *vgpu_iommu = iommu_data;
> +
> +	switch (cmd) {
> +	case VFIO_CHECK_EXTENSION:
> +	{
> +		if ((arg == VFIO_TYPE1_IOMMU) || (arg == VFIO_TYPE1v2_IOMMU))
> +			return 1;
> +		else
> +			return 0;
> +	}
> +
> +	case VFIO_IOMMU_GET_INFO:
> +	{
> +		struct vfio_iommu_type1_info info;
> +		minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
> +
> +		if (copy_from_user(&info, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (info.argsz < minsz)
> +			return -EINVAL;
> +
> +		info.flags = 0;
> +
> +		return copy_to_user((void __user *)arg, &info, minsz) ?
> +			-EFAULT : 0;
> +	}
> +	case VFIO_IOMMU_MAP_DMA:
> +	{
> +		/* TODO */
> +		struct vfio_iommu_type1_dma_map map;
> +		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
> +
> +		if (copy_from_user(&map, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (map.argsz < minsz)
> +			return -EINVAL;
> +
> +		printk(KERN_INFO "VGPU-IOMMU:MAP_DMA flags:%d, vaddr:0x%llx, iova:0x%llx, size:0x%llx\n",
> +			map.flags, map.vaddr, map.iova, map.size);
> +
> +		/*
> +		 * TODO: Tracking code is mostly duplicated from TYPE1 IOMMU, ideally,
> +		 * this should be merged into one single file and reuse data
> +		 * structure
> +		 *
> +		 */
> +		ret = vgpu_dma_do_track(vgpu_iommu, &map);
> +		break;
> +	}
> +	case VFIO_IOMMU_UNMAP_DMA:
> +	{
> +		/* TODO */
> +		struct vfio_iommu_type1_dma_unmap unmap;
> +
> +		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
> +
> +		if (copy_from_user(&unmap, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (unmap.argsz < minsz)
> +			return -EINVAL;
> +
> +		ret = vgpu_dma_do_untrack(vgpu_iommu, &unmap);
> +		break;
> +	}
> +	default:
> +	{
> +		printk(KERN_INFO "%s cmd default ", __FUNCTION__);
> +		ret = -ENOTTY;
> +		break;
> +	}
> +	}
> +
> +	return ret;
> +}
> +
> +
> +static int vfio_iommu_vgpu_attach_group(void *iommu_data,
> +		                        struct iommu_group *iommu_group)
> +{
> +	struct vfio_iommu_vgpu *iommu = iommu_data;
> +	struct vgpu_device *vgpu_dev = NULL;
> +
> +	printk(KERN_INFO "%s", __FUNCTION__);
> +
> +	vgpu_dev = get_vgpu_device_from_group(iommu_group);
> +	if (!vgpu_dev)
> +		return -EINVAL;
> +
> +	iommu->vgpu_dev = vgpu_dev;
> +	iommu->group = iommu_group;
> +
> +	/* The IOMMU shares the same life cycle as the VM MM */
> +	iommu->vm_mm = current->mm;
> +
> +	return 0;
> +}
> +
> +static void vfio_iommu_vgpu_detach_group(void *iommu_data,
> +		struct iommu_group *iommu_group)
> +{
> +	struct vfio_iommu_vgpu *iommu = iommu_data;
> +
> +	printk(KERN_INFO "%s", __FUNCTION__);
> +	iommu->vm_mm = NULL;
> +	iommu->group = NULL;
> +}
> +
> +
> +static const struct vfio_iommu_driver_ops vfio_iommu_vgpu_driver_ops = {
> +	.name           = "vgpu_vfio",
> +	.owner          = THIS_MODULE,
> +	.open           = vfio_iommu_vgpu_open,
> +	.release        = vfio_iommu_vgpu_release,
> +	.ioctl          = vfio_iommu_vgpu_ioctl,
> +	.attach_group   = vfio_iommu_vgpu_attach_group,
> +	.detach_group   = vfio_iommu_vgpu_detach_group,
> +};
> +
> +
> +int vgpu_vfio_iommu_init(void)
> +{
> +	int rc = vfio_register_iommu_driver(&vfio_iommu_vgpu_driver_ops);
> +
> +	printk(KERN_INFO "%s\n", __FUNCTION__);
> +	if (rc < 0)
> +		printk(KERN_ERR "Error: failed to register vfio iommu, err:%d\n", rc);
> +
> +	return rc;
> +}
> +
> +void vgpu_vfio_iommu_exit(void)
> +{
> +	/* unregister the vgpu_vfio IOMMU driver */
> +	vfio_unregister_iommu_driver(&vfio_iommu_vgpu_driver_ops);
> +	printk(KERN_INFO "%s\n", __FUNCTION__);
> +}
> +
> +
> +module_init(vgpu_vfio_iommu_init);
> +module_exit(vgpu_vfio_iommu_exit);
> +
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_DESCRIPTION(DRIVER_DESC);
> +
> 

--
Thanks,
Jike

^ permalink raw reply	[flat|nested] 38+ messages in thread
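
For readers following the VFIO side of the patch above: the MAP_DMA/UNMAP_DMA
tracking path is driven through the standard VFIO type1 UAPI, so a sketch of
the userspace (QEMU/VMM) caller needs nothing vGPU-specific. This is
illustrative only; container_fd is assumed to be an already opened VFIO
container with the group attached:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/vfio.h>

	/* Register a guest-physical range backed by a host virtual mapping. */
	static int map_guest_range(int container_fd, void *hva,
				   uint64_t gpa, uint64_t size)
	{
		struct vfio_iommu_type1_dma_map map = {
			.argsz = sizeof(map),
			.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
			.vaddr = (uintptr_t)hva,	/* host virtual address */
			.iova  = gpa,			/* guest physical as IOVA */
			.size  = size,
		};

		/* With this patch the vGPU backend only records GPA->HVA;
		 * pinning happens later via vgpu_dma_do_translate(). */
		return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
	}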

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-02  8:38     ` [Qemu-devel] " Jike Song
@ 2016-03-04  7:00       ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-04  7:00 UTC (permalink / raw)
  To: Jike Song
  Cc: Kirti Wankhede, alex.williamson, pbonzini, kraxel, qemu-devel,
	kvm, kevin.tian, shuai.ruan, zhiyuan.lv

On Wed, Mar 02, 2016 at 04:38:34PM +0800, Jike Song wrote:
> On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
> > +	vgpu_dma->size = map->size;
> > +
> > +	vgpu_link_dma(vgpu_iommu, vgpu_dma);
> 
> Hi Kirti & Neo,
> 
> seems that no one actually setup mappings for IOMMU here?
> 

Hi Jike,

Yes.

The actual mapping should be done by the host kernel driver after calling the
translation/pinning API vgpu_dma_do_translate.

Thanks,
Neo

> > 
> 
> --
> Thanks,
> Jike
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread
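
A sketch of the flow Neo describes, as seen from a hypothetical GPU vendor
driver: vgpu_dma_do_translate() is the pinning/translation API from the patch
above, while program_gpu_mmu_entry() is a made-up stand-in for whatever
vendor-specific code programs the resulting host pfns into the physical GPU:

	/* Translate guest frame numbers in place: gfn in, pinned host pfn out. */
	static int vendor_pin_and_map(dma_addr_t *gfn_buffer, uint32_t count)
	{
		uint32_t i;
		int ret;

		ret = vgpu_dma_do_translate(gfn_buffer, count);
		if (ret)
			return ret;

		/* gfn_buffer[i] now holds the host pfn of a pinned page; the
		 * vendor driver sets up the real GPU's mappings from these. */
		for (i = 0; i < count; i++)
			program_gpu_mmu_entry(i, gfn_buffer[i]);

		return 0;
	}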

* Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
@ 2016-03-04  7:00       ` Neo Jia
  0 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-04  7:00 UTC (permalink / raw)
  To: Jike Song
  Cc: shuai.ruan, kevin.tian, alex.williamson, kvm, qemu-devel,
	Kirti Wankhede, kraxel, pbonzini, zhiyuan.lv

On Wed, Mar 02, 2016 at 04:38:34PM +0800, Jike Song wrote:
> On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
> > +	vgpu_dma->size = map->size;
> > +
> > +	vgpu_link_dma(vgpu_iommu, vgpu_dma);
> 
> Hi Kirti & Neo,
> 
> seems that no one actually setup mappings for IOMMU here?
> 

Hi Jike,

Yes.

The actual mapping should be done by the host kernel driver after calling the
translation/pinning API vgpu_dma_do_translate.

Thanks,
Neo

> > 
> 
> --
> Thanks,
> Jike
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-04  7:00       ` [Qemu-devel] " Neo Jia
@ 2016-03-07  6:07         ` Jike Song
  -1 siblings, 0 replies; 38+ messages in thread
From: Jike Song @ 2016-03-07  6:07 UTC (permalink / raw)
  To: Neo Jia
  Cc: Jike Song, Kirti Wankhede, Alex Williamson, pbonzini, kraxel,
	qemu-devel, kvm, kevin.tian, shuai.ruan, zhiyuan.lv

Hi Neo,

On Fri, Mar 4, 2016 at 3:00 PM, Neo Jia <cjia@nvidia.com> wrote:
> On Wed, Mar 02, 2016 at 04:38:34PM +0800, Jike Song wrote:
>> On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
>> > +   vgpu_dma->size = map->size;
>> > +
>> > +   vgpu_link_dma(vgpu_iommu, vgpu_dma);
>>
>> Hi Kirti & Neo,
>>
>> seems that no one actually setup mappings for IOMMU here?
>>
>
> Hi Jike,
>
> Yes.
>
> The actual mapping should be done by the host kernel driver after calling the
> translation/pinning API vgpu_dma_do_translate.

Thanks for the reply. I mis-deleted the mail in my Intel account, so I am
replying from my private mail account; sorry for that.


In vgpu_dma_do_translate():

for (i = 0; i < count; i++) {
   {snip}
   dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
   vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);

    remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
    if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, page) == 1) {
        pfn = page_to_pfn(page[0]);
    }
    gfn_buffer[i] = pfn;
}

If I understand correctly, the purpose of the above code is, given an
array of gfns, to pin them and return the associated pfns. There are
still no IOMMU mappings here. Is it supposed to be the caller who sets
up the IOMMU via the DMA API, e.g. dma_map_page(), after calling
vgpu_dma_do_translate()?


-- 
Thanks,
Jike

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-07  6:07         ` [Qemu-devel] " Jike Song
@ 2016-03-08  0:31           ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-08  0:31 UTC (permalink / raw)
  To: jike.song
  Cc: shuai.ruan, Jike Song, kvm, qemu-devel, Kirti Wankhede,
	kevin.tian, Alex Williamson, kraxel, pbonzini, zhiyuan.lv

On Mon, Mar 07, 2016 at 02:07:15PM +0800, Jike Song wrote:
> Hi Neo,
> 
> On Fri, Mar 4, 2016 at 3:00 PM, Neo Jia <cjia@nvidia.com> wrote:
> > On Wed, Mar 02, 2016 at 04:38:34PM +0800, Jike Song wrote:
> >> On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
> >> > +   vgpu_dma->size = map->size;
> >> > +
> >> > +   vgpu_link_dma(vgpu_iommu, vgpu_dma);
> >>
> >> Hi Kirti & Neo,
> >>
> >> seems that no one actually setup mappings for IOMMU here?
> >>
> >
> > Hi Jike,
> >
> > Yes.
> >
> > The actual mapping should be done by the host kernel driver after calling the
> > translation/pinning API vgpu_dma_do_translate.
> 
> Thanks for the reply. I mis-deleted the mail in my intel account, so
> reply with private mail account, sorry for that.
> 
> 
> In vgpu_dma_do_translate():
> 
> for (i = 0; i < count; i++) {
>    {snip}
>    dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
>    vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);
> 
>     remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
>     if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, page) == 1) {
>         pfn = page_to_pfn(page[0]);
>     }
>     gfn_buffer[i] = pfn;
> }
> 
> If I understand correctly, the purpose of above code, is given an
> array of gfns, try to pin & return associated pfns. There is still no
> IOMMU mappings here.  

Yes.

> Is it supposed to be the caller who should set
> up IOMMU by DMA api such as dma_map_page(), after calling
> vgpu_dma_do_translate()?
> 

I don't think you need to call dma_map_page here. Once you have the pfn
available to your GPU kernel driver, you can just go ahead and set up the
mapping as you normally do, e.g. by calling pci_map_sg and its friends.

Thanks,
Neo

> 
> -- 
> Thanks,
> Jike

^ permalink raw reply	[flat|nested] 38+ messages in thread
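
For concreteness, a sketch of what "pci_map_sg and its friends" could look
like in the vendor driver once vgpu_dma_do_translate() has handed back a
pinned pfn; pdev here is assumed to be the vendor driver's own handle to the
physical GPU:

	struct scatterlist sg;
	struct page *pg = pfn_to_page(pfn);	/* pfn from the translate API */
	dma_addr_t dma_addr;

	sg_init_table(&sg, 1);
	sg_set_page(&sg, pg, PAGE_SIZE, 0);

	/* Map through the physical GPU's DMA path; with an IOMMU enabled
	 * this is also what installs the IOMMU entry for the device. */
	if (pci_map_sg(pdev, &sg, 1, PCI_DMA_BIDIRECTIONAL) == 0)
		return -ENOMEM;

	dma_addr = sg_dma_address(&sg);	/* address to program into the GPU */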

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-08  0:31           ` [Qemu-devel] " Neo Jia
@ 2016-03-10  3:10             ` Jike Song
  -1 siblings, 0 replies; 38+ messages in thread
From: Jike Song @ 2016-03-10  3:10 UTC (permalink / raw)
  To: Neo Jia
  Cc: Kirti Wankhede, Alex Williamson, pbonzini, kraxel, qemu-devel,
	kvm, kevin.tian, shuai.ruan, zhiyuan.lv

On 03/08/2016 08:31 AM, Neo Jia wrote:
> On Mon, Mar 07, 2016 at 02:07:15PM +0800, Jike Song wrote:
>> Hi Neo,
>>
>> On Fri, Mar 4, 2016 at 3:00 PM, Neo Jia <cjia@nvidia.com> wrote:
>>> On Wed, Mar 02, 2016 at 04:38:34PM +0800, Jike Song wrote:
>>>> On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
>>>>> +   vgpu_dma->size = map->size;
>>>>> +
>>>>> +   vgpu_link_dma(vgpu_iommu, vgpu_dma);
>>>>
>>>> Hi Kirti & Neo,
>>>>
>>>> seems that no one actually setup mappings for IOMMU here?
>>>>
>>>
>>> Hi Jike,
>>>
>>> Yes.
>>>
>>> The actual mapping should be done by the host kernel driver after calling the
>>> translation/pinning API vgpu_dma_do_translate.
>>
>> Thanks for the reply. I mis-deleted the mail in my intel account, so
>> reply with private mail account, sorry for that.
>>
>>
>> In vgpu_dma_do_translate():
>>
>> for (i = 0; i < count; i++) {
>>    {snip}
>>    dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
>>    vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);
>>
>>     remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
>>     if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, page) == 1) {
>>         pfn = page_to_pfn(page[0]);
>>     }
>>     gfn_buffer[i] = pfn;
>> }
>>
>> If I understand correctly, the purpose of above code, is given an
>> array of gfns, try to pin & return associated pfns. There is still no
>> IOMMU mappings here.  
> 
> Yes.
> 

Thanks for the confirmation.

>> Is it supposed to be the caller who should set
>> up IOMMU by DMA api such as dma_map_page(), after calling
>> vgpu_dma_do_translate()?
>>
> 
> Don't think you need to call dma_map_page here. Once you have the pfn available
> to your GPU kernel driver, you can just go ahead to setup the mapping as you
> normally do such as calling pci_map_sg and its friends.
> 

Technically it's definitely OK to call the DMA API from the caller rather
than here; however, personally I think it is a bit counter-intuitive: IOMMU
page tables should be constructed within the VFIO IOMMU driver.


> Thanks,
> Neo

--
Thanks,
Jike


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-10  3:10             ` [Qemu-devel] " Jike Song
@ 2016-03-11  4:19               ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-11  4:19 UTC (permalink / raw)
  To: Jike Song
  Cc: Kirti Wankhede, Alex Williamson, pbonzini, kraxel, qemu-devel,
	kvm, kevin.tian, shuai.ruan, zhiyuan.lv

On Thu, Mar 10, 2016 at 11:10:10AM +0800, Jike Song wrote:
> 
> >> Is it supposed to be the caller who should set
> >> up IOMMU by DMA api such as dma_map_page(), after calling
> >> vgpu_dma_do_translate()?
> >>
> > 
> > Don't think you need to call dma_map_page here. Once you have the pfn available
> > to your GPU kernel driver, you can just go ahead to setup the mapping as you
> > normally do such as calling pci_map_sg and its friends.
> > 
> 
> Technically it's definitely OK to call DMA API from the caller rather than here,
> however personally I think it is a bit counter-intuitive: IOMMU page tables
> should be constructed within the VFIO IOMMU driver.
> 

Hi Jike,

For vGPU, what we have is just a virtual device and a fake IOMMU group, therefore 
the actual interaction with the real GPU should be managed by the GPU vendor driver.

With the default TYPE1 IOMMU, it works with the vfio-pci as it owns the device.

Thanks,
Neo

> --
> Thanks,
> Jike
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* RE: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11  4:19               ` [Qemu-devel] " Neo Jia
@ 2016-03-11  4:46                 ` Tian, Kevin
  -1 siblings, 0 replies; 38+ messages in thread
From: Tian, Kevin @ 2016-03-11  4:46 UTC (permalink / raw)
  To: Neo Jia, Song, Jike
  Cc: Kirti Wankhede, Alex Williamson, pbonzini, kraxel, qemu-devel,
	kvm, Ruan, Shuai, Lv, Zhiyuan

> From: Neo Jia [mailto:cjia@nvidia.com]
> Sent: Friday, March 11, 2016 12:20 PM
> 
> On Thu, Mar 10, 2016 at 11:10:10AM +0800, Jike Song wrote:
> >
> > >> Is it supposed to be the caller who should set
> > >> up IOMMU by DMA api such as dma_map_page(), after calling
> > >> vgpu_dma_do_translate()?
> > >>
> > >
> > > Don't think you need to call dma_map_page here. Once you have the pfn available
> > > to your GPU kernel driver, you can just go ahead to setup the mapping as you
> > > normally do such as calling pci_map_sg and its friends.
> > >
> >
> > Technically it's definitely OK to call DMA API from the caller rather than here,
> > however personally I think it is a bit counter-intuitive: IOMMU page tables
> > should be constructed within the VFIO IOMMU driver.
> >
> 
> Hi Jike,
> 
> For vGPU, what we have is just a virtual device and a fake IOMMU group, therefore
> the actual interaction with the real GPU should be managed by the GPU vendor driver.
> 

Hi, Neo,

It seems we have a different thought on this. Regardless of whether it's a
virtual or physical device, imo, VFIO should manage the IOMMU configuration.
The only difference is:

- for a physical device, VFIO directly invokes the IOMMU API to set the IOMMU
entry (GPA->HPA);
- for a virtual device, VFIO invokes the kernel DMA APIs, which indirectly
lead to the IOMMU entry being set if CONFIG_IOMMU is enabled in the kernel
(GPA->IOVA).

This would provide a unified way to manage the translation in VFIO; the
vendor-specific driver then only needs to query and use the returned IOVA
corresponding to a GPA.

Doing so has another benefit: it makes the underlying vGPU driver VMM
agnostic. For KVM, yes, we can use pci_map_sg. However, for Xen it's
different (today Dom0 doesn't see an IOMMU; in the future there will be a
PVIOMMU implementation), so a different code path is required. It's better to
abstract such specific knowledge out of the vGPU driver, which would just use
whatever dma_addr is returned by another agent (VFIO here, or a Xen-specific
agent) in a centralized way.

Alex, what's your opinion on this?

Thanks
Kevin

^ permalink raw reply	[flat|nested] 38+ messages in thread
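
To make the two paths Kevin describes concrete, a rough sketch of how they
might look inside a VFIO IOMMU backend; is_vgpu_device() and
vgpu_to_phys_dev() are hypothetical helpers (the latter would hand VFIO the
physical device backing a vGPU, which is exactly the open question in this
thread):

	if (!is_vgpu_device(dev)) {
		/* Physical device: program the IOMMU directly, GPA -> HPA. */
		ret = iommu_map(domain, gpa, hpa, size, prot);
	} else {
		/* Virtual device: use the DMA API on the backing physical
		 * GPU; with CONFIG_IOMMU enabled this yields a GPA -> IOVA
		 * mapping the vendor driver can hand to its DMA engine. */
		iova = dma_map_page(vgpu_to_phys_dev(dev), page, 0,
				    size, DMA_BIDIRECTIONAL);
		ret = dma_mapping_error(vgpu_to_phys_dev(dev), iova) ?
			-EFAULT : 0;
	}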

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11  4:46                 ` [Qemu-devel] " Tian, Kevin
@ 2016-03-11  6:10                   ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-11  6:10 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Ruan, Shuai, Song, Jike, kvm, qemu-devel, Kirti Wankhede,
	Alex Williamson, kraxel, pbonzini, Lv, Zhiyuan

On Fri, Mar 11, 2016 at 04:46:23AM +0000, Tian, Kevin wrote:
> > From: Neo Jia [mailto:cjia@nvidia.com]
> > Sent: Friday, March 11, 2016 12:20 PM
> > 
> > On Thu, Mar 10, 2016 at 11:10:10AM +0800, Jike Song wrote:
> > >
> > > >> Is it supposed to be the caller who should set
> > > >> up IOMMU by DMA api such as dma_map_page(), after calling
> > > >> vgpu_dma_do_translate()?
> > > >>
> > > >
> > > > Don't think you need to call dma_map_page here. Once you have the pfn available
> > > > to your GPU kernel driver, you can just go ahead to setup the mapping as you
> > > > normally do such as calling pci_map_sg and its friends.
> > > >
> > >
> > > Technically it's definitely OK to call DMA API from the caller rather than here,
> > > however personally I think it is a bit counter-intuitive: IOMMU page tables
> > > should be constructed within the VFIO IOMMU driver.
> > >
> > 
> > Hi Jike,
> > 
> > For vGPU, what we have is just a virtual device and a fake IOMMU group, therefore
> > the actual interaction with the real GPU should be managed by the GPU vendor driver.
> > 
> 
> Hi, Neo,
> 
> Seems we have a different thought on this. Regardless of whether it's a virtual/physical 
> device, imo, VFIO should manage IOMMU configuration. The only difference is:
> 
> - for physical device, VFIO directly invokes IOMMU API to set IOMMU entry (GPA->HPA);
> - for virtual device, VFIO invokes kernel DMA APIs which indirectly lead to IOMMU entry 
> set if CONFIG_IOMMU is enabled in kernel (GPA->IOVA);

How does it make any sense for us to do a dma_map_page for a physical device that we don't 
have any direct interaction with?

> 
> This would provide an unified way to manage the translation in VFIO, and then vendor
> specific driver only needs to query and use returned IOVA corresponding to a GPA. 
> 
> Doing so has another benefit, to make underlying vGPU driver VMM agnostic. For KVM,
> yes we can use pci_map_sg. However for Xen it's different (today Dom0 doesn't see
> IOMMU. In the future there'll be a PVIOMMU implementation) so different code path is 
> required. It's better to abstract such specific knowledge out of vGPU driver, which just
> uses whatever dma_addr returned by other agent (VFIO here, or another Xen specific
> agent) in a centralized way.
> 
> Alex, what's your opinion on this?
> 
> Thanks
> Kevin

^ permalink raw reply	[flat|nested] 38+ messages in thread

* RE: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11  6:10                   ` [Qemu-devel] " Neo Jia
@ 2016-03-11  8:06                     ` Tian, Kevin
  -1 siblings, 0 replies; 38+ messages in thread
From: Tian, Kevin @ 2016-03-11  8:06 UTC (permalink / raw)
  To: Neo Jia
  Cc: Song, Jike, Kirti Wankhede, Alex Williamson, pbonzini, kraxel,
	qemu-devel, kvm, Ruan, Shuai, Lv, Zhiyuan

> From: Neo Jia
> Sent: Friday, March 11, 2016 2:11 PM
> > > Hi Jike,
> > >
> > > For vGPU, what we have is just a virtual device and a fake IOMMU group, therefore
> > > the actual interaction with the real GPU should be managed by the GPU vendor driver.
> > >
> >
> > Hi, Neo,
> >
> > Seems we have a different thought on this. Regardless of whether it's a virtual/physical
> > device, imo, VFIO should manage IOMMU configuration. The only difference is:
> >
> > - for physical device, VFIO directly invokes IOMMU API to set IOMMU entry (GPA->HPA);
> > - for virtual device, VFIO invokes kernel DMA APIs which indirectly lead to IOMMU entry
> > set if CONFIG_IOMMU is enabled in kernel (GPA->IOVA);
> 
> How does it make any sense for us to do a dma_map_page for a physical device that we
> don't
> have any direct interaction with?
> 

That is also a valid point. It really depends on how we look at this issue.

From VFIO's p.o.v., it needs to enforce DMA isolation for managed devices;
in that sense it doesn't matter whether the device is a physical or a
virtual one. However, looking at the specific Linux DMA interface, you are
right that it is built around the physical device instance, which is not
managed by VFIO in this case.

On the other hand, your proposal leaves the DMA mapping to the
vendor-specific driver, which actually manages the physical device. This
way, however, VFIO relies on another agent to enforce DMA isolation of
vGPUs. That might not be a real problem (more a conceptual one)...

So let me do more thinking here (half-way convinced by you) :-)

Thanks
Kevin

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11  4:46                 ` [Qemu-devel] " Tian, Kevin
@ 2016-03-11 16:13                   ` Alex Williamson
  -1 siblings, 0 replies; 38+ messages in thread
From: Alex Williamson @ 2016-03-11 16:13 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Neo Jia, Song, Jike, Kirti Wankhede, pbonzini, kraxel,
	qemu-devel, kvm, Ruan, Shuai, Lv, Zhiyuan

On Fri, 11 Mar 2016 04:46:23 +0000
"Tian, Kevin" <kevin.tian@intel.com> wrote:

> > From: Neo Jia [mailto:cjia@nvidia.com]
> > Sent: Friday, March 11, 2016 12:20 PM
> > 
> > On Thu, Mar 10, 2016 at 11:10:10AM +0800, Jike Song wrote:  
> > >  
> > > >> Is it supposed to be the caller who should set
> > > >> up IOMMU by DMA api such as dma_map_page(), after calling
> > > >> vgpu_dma_do_translate()?
> > > >>  
> > > >
> > > > Don't think you need to call dma_map_page here. Once you have the pfn available
> > > > to your GPU kernel driver, you can just go ahead to setup the mapping as you
> > > > normally do such as calling pci_map_sg and its friends.
> > > >  
> > >
> > > Technically it's definitely OK to call DMA API from the caller rather than here,
> > > however personally I think it is a bit counter-intuitive: IOMMU page tables
> > > should be constructed within the VFIO IOMMU driver.
> > >  
> > 
> > Hi Jike,
> > 
> > For vGPU, what we have is just a virtual device and a fake IOMMU group, therefore
> > the actual interaction with the real GPU should be managed by the GPU vendor driver.
> >   
> 
> Hi, Neo,
> 
> Seems we have a different thought on this. Regardless of whether it's a virtual/physical 
> device, imo, VFIO should manage IOMMU configuration. The only difference is:
> 
> - for physical device, VFIO directly invokes IOMMU API to set IOMMU entry (GPA->HPA);
> - for virtual device, VFIO invokes kernel DMA APIs which indirectly lead to IOMMU entry 
> set if CONFIG_IOMMU is enabled in kernel (GPA->IOVA);
> 
> This would provide an unified way to manage the translation in VFIO, and then vendor
> specific driver only needs to query and use returned IOVA corresponding to a GPA. 
> 
> Doing so has another benefit, to make underlying vGPU driver VMM agnostic. For KVM,
> yes we can use pci_map_sg. However for Xen it's different (today Dom0 doesn't see
> IOMMU. In the future there'll be a PVIOMMU implementation) so different code path is 
> required. It's better to abstract such specific knowledge out of vGPU driver, which just
> uses whatever dma_addr returned by other agent (VFIO here, or another Xen specific
> agent) in a centralized way.
> 
> Alex, what's your opinion on this?

The sticky point is how vfio, which is only handling the vGPU, has a
reference to the physical GPU on which to call DMA API operations.  If
that reference is provided by the vendor vGPU driver, for example
vgpu_dma_do_translate_for_pci(gpa, pci_dev), I don't see any reason to
be opposed to such an API.  I would not condone vfio deriving or owning
a reference to the physical device on its own though, that's in the
realm of the vendor vGPU driver.  It does seem a bit cleaner and should
reduce duplicate code if the vfio vGPU iommu interface could handle the
iommu mapping for the vendor vgpu driver when necessary.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 38+ messages in thread
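
The interface shape Alex suggests might look roughly as follows; this is
only a sketch -- vgpu_dma_do_translate_for_pci() does not exist in the RFC,
and the point is simply that the vendor vGPU driver, not vfio, supplies the
pci_dev:

	/**
	 * vgpu_dma_do_translate_for_pci - translate and DMA-map for a physical GPU
	 * @gfn_buffer: guest frame numbers in, bus/DMA addresses out
	 * @count: number of entries in @gfn_buffer
	 * @pdev: the physical GPU, supplied by the vendor vGPU driver
	 *
	 * Like vgpu_dma_do_translate(), but also performs the DMA API mapping
	 * against @pdev on the vendor driver's behalf, keeping the iommu
	 * mapping logic inside the vfio vGPU iommu backend.
	 */
	int vgpu_dma_do_translate_for_pci(dma_addr_t *gfn_buffer,
					  uint32_t count, struct pci_dev *pdev);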

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11 16:13                   ` [Qemu-devel] " Alex Williamson
@ 2016-03-11 16:55                     ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-11 16:55 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Tian, Kevin, Song, Jike, Kirti Wankhede, pbonzini, kraxel,
	qemu-devel, kvm, Ruan, Shuai, Lv, Zhiyuan

On Fri, Mar 11, 2016 at 09:13:15AM -0700, Alex Williamson wrote:
> On Fri, 11 Mar 2016 04:46:23 +0000
> "Tian, Kevin" <kevin.tian@intel.com> wrote:
> 
> > > From: Neo Jia [mailto:cjia@nvidia.com]
> > > Sent: Friday, March 11, 2016 12:20 PM
> > > 
> > > On Thu, Mar 10, 2016 at 11:10:10AM +0800, Jike Song wrote:  
> > > >  
> > > > >> Is it supposed to be the caller who should set
> > > > >> up IOMMU by DMA api such as dma_map_page(), after calling
> > > > >> vgpu_dma_do_translate()?
> > > > >>  
> > > > >
> > > > > Don't think you need to call dma_map_page here. Once you have the pfn available
> > > > > to your GPU kernel driver, you can just go ahead to setup the mapping as you
> > > > > normally do such as calling pci_map_sg and its friends.
> > > > >  
> > > >
> > > > Technically it's definitely OK to call DMA API from the caller rather than here,
> > > > however personally I think it is a bit counter-intuitive: IOMMU page tables
> > > > should be constructed within the VFIO IOMMU driver.
> > > >  
> > > 
> > > Hi Jike,
> > > 
> > > For vGPU, what we have is just a virtual device and a fake IOMMU group, therefore
> > > the actual interaction with the real GPU should be managed by the GPU vendor driver.
> > >   
> > 
> > Hi, Neo,
> > 
> > Seems we have a different thought on this. Regardless of whether it's a virtual/physical 
> > device, imo, VFIO should manage IOMMU configuration. The only difference is:
> > 
> > - for physical device, VFIO directly invokes IOMMU API to set IOMMU entry (GPA->HPA);
> > - for virtual device, VFIO invokes kernel DMA APIs which indirectly lead to IOMMU entry 
> > set if CONFIG_IOMMU is enabled in kernel (GPA->IOVA);
> > 
> > This would provide an unified way to manage the translation in VFIO, and then vendor
> > specific driver only needs to query and use returned IOVA corresponding to a GPA. 
> > 
> > Doing so has another benefit, to make underlying vGPU driver VMM agnostic. For KVM,
> > yes we can use pci_map_sg. However for Xen it's different (today Dom0 doesn't see
> > IOMMU. In the future there'll be a PVIOMMU implementation) so different code path is 
> > required. It's better to abstract such specific knowledge out of vGPU driver, which just
> > uses whatever dma_addr returned by other agent (VFIO here, or another Xen specific
> > agent) in a centralized way.
> > 
> > Alex, what's your opinion on this?
> 
> The sticky point is how vfio, which is only handling the vGPU, has a
> reference to the physical GPU on which to call DMA API operations.  If
> that reference is provided by the vendor vGPU driver, for example
> vgpu_dma_do_translate_for_pci(gpa, pci_dev), I don't see any reason to
> be opposed to such an API.  I would not condone vfio deriving or owning
> a reference to the physical device on its own though, that's in the
> realm of the vendor vGPU driver.  It does seem a bit cleaner and should
> reduce duplicate code if the vfio vGPU iommu interface could handle the
> iommu mapping for the vendor vgpu driver when necessary.  Thanks,

Hi Alex,

Since we don't want to allow the vfio iommu to derive or own a reference to
the physical device, I think it is still better not to provide such a
pci_dev to the vfio iommu type1 driver.

Also, I need to point out that if the vfio iommu is going to set up iommu
page tables for the real underlying physical device, then given the single
RID we all have here, the iommu mapping code has to return the new "IOVA"
that is mapped to the HPA, which the GPU vendor driver will have to program
into its DMA engine. This is very different from the current VFIO IOMMU
mapping logic.

And we still have to provide another interface to translate the GPA to
HPA for CPU mapping.

In the current RFC, we only need to have a single interface to provide the most
basic information to the GPU vendor driver and without taking the risk of
leaking a ref to VFIO IOMMU.

Thanks,
Neo

> 
> Alex

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11 16:55                     ` [Qemu-devel] " Neo Jia
@ 2016-03-11 17:56                       ` Alex Williamson
  -1 siblings, 0 replies; 38+ messages in thread
From: Alex Williamson @ 2016-03-11 17:56 UTC (permalink / raw)
  To: Neo Jia
  Cc: Tian, Kevin, Song, Jike, Kirti Wankhede, pbonzini, kraxel,
	qemu-devel, kvm, Ruan, Shuai, Lv, Zhiyuan

On Fri, 11 Mar 2016 08:55:44 -0800
Neo Jia <cjia@nvidia.com> wrote:

> On Fri, Mar 11, 2016 at 09:13:15AM -0700, Alex Williamson wrote:
> > On Fri, 11 Mar 2016 04:46:23 +0000
> > "Tian, Kevin" <kevin.tian@intel.com> wrote:
> >   
> > > > From: Neo Jia [mailto:cjia@nvidia.com]
> > > > Sent: Friday, March 11, 2016 12:20 PM
> > > > 
> > > > On Thu, Mar 10, 2016 at 11:10:10AM +0800, Jike Song wrote:    
> > > > >    
> > > > > >> Is it supposed to be the caller who should set
> > > > > >> up the IOMMU via DMA APIs such as dma_map_page(), after calling
> > > > > >> vgpu_dma_do_translate()?
> > > > > >>    
> > > > > >
> > > > > > Don't think you need to call dma_map_page here. Once you have the pfn available
> > > > > > to your GPU kernel driver, you can just go ahead and set up the mapping as you
> > > > > > normally do, such as by calling pci_map_sg and its friends.
> > > > > >    
> > > > >
> > > > > Technically it's definitely OK to call the DMA API from the caller rather than here;
> > > > > however, personally I think it is a bit counter-intuitive: IOMMU page tables
> > > > > should be constructed within the VFIO IOMMU driver.
> > > > >    
> > > > 
> > > > Hi Jike,
> > > > 
> > > > For vGPU, what we have is just a virtual device and a fake IOMMU group; therefore,
> > > > the actual interaction with the real GPU should be managed by the GPU vendor driver.
> > > >     
> > > 
> > > Hi, Neo,
> > > 
> > > Seems we have a different thought on this. Regardless of whether it's a virtual or
> > > physical device, imo, VFIO should manage the IOMMU configuration. The only difference is:
> > > 
> > > - for a physical device, VFIO directly invokes the IOMMU API to set the IOMMU entry (GPA->HPA);
> > > - for a virtual device, VFIO invokes kernel DMA APIs, which indirectly lead to the IOMMU
> > > entry being set if CONFIG_IOMMU is enabled in the kernel (GPA->IOVA).
> > > 
> > > This would provide a unified way to manage the translation in VFIO, and then the
> > > vendor-specific driver only needs to query and use the returned IOVA corresponding to a GPA.
> > > 
> > > Doing so has another benefit: it makes the underlying vGPU driver VMM-agnostic. For KVM,
> > > yes, we can use pci_map_sg. However, for Xen it's different (today Dom0 doesn't see the
> > > IOMMU; in the future there'll be a PVIOMMU implementation), so a different code path is
> > > required. It's better to abstract such specific knowledge out of the vGPU driver, which
> > > just uses whatever dma_addr is returned by the other agent (VFIO here, or another
> > > Xen-specific agent) in a centralized way.
> > > 
> > > Alex, what's your opinion on this?  
> > 
> > The sticky point is how vfio, which is only handling the vGPU, has a
> > reference to the physical GPU on which to call DMA API operations.  If
> > that reference is provided by the vendor vGPU driver, for example
> > vgpu_dma_do_translate_for_pci(gpa, pci_dev), I don't see any reason to
> > be opposed to such an API.  I would not condone vfio deriving or owning
> > a reference to the physical device on its own though, that's in the
> > realm of the vendor vGPU driver.  It does seem a bit cleaner and should
> > reduce duplicate code if the vfio vGPU iommu interface could handle the
> > iommu mapping for the vendor vgpu driver when necessary.  Thanks,  
> 
> Hi Alex,
> 
> Since we don't want to allow the vfio iommu to derive or own a reference to the
> physical device, I think it is still better not to provide such a pci_dev to the
> vfio iommu type1 driver.
> 
> Also, I need to point out that if the vfio iommu is going to set up the iommu page
> table for the real underlying physical device, given the single RID we all share
> here, the iommu mapping code has to return the new "IOVA" that is mapped to the
> HPA, which the GPU vendor driver will have to put on its DMA engine. This is
> very different from the current VFIO IOMMU mapping logic.
> 
> And we would still have to provide another interface to translate a GPA to an
> HPA for CPU mapping.
> 
> In the current RFC, we only need a single interface to provide the most basic
> information to the GPU vendor driver, without taking the risk of leaking a
> reference to the VFIO IOMMU.

I don't see this as some fundamental difference of opinion; it's really
just whether vfio provides a "pin this GFN and return the HPA" function,
or whether that function could be extended to include "... and also map
it through the DMA API for the provided device and return the host
IOVA".  It might even still be a single function to vfio for CPU vs.
device mapping, where the device and IOVA return pointers are NULL when
only pinning is required for CPU access (though maybe there are better
ways to provide CPU access than pinning).  A wrapper could even give the
appearance that those are two separate functions.

So long as vfio isn't owning or deriving the device for the DMA API
calls and we don't introduce some complication in page accounting, this
really just seems like a question of whether moving the DMA API handling
into vfio is common between the vendor vGPU drivers, and whether we
reduce the overall amount and complexity of code by giving the vendor
drivers the opportunity to do both operations with one interface.
If, as Kevin suggests, it also provides some additional abstractions
for Xen vs. KVM, even better.  Thanks,

Alex
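
A minimal sketch of the combined interface described above, where NULL
device and IOVA arguments mean "pin for CPU access only". All names here
are hypothetical, and vfio_pin_gpa()/vfio_unpin_gpa() merely stand in
for vfio's internal pinning and page-accounting path:

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/pci.h>

/* Hypothetical vfio-internal helpers; not real symbols in the RFC. */
int vfio_pin_gpa(dma_addr_t gpa, unsigned long *hpa);
void vfio_unpin_gpa(dma_addr_t gpa);

/* Pin the guest page at @gpa, returning its host physical address in
 * @hpa.  If @pdev and @iova are non-NULL, also map the page through
 * the DMA API for @pdev and return the resulting bus address. */
int vfio_vgpu_pin_and_map(dma_addr_t gpa, unsigned long *hpa,
			  struct pci_dev *pdev, dma_addr_t *iova)
{
	int ret;

	ret = vfio_pin_gpa(gpa, hpa);
	if (ret || !pdev || !iova)
		return ret;	/* CPU access only: pinning suffices. */

	/* The vendor driver supplied the physical device, so vfio can
	 * drive the DMA API without deriving the device itself. */
	*iova = dma_map_page(&pdev->dev,
			     pfn_to_page(*hpa >> PAGE_SHIFT), 0,
			     PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&pdev->dev, *iova)) {
		vfio_unpin_gpa(gpa);	/* undo the pin on failure */
		return -ENOMEM;
	}
	return 0;
}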

* Re: [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
  2016-03-11 17:56                       ` [Qemu-devel] " Alex Williamson
@ 2016-03-11 18:18                         ` Neo Jia
  -1 siblings, 0 replies; 38+ messages in thread
From: Neo Jia @ 2016-03-11 18:18 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Ruan, Shuai, Tian, Kevin, kvm, qemu-devel, Song, Jike,
	Kirti Wankhede, kraxel, pbonzini, Lv, Zhiyuan

On Fri, Mar 11, 2016 at 10:56:24AM -0700, Alex Williamson wrote:
> On Fri, 11 Mar 2016 08:55:44 -0800
> Neo Jia <cjia@nvidia.com> wrote:
> 
> > > > Alex, what's your opinion on this?  
> > > 
> > > The sticky point is how vfio, which is only handling the vGPU, has a
> > > reference to the physical GPU on which to call DMA API operations.  If
> > > that reference is provided by the vendor vGPU driver, for example
> > > vgpu_dma_do_translate_for_pci(gpa, pci_dev), I don't see any reason to
> > > be opposed to such an API.  I would not condone vfio deriving or owning
> > > a reference to the physical device on its own though, that's in the
> > > realm of the vendor vGPU driver.  It does seem a bit cleaner and should
> > > reduce duplicate code if the vfio vGPU iommu interface could handle the
> > > iommu mapping for the vendor vgpu driver when necessary.  Thanks,  
> > 
> > Hi Alex,
> > 
> > Since we don't want to allow the vfio iommu to derive or own a reference to the
> > physical device, I think it is still better not to provide such a pci_dev to the
> > vfio iommu type1 driver.
> > 
> > Also, I need to point out that if the vfio iommu is going to set up the iommu page
> > table for the real underlying physical device, given the single RID we all share
> > here, the iommu mapping code has to return the new "IOVA" that is mapped to the
> > HPA, which the GPU vendor driver will have to put on its DMA engine. This is
> > very different from the current VFIO IOMMU mapping logic.
> > 
> > And we would still have to provide another interface to translate a GPA to an
> > HPA for CPU mapping.
> > 
> > In the current RFC, we only need a single interface to provide the most basic
> > information to the GPU vendor driver, without taking the risk of leaking a
> > reference to the VFIO IOMMU.
> 
> I don't see this as some fundamental difference of opinion; it's really
> just whether vfio provides a "pin this GFN and return the HPA" function,
> or whether that function could be extended to include "... and also map
> it through the DMA API for the provided device and return the host
> IOVA".  It might even still be a single function to vfio for CPU vs.
> device mapping, where the device and IOVA return pointers are NULL when
> only pinning is required for CPU access (though maybe there are better
> ways to provide CPU access than pinning).  A wrapper could even give the
> appearance that those are two separate functions.
> 
> So long as vfio isn't owning or deriving the device for the DMA API
> calls and we don't introduce some complication in page accounting, this
> really just seems like a question of whether moving the DMA API handling
> into vfio is common between the vendor vGPU drivers, and whether we
> reduce the overall amount and complexity of code by giving the vendor
> drivers the opportunity to do both operations with one interface.

Hi Alex,

OK, I will look into adding such a facility and will probably include it in a
later rev of the vGPU IOMMU if we don't run into any surprises or the issues
you mentioned above.

Thanks,
Neo

> If, as Kevin suggests, it also provides some additional abstractions
> for Xen vs. KVM, even better.  Thanks,
> 
> Alex
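
As a usage note on the "wrapper" idea above: two thin entry points could
sit over the single combined function, as in this sketch. It reuses the
hypothetical vfio_vgpu_pin_and_map() from the earlier sketch, and all
names remain illustrative:

/* CPU access: pin only; no device or IOVA involved. */
static inline int vfio_vgpu_pin(dma_addr_t gpa, unsigned long *hpa)
{
	return vfio_vgpu_pin_and_map(gpa, hpa, NULL, NULL);
}

/* Device access: pin and also map through the DMA API for @pdev,
 * returning the host IOVA the vendor driver puts on its DMA engine. */
static inline int vfio_vgpu_map_for_device(dma_addr_t gpa,
					   unsigned long *hpa,
					   struct pci_dev *pdev,
					   dma_addr_t *iova)
{
	return vfio_vgpu_pin_and_map(gpa, hpa, pdev, iova);
}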

end of thread, other threads:[~2016-03-11 18:18 UTC | newest]

Thread overview: 38+ messages
2016-02-23 16:24 [RFC PATCH v2 1/3] vGPU Core driver Kirti Wankhede
2016-02-23 16:24 ` [RFC PATCH v2 2/3] VFIO driver for vGPU device Kirti Wankhede
2016-02-23 16:24 ` [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU Kirti Wankhede
2016-03-02  8:38   ` Jike Song
2016-03-04  7:00     ` Neo Jia
2016-03-07  6:07       ` Jike Song
2016-03-08  0:31         ` Neo Jia
2016-03-10  3:10           ` Jike Song
2016-03-11  4:19             ` Neo Jia
2016-03-11  4:46               ` Tian, Kevin
2016-03-11  6:10                 ` Neo Jia
2016-03-11  8:06                   ` Tian, Kevin
2016-03-11 16:13                 ` Alex Williamson
2016-03-11 16:55                   ` Neo Jia
2016-03-11 17:56                     ` Alex Williamson
2016-03-11 18:18                       ` Neo Jia
2016-02-29  5:39 ` [RFC PATCH v2 1/3] vGPU Core driver Tian, Kevin
2016-02-29 23:17   ` Neo Jia
2016-03-01  3:10     ` Jike Song