All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: chrisw@sous-sol.org, aik@au1.ibm.com, pmac@au1.ibm.com,
	dwg@au1.ibm.com, joerg.roedel@amd.com, agraf@suse.de,
	benve@cisco.com, aafabbri@cisco.com, B08248@freescale.com,
	B07421@freescale.com, avi@redhat.com, kvm@vger.kernel.org,
	qemu-devel@nongnu.org, iommu@lists.linux-foundation.org,
	linux-pci@vger.kernel.org
Cc: alex.williamson@redhat.com
Subject: [Qemu-devel] [RFC PATCH 3/5] VFIO: Base framework for new VFIO driver
Date: Thu, 01 Sep 2011 13:50:43 -0600	[thread overview]
Message-ID: <20110901195043.2391.31843.stgit@s20.home> (raw)
In-Reply-To: <20110901194915.2391.97400.stgit@s20.home>

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 drivers/Kconfig             |    2 
 drivers/Makefile            |    1 
 drivers/vfio/Kconfig        |    5 
 drivers/vfio/Makefile       |    3 
 drivers/vfio/vfio_device.c  |  109 +++++
 drivers/vfio/vfio_iommu.c   |   81 ++++
 drivers/vfio/vfio_main.c    |  879 +++++++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio_private.h |   82 ++++
 8 files changed, 1162 insertions(+), 0 deletions(-)
 create mode 100644 drivers/vfio/Kconfig
 create mode 100644 drivers/vfio/Makefile
 create mode 100644 drivers/vfio/vfio_device.c
 create mode 100644 drivers/vfio/vfio_iommu.c
 create mode 100644 drivers/vfio/vfio_main.c
 create mode 100644 drivers/vfio/vfio_private.h

diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3bb154d..5b5fffc 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -112,6 +112,8 @@ source "drivers/auxdisplay/Kconfig"
 
 source "drivers/uio/Kconfig"
 
+source "drivers/vfio/Kconfig"
+
 source "drivers/vlynq/Kconfig"
 
 source "drivers/xen/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 09f3232..6b17848 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_FUSION)		+= message/
 obj-y				+= firewire/
 obj-$(CONFIG_UIO)		+= uio/
+obj-$(CONFIG_VFIO)		+= vfio/
 obj-y				+= cdrom/
 obj-y				+= auxdisplay/
 obj-$(CONFIG_PCCARD)		+= pcmcia/
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
new file mode 100644
index 0000000..a150521
--- /dev/null
+++ b/drivers/vfio/Kconfig
@@ -0,0 +1,5 @@
+menuconfig VFIO
+	tristate "Non-Privileged User Space driver"
+	depends on IOMMU_API
+	help
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
new file mode 100644
index 0000000..5eaa074
--- /dev/null
+++ b/drivers/vfio/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_VFIO) := vfio.o
+
+vfio-y := vfio_main.o vfio_iommu.o vfio_device.o
diff --git a/drivers/vfio/vfio_device.c b/drivers/vfio/vfio_device.c
new file mode 100644
index 0000000..101cbbf
--- /dev/null
+++ b/drivers/vfio/vfio_device.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+/*
+ * VFIO device module: Common device handling and callouts to other drivers
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/eventfd.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <linux/vfio.h>
+
+#include "vfio_private.h"
+
+/*
+ * Called when a device file is released: drop one reference on the
+ * device and one on its iommu, both under the global group lock.
+ * Always returns 0.
+ */
+static int vfio_device_release(struct inode *inode, struct file *filep)
+{
+	struct vfio_device *vdev = filep->private_data;
+	struct mutex *lock = &vdev->vfio->group_lock;
+
+	mutex_lock(lock);
+	vdev->refcnt--;
+	vdev->iommu->refcnt--;
+	mutex_unlock(lock);
+
+	return 0;
+}
+
+/*
+ * ioctl() on a device file.  No common device ioctls exist yet, so every
+ * command is passed to the unlocked_ioctl handler in vdev->ops->fops;
+ * -EINVAL is returned when no such handler is set.
+ */
+static long vfio_device_unl_ioctl(struct file *filep,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct vfio_device *vdev = filep->private_data;
+	int ret;
+
+	/* TBD - what can we handle as common device ioctls? */
+	if (!vdev->ops->fops.unlocked_ioctl)
+		return -EINVAL;
+
+	ret = vdev->ops->fops.unlocked_ioctl(filep, cmd, arg);
+	return ret;
+}
+
+/*
+ * read() on a device file: forward to the read handler in
+ * vdev->ops->fops, or return -EINVAL when none is set.
+ */
+static ssize_t vfio_device_read(struct file *filep, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct vfio_device *vdev = filep->private_data;
+
+	if (!vdev->ops->fops.read)
+		return -EINVAL;
+
+	return vdev->ops->fops.read(filep, buf, count, ppos);
+}
+
+/*
+ * write() on a device file: forward to the write handler in
+ * vdev->ops->fops, or return -EINVAL when none is set.
+ */
+static ssize_t vfio_device_write(struct file *filep, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct vfio_device *vdev = filep->private_data;
+
+	if (!vdev->ops->fops.write)
+		return -EINVAL;
+
+	return vdev->ops->fops.write(filep, buf, count, ppos);
+}
+
+/*
+ * mmap() on a device file: forward to the mmap handler in
+ * vdev->ops->fops, or return -EINVAL when none is set.
+ */
+static int vfio_device_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct vfio_device *vdev = filep->private_data;
+
+	if (!vdev->ops->fops.mmap)
+		return -EINVAL;
+
+	return vdev->ops->fops.mmap(filep, vma);
+}
+	
+#ifdef CONFIG_COMPAT
+/*
+ * Compat ioctl entry point: run the argument through compat_ptr() and
+ * hand the result to the native ioctl handler.
+ */
+static long vfio_device_compat_ioctl(struct file *filep,
+				     unsigned int cmd, unsigned long arg)
+{
+	unsigned long native_arg = (unsigned long)compat_ptr(arg);
+
+	return vfio_device_unl_ioctl(filep, cmd, native_arg);
+}
+#endif	/* CONFIG_COMPAT */
+
+/*
+ * File operations for a vfio device file.  read, write, ioctl and mmap
+ * are wrappers that forward to the matching callback in vdev->ops->fops;
+ * release only drops the device and iommu reference counts.
+ */
+const struct file_operations vfio_device_fops = {
+	.owner		= THIS_MODULE,
+	.release	= vfio_device_release,
+	.read		= vfio_device_read,
+	.write		= vfio_device_write,
+	.unlocked_ioctl	= vfio_device_unl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vfio_device_compat_ioctl,
+#endif
+	.mmap		= vfio_device_mmap,
+};
diff --git a/drivers/vfio/vfio_iommu.c b/drivers/vfio/vfio_iommu.c
new file mode 100644
index 0000000..1a6f321
--- /dev/null
+++ b/drivers/vfio/vfio_iommu.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+/*
+ * VFIO iommu module: iommu fd callbacks
+ */
+
+#include <linux/compat.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+#include "vfio_private.h"
+
+/*
+ * Called when an iommu file is released: drop one reference on the
+ * vfio_iommu under the global group lock.  Always returns 0.
+ */
+static int vfio_iommu_release(struct inode *inode, struct file *filep)
+{
+	struct vfio_iommu *viommu = filep->private_data;
+	struct mutex *lock = &viommu->vfio->group_lock;
+
+	mutex_lock(lock);
+	viommu->refcnt--;
+	mutex_unlock(lock);
+
+	return 0;
+}
+
+/*
+ * ioctl() on an iommu file.  VFIO_IOMMU_MAP_DMA and VFIO_IOMMU_UNMAP_DMA
+ * currently only copy a struct vfio_dma_map in from user space and back
+ * out again; the actual mapping work is still a placeholder.
+ *
+ * Returns 0 on success, -EFAULT if either user copy fails, and -ENOSYS
+ * for any other command.
+ */
+static long vfio_iommu_unl_ioctl(struct file *filep,
+				 unsigned int cmd, unsigned long arg)
+{
+	struct vfio_iommu *viommu = filep->private_data;
+	void __user *uarg = (void __user *)arg;
+	struct vfio_dma_map dm;
+	int ret;
+
+	switch (cmd) {
+	case VFIO_IOMMU_MAP_DMA:
+		if (copy_from_user(&dm, uarg, sizeof(dm)))
+			return -EFAULT;
+		ret = 0; /* XXX - Do something */
+		if (ret)
+			break;
+		if (copy_to_user(uarg, &dm, sizeof(dm)))
+			ret = -EFAULT;
+		break;
+
+	case VFIO_IOMMU_UNMAP_DMA:
+		if (copy_from_user(&dm, uarg, sizeof(dm)))
+			return -EFAULT;
+		ret = 0; /* XXX - Do something */
+		if (ret)
+			break;
+		if (copy_to_user(uarg, &dm, sizeof(dm)))
+			ret = -EFAULT;
+		break;
+
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat ioctl entry point: run the argument through compat_ptr() and
+ * hand the result to the native ioctl handler.
+ */
+static long vfio_iommu_compat_ioctl(struct file *filep,
+				    unsigned int cmd, unsigned long arg)
+{
+	unsigned long native_arg = (unsigned long)compat_ptr(arg);
+
+	return vfio_iommu_unl_ioctl(filep, cmd, native_arg);
+}
+#endif	/* CONFIG_COMPAT */
+
+/*
+ * File operations for a vfio iommu file: only release and ioctl
+ * (native and, with CONFIG_COMPAT, compat) are provided.
+ */
+const struct file_operations vfio_iommu_fops = {
+	.owner		= THIS_MODULE,
+	.release	= vfio_iommu_release,
+	.unlocked_ioctl	= vfio_iommu_unl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vfio_iommu_compat_ioctl,
+#endif
+};
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
new file mode 100644
index 0000000..7f05692
--- /dev/null
+++ b/drivers/vfio/vfio_main.c
@@ -0,0 +1,879 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+/*
+ * VFIO main module: IOMMU group framework
+ */
+
+#include <linux/cdev.h>
+#include <linux/compat.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/iommu.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+#include "vfio_private.h"
+
+#define DRIVER_VERSION	"0.2"
+#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
+#define DRIVER_DESC	"VFIO - User Level meta-driver"
+
+#define MAX_PATH	256
+
+/*
+ * Module parameter.  While it is zero (the default for a static int),
+ * __vfio_open_iommu() rejects domains that lack IOMMU_CAP_INTR_REMAP.
+ */
+static int allow_unsafe_intrs;
+module_param(allow_unsafe_intrs, int, 0);
+MODULE_PARM_DESC(allow_unsafe_intrs,
+        "Allow use of IOMMUs which do not support interrupt remapping");
+
+static struct vfio vfio;
+static const struct file_operations vfio_group_fops;
+
+/*
+ * Throw away the cached read() text for a container so that the next
+ * vfio_group_read() rebuilds it.  Used whenever the set of groups or
+ * devices in the container changes.
+ */
+static inline void vfio_container_reset_read(struct vfio_container *vcontainer)
+{
+	kfree(vcontainer->read_buf);
+	vcontainer->read_buf = NULL;
+}
+
+/*
+ * Register @dev with the vfio group matching its iommu group, creating
+ * the group (and its character device node) on first use.
+ *
+ * @dev:  device to add
+ * @data: struct vfio_device_ops for the device; ops->new() is used to
+ *        allocate the vfio_device and must return an ERR_PTR on failure
+ *
+ * Returns 0 on success, when the device has no iommu group, or when it
+ * is already registered; a negative errno otherwise.
+ */
+int vfio_group_add_dev(struct device *dev, void *data)
+{
+	struct vfio_device_ops *ops = data;
+	struct vfio_group *vgroup = NULL, *g;
+	struct vfio_device *vdev = NULL, *d;
+	unsigned int group;
+	int ret = 0, new_group = 0;
+
+	/* Devices without an iommu group are silently ignored */
+	if (iommu_device_group(dev, &group))
+		return 0;
+
+	mutex_lock(&vfio.group_lock);
+
+	list_for_each_entry(g, &vfio.group_list, next) {
+		if (g->group == group) {
+			vgroup = g;
+			break;
+		}
+	}
+
+	if (!vgroup) {
+		struct device *gdev;
+		int id;
+
+		if (unlikely(idr_pre_get(&vfio.idr, GFP_KERNEL) == 0)) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		vgroup = kzalloc(sizeof(*vgroup), GFP_KERNEL);
+		if (!vgroup) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		vgroup->group = group;
+		INIT_LIST_HEAD(&vgroup->device_list);
+
+		/* On failure id is not valid, so bail out before using it */
+		ret = idr_get_new(&vfio.idr, vgroup, &id);
+		if (ret) {
+			kfree(vgroup);
+			goto out;
+		}
+
+		/*
+		 * vfio_init() reserves MINORMASK minors starting at 0, so
+		 * the last usable minor is MINORMASK - 1.
+		 */
+		if (id >= MINORMASK) {
+			idr_remove(&vfio.idr, id);
+			kfree(vgroup);
+			ret = -ENOSPC;
+			goto out;
+		}
+
+		vgroup->devt = MKDEV(MAJOR(vfio.devt), id);
+		gdev = device_create(vfio.class, NULL, vgroup->devt,
+				     vgroup, "%u", group);
+		if (IS_ERR(gdev)) {
+			idr_remove(&vfio.idr, id);
+			kfree(vgroup);
+			ret = PTR_ERR(gdev);
+			goto out;
+		}
+		list_add(&vgroup->next, &vfio.group_list);
+
+		new_group = 1;
+	} else {
+		list_for_each_entry(d, &vgroup->device_list, next) {
+			if (d->dev == dev) {
+				vdev = d;
+				break;
+			}
+		}
+	}
+
+	if (!vdev) {
+		/* Adding a device for a group that's already in use? */
+		/* Maybe we should attach to the domain so others can't */
+		BUG_ON(vgroup->container &&
+		       vgroup->container->iommu &&
+		       vgroup->container->iommu->refcnt);
+
+		vdev = ops->new(dev);
+		if (IS_ERR(vdev)) {
+			/* If we just created this vgroup, tear it down */
+			if (new_group) {
+				device_destroy(vfio.class, vgroup->devt);
+				idr_remove(&vfio.idr, MINOR(vgroup->devt));
+				list_del(&vgroup->next);
+				kfree(vgroup);
+			}
+			ret = PTR_ERR(vdev);
+			goto out;
+		}
+		list_add(&vdev->next, &vgroup->device_list);
+		vdev->dev = dev;
+		vdev->ops = ops;
+		vdev->vfio = &vfio;
+	}
+out:
+	mutex_unlock(&vfio.group_lock);
+	return ret;
+}
+
+/*
+ * Remove @dev from its vfio group.  Does nothing when the device has no
+ * iommu group or was never registered.  The group itself is destroyed
+ * once its last device is gone and nobody has it open.
+ */
+void vfio_group_del_dev(struct device *dev)
+{
+	struct list_head *pos;
+	struct vfio_container *vcontainer;
+	struct vfio_group *vgroup = NULL;
+	struct vfio_device *vdev = NULL;
+	unsigned int group;
+
+	if (iommu_device_group(dev, &group))
+		return;
+
+	mutex_lock(&vfio.group_lock);
+
+	/* Find the vfio group for this iommu group; NULL if none matches */
+	list_for_each(pos, &vfio.group_list) {
+		vgroup = list_entry(pos, struct vfio_group, next);
+		if (vgroup->group == group)
+			break;
+		vgroup = NULL;
+	}
+
+	if (!vgroup)
+		goto out;
+
+	vcontainer = vgroup->container;
+
+	/* Find the vfio_device wrapping dev; NULL if none matches */
+	list_for_each(pos, &vgroup->device_list) {
+		vdev = list_entry(pos, struct vfio_device, next);
+		if (vdev->dev == dev)
+			break;
+		vdev = NULL;
+	}
+
+	if (!vdev)
+		goto out;
+
+	/* XXX Did a device we're using go away? */
+	BUG_ON(vdev->refcnt);
+
+	/* The device list is changing, so the cached read text is stale */
+	if (vcontainer && vcontainer->iommu) {
+		iommu_detach_device(vcontainer->iommu->domain, vdev->dev);
+		vfio_container_reset_read(vcontainer);
+	}
+
+	list_del(&vdev->next);
+	vdev->ops->free(vdev);
+
+	/* Last device gone and group not open: drop the group as well */
+	if (list_empty(&vgroup->device_list) && vgroup->refcnt == 0) {
+		device_destroy(vfio.class, vgroup->devt);
+		idr_remove(&vfio.idr, MINOR(vgroup->devt));
+		list_del(&vgroup->next);
+		kfree(vgroup);
+	}
+out:
+	mutex_unlock(&vfio.group_lock);
+}
+
+/*
+ * Check whether every device of every group attached to @vcontainer is
+ * bound to a driver owned by this module.
+ *
+ * Returns 1 if so, 0 as soon as a device with no driver or a foreign
+ * driver is found.
+ */
+static int __vfio_group_viable(struct vfio_container *vcontainer)
+{
+	struct vfio_group *vgroup;
+	struct vfio_device *vdev;
+
+	list_for_each_entry(vgroup, &vfio.group_list, next) {
+		if (vgroup->container != vcontainer)
+			continue;
+
+		list_for_each_entry(vdev, &vgroup->device_list, next) {
+			struct device_driver *drv = vdev->dev->driver;
+
+			if (!drv || drv->owner != THIS_MODULE)
+				return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Tear down the iommu of @vcontainer: detach every device of every group
+ * in the container from the domain, then free the domain and the
+ * vfio_iommu.
+ *
+ * Returns 0 on success or when there is no iommu, -EBUSY while the iommu
+ * still has references.
+ */
+static int __vfio_close_iommu(struct vfio_container *vcontainer)
+{
+	struct list_head *gpos, *dpos;
+	struct vfio_iommu *viommu = vcontainer->iommu;
+	struct vfio_group *vgroup;
+	struct vfio_device *vdev;
+
+	if (!viommu)
+		return 0;
+
+	if (viommu->refcnt)
+		return -EBUSY;
+
+	list_for_each(gpos, &vfio.group_list) {
+		vgroup = list_entry(gpos, struct vfio_group, next);
+		if (vgroup->container != vcontainer)
+			continue;
+
+		list_for_each(dpos, &vgroup->device_list) {
+			vdev = list_entry(dpos, struct vfio_device, next);
+			iommu_detach_device(viommu->domain, vdev->dev);
+			vdev->iommu = NULL;
+		}
+	}
+	iommu_domain_free(viommu->domain);
+	kfree(viommu);
+	vcontainer->iommu = NULL;
+	return 0;
+}
+
+/*
+ * Allocate an iommu domain for @vcontainer and attach every device of
+ * every group in the container to it.
+ *
+ * Returns 0 on success; -EBUSY if the container is not viable, -ENOMEM
+ * if the vfio_iommu cannot be allocated, -EFAULT if the domain cannot be
+ * allocated or lacks interrupt remapping while allow_unsafe_intrs is
+ * unset, or the error from iommu_attach_device().
+ */
+static int __vfio_open_iommu(struct vfio_container *vcontainer)
+{
+	struct list_head *gpos, *dpos;
+	struct vfio_iommu *viommu;
+	struct vfio_group *vgroup;
+	struct vfio_device *vdev;
+
+	if (!__vfio_group_viable(vcontainer))
+		return -EBUSY;
+
+	viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
+	if (!viommu)
+		return -ENOMEM;
+
+	viommu->domain = iommu_domain_alloc();
+	if (!viommu->domain) {
+		kfree(viommu);
+		return -EFAULT;
+	}
+
+	viommu->vfio = &vfio;
+	vcontainer->iommu = viommu;
+
+	list_for_each(gpos, &vfio.group_list) {
+		vgroup = list_entry(gpos, struct vfio_group, next);
+		if (vgroup->container != vcontainer)
+			continue;
+
+		list_for_each(dpos, &vgroup->device_list) {
+			int ret;
+
+			vdev = list_entry(dpos, struct vfio_device, next);
+
+			ret = iommu_attach_device(viommu->domain, vdev->dev);
+			if (ret) {
+				/*
+				 * Undo everything; this detaches all devices
+				 * in the container, including ones that were
+				 * not attached yet.
+				 */
+				__vfio_close_iommu(vcontainer);
+				return ret;
+			}
+			vdev->iommu = viommu;
+		}
+	}
+
+	/* The capability is only queried after devices are attached */
+	if (!allow_unsafe_intrs &&
+	    !iommu_domain_has_cap(viommu->domain, IOMMU_CAP_INTR_REMAP)) {
+		__vfio_close_iommu(vcontainer);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * VFIO_GROUP_MERGE: make the group behind @fd share @vgroup's container
+ * and iommu domain.
+ *
+ * @fd must be a vfio group file opened by the same mm, must not be
+ * @vgroup itself, and its iommu must have no references.  Both groups
+ * must agree on IOMMU_CAP_CACHE_COHERENCY.
+ *
+ * Returns 0 on success, -EBADF for a bad fd, -EINVAL when a precondition
+ * fails, or an error from opening/closing an iommu or attaching a
+ * device.
+ */
+static int vfio_group_merge(struct vfio_group *vgroup, int fd)
+{
+	struct vfio_group *vgroup2;
+	struct iommu_domain *domain;
+	struct list_head *pos;
+	struct file *file;
+	int ret = 0;
+
+	mutex_lock(&vfio.group_lock);
+
+	file = fget(fd);
+	if (!file) {
+		ret = -EBADF;
+		goto out_noput;
+	}
+	/* Only accept files that really are vfio group files */
+	if (file->f_op != &vfio_group_fops) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vgroup2 = file->private_data;
+	if (!vgroup2 || vgroup2 == vgroup || vgroup2->mm != vgroup->mm ||
+	    (vgroup2->container->iommu && vgroup2->container->iommu->refcnt)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Both iommus must be open so their capabilities can be compared */
+	if (!vgroup->container->iommu) {
+		ret = __vfio_open_iommu(vgroup->container);
+		if (ret)
+			goto out;
+	}
+
+	if (!vgroup2->container->iommu) {
+		ret = __vfio_open_iommu(vgroup2->container);
+		if (ret)
+			goto out;
+	}
+
+	if (iommu_domain_has_cap(vgroup->container->iommu->domain,
+				 IOMMU_CAP_CACHE_COHERENCY) !=
+	    iommu_domain_has_cap(vgroup2->container->iommu->domain,
+				 IOMMU_CAP_CACHE_COHERENCY)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Detach vgroup2's devices from their own domain and free it */
+	ret = __vfio_close_iommu(vgroup2->container);
+	if (ret)
+		goto out;
+
+	domain = vgroup->container->iommu->domain;
+
+	list_for_each(pos, &vgroup2->device_list) {
+		struct vfio_device *vdev;
+
+		vdev = list_entry(pos, struct vfio_device, next);
+
+		ret = iommu_attach_device(domain, vdev->dev);
+		if (ret) {
+			/*
+			 * Roll back: detach the devices attached before the
+			 * failing one.  Reusing pos is safe because we leave
+			 * the outer loop right after.
+			 */
+			list_for_each(pos, &vgroup2->device_list) {
+				struct vfio_device *vdev2;
+
+				vdev2 = list_entry(pos,
+						   struct vfio_device, next);
+				if (vdev2 == vdev)
+					break;
+
+				iommu_detach_device(domain, vdev2->dev);
+				vdev2->iommu = NULL;
+			}
+			goto out;
+		}
+		vdev->iommu = vgroup->container->iommu;
+	}
+
+	/*
+	 * NOTE(review): the old container is freed unconditionally and the
+	 * shared refcnt grows by one only.  If vgroup2 was opened more than
+	 * once, or other groups were already merged into its container,
+	 * this looks unbalanced - confirm.
+	 */
+	kfree(vgroup2->container->read_buf);
+	kfree(vgroup2->container);
+
+	vgroup2->container = vgroup->container;
+	vgroup->container->refcnt++;
+	vfio_container_reset_read(vgroup->container);
+
+out:
+	fput(file);
+out_noput:
+	mutex_unlock(&vfio.group_lock);
+	return ret;
+}
+
+/*
+ * VFIO_GROUP_UNMERGE: split the group behind @fd out of the container it
+ * shares with @vgroup and give it a fresh, empty container.
+ *
+ * Returns 0 on success, -ENOMEM if the new container cannot be
+ * allocated, -EBADF for a bad fd, -EINVAL if @fd is not a different
+ * vfio group sharing @vgroup's container, or -EBUSY if any device of
+ * that group is in use.
+ */
+static int vfio_group_unmerge(struct vfio_group *vgroup, int fd)
+{
+	struct vfio_group *vgroup2;
+	struct vfio_container *vcontainer2;
+	struct vfio_device *vdev;
+	struct list_head *pos;
+	struct file *file;
+	int ret = 0;
+
+	/* Allocated before taking the lock; freed below on any error */
+	vcontainer2 = kzalloc(sizeof(*vcontainer2), GFP_KERNEL);
+	if (!vcontainer2)
+		return -ENOMEM;
+
+	mutex_lock(&vfio.group_lock);
+
+	file = fget(fd);
+	if (!file) {
+		ret = -EBADF;
+		goto out_noput;
+	}
+	if (file->f_op != &vfio_group_fops) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vgroup2 = file->private_data;
+	if (!vgroup2 || vgroup2 == vgroup ||
+	    vgroup2->container != vgroup->container) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* First pass: refuse if any device still has users */
+	list_for_each(pos, &vgroup2->device_list) {
+		vdev = list_entry(pos, struct vfio_device, next);
+		if (vdev->refcnt) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	/*
+	 * Second pass: detach from the shared domain.
+	 * NOTE(review): dereferences vgroup->container->iommu without a
+	 * NULL check - presumably always set for merged groups; confirm.
+	 */
+	list_for_each(pos, &vgroup2->device_list) {
+		vdev = list_entry(pos, struct vfio_device, next);
+		iommu_detach_device(vgroup->container->iommu->domain,
+				    vdev->dev);
+		vdev->iommu = NULL;
+	}
+
+	vgroup2->container = vcontainer2;
+	vcontainer2->refcnt++;
+	vgroup->container->refcnt--;
+	vfio_container_reset_read(vgroup->container);
+out:
+	fput(file);
+out_noput:
+	if (ret)
+		kfree(vcontainer2);
+	mutex_unlock(&vfio.group_lock);
+	return ret;
+}
+
+/*
+ * VFIO_GROUP_GET_IOMMU_FD: return a new file descriptor for the iommu of
+ * @vgroup's container, opening the iommu first if necessary.
+ *
+ * A fresh anonymous file is created for every descriptor.  fd_install()
+ * consumes a file reference and vfio_iommu_release() runs once per file,
+ * so one file per fd keeps viommu->refcnt balanced and never reinstalls
+ * a file that has already been released.
+ *
+ * Returns the new fd (>= 0) or a negative errno.
+ */
+static int vfio_group_get_iommu_fd(struct vfio_group *vgroup)
+{
+	struct vfio_iommu *viommu;
+	struct file *file;
+	int fd, ret;
+
+	mutex_lock(&vfio.group_lock);
+
+	if (!vgroup->container->iommu) {
+		ret = __vfio_open_iommu(vgroup->container);
+		if (ret)
+			goto out;
+	}
+
+	viommu = vgroup->container->iommu;
+
+	/* Reserve the fd first so a failure leaves nothing to undo */
+	fd = get_unused_fd();
+	if (fd < 0) {
+		ret = fd;
+		goto out;
+	}
+
+	file = anon_inode_getfile("vfio-iommu", &vfio_iommu_fops,
+				  viommu, O_RDWR);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		ret = PTR_ERR(file);
+		goto out;
+	}
+
+	/*
+	 * Count the reference before the fd becomes visible; release takes
+	 * group_lock, so it cannot run until we drop it.
+	 */
+	viommu->refcnt++;
+	fd_install(fd, file);
+	ret = fd;
+out:
+	mutex_unlock(&vfio.group_lock);
+	return ret;
+}
+
+/*
+ * VFIO_GROUP_GET_DEVICE_FD: return a new file descriptor for the device
+ * named @buf, searching every group that shares @vgroup's container.
+ * The container's iommu is opened first if necessary.
+ *
+ * A fresh anonymous file is created for every descriptor.  fd_install()
+ * consumes a file reference and vfio_device_release() runs once per
+ * file, so one file per fd keeps the device and iommu refcnts balanced
+ * and never reinstalls a file that has already been released.
+ *
+ * Returns the new fd (>= 0), -ENODEV if no device has that name, or
+ * another negative errno.
+ */
+static int vfio_group_get_device_fd(struct vfio_group *vgroup, char *buf)
+{
+	struct vfio_container *vcontainer = vgroup->container;
+	struct vfio_device *vdev, *found = NULL;
+	struct vfio_group *g;
+	struct file *file;
+	int fd, ret;
+
+	mutex_lock(&vfio.group_lock);
+
+	if (!vcontainer->iommu) {
+		ret = __vfio_open_iommu(vcontainer);
+		if (ret)
+			goto out;
+	}
+
+	list_for_each_entry(g, &vfio.group_list, next) {
+		if (g->container != vcontainer)
+			continue;
+
+		list_for_each_entry(vdev, &g->device_list, next) {
+			if (!strncmp(buf, dev_name(vdev->dev), MAX_PATH)) {
+				found = vdev;
+				break;
+			}
+		}
+		if (found)
+			break;
+	}
+
+	/*
+	 * Set explicitly: a successful __vfio_open_iommu() above leaves
+	 * ret at 0, which the caller would take for a valid descriptor.
+	 */
+	if (!found) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Reserve the fd first so a failure leaves nothing to undo */
+	fd = get_unused_fd();
+	if (fd < 0) {
+		ret = fd;
+		goto out;
+	}
+
+	file = anon_inode_getfile("vfio-device", &vfio_device_fops,
+				  found, O_RDWR);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		ret = PTR_ERR(file);
+		goto out;
+	}
+
+	/*
+	 * Count the references before the fd becomes visible; release
+	 * takes group_lock, so it cannot run until we drop it.
+	 */
+	found->refcnt++;
+	vcontainer->iommu->refcnt++;
+	fd_install(fd, file);
+	ret = fd;
+out:
+	mutex_unlock(&vfio.group_lock);
+	return ret;
+}
+
+/*
+ * ioctl() on a group file.  Only the mm that opened the group may issue
+ * ioctls; anyone else gets -EIO.
+ *
+ * VFIO_GROUP_MERGE / VFIO_GROUP_UNMERGE read an int fd from user space
+ * (-EFAULT on a bad pointer, -EINVAL on a negative fd).
+ * VFIO_GROUP_GET_IOMMU_FD takes no argument.
+ * VFIO_GROUP_GET_DEVICE_FD reads a device name of at most MAX_PATH bytes.
+ * Any other command returns -ENOSYS.
+ */
+static long vfio_group_unl_ioctl(struct file *filep,
+				 unsigned int cmd, unsigned long arg)
+{
+	struct vfio_group *vgroup = filep->private_data;
+	char *name;
+	int fd, ret;
+
+	if (vgroup->mm != current->mm)
+		return -EIO;
+
+	switch (cmd) {
+	case VFIO_GROUP_MERGE:
+	case VFIO_GROUP_UNMERGE:
+		if (get_user(fd, (int __user *)arg))
+			return -EFAULT;
+		if (fd < 0)
+			return -EINVAL;
+
+		if (cmd == VFIO_GROUP_UNMERGE)
+			return vfio_group_unmerge(vgroup, fd);
+		return vfio_group_merge(vgroup, fd);
+
+	case VFIO_GROUP_GET_IOMMU_FD:
+		return vfio_group_get_iommu_fd(vgroup);
+
+	case VFIO_GROUP_GET_DEVICE_FD:
+		name = strndup_user((const char __user *)arg, MAX_PATH);
+		if (IS_ERR(name))
+			return PTR_ERR(name);
+
+		ret = vfio_group_get_device_fd(vgroup, name);
+		kfree(name);
+		return ret;
+
+	default:
+		return -ENOSYS;
+	}
+}
+
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat ioctl entry point: run the argument through compat_ptr() and
+ * hand the result to the native ioctl handler.
+ */
+static long vfio_group_compat_ioctl(struct file *filep,
+				    unsigned int cmd, unsigned long arg)
+{
+	unsigned long native_arg = (unsigned long)compat_ptr(arg);
+
+	return vfio_group_unl_ioctl(filep, cmd, native_arg);
+}
+#endif	/* CONFIG_COMPAT */
+
+/*
+ * open() on a group character device.  The first opener gets a fresh
+ * container and becomes the owning mm; later opens succeed only from the
+ * same mm.
+ *
+ * Returns 0 on success, -ENODEV if the minor maps to no group, -ENOMEM
+ * if the container cannot be allocated, or -EBUSY if the group is
+ * already open by a different mm.
+ */
+static int vfio_group_open(struct inode *inode, struct file *filep)
+{
+	struct vfio_group *vgroup;
+	int ret = 0;
+
+	mutex_lock(&vfio.group_lock);
+
+	/* The idr is keyed by the minor number assigned at group creation */
+	vgroup = idr_find(&vfio.idr, iminor(inode));
+
+	if (!vgroup) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (!vgroup->refcnt) {
+		struct vfio_container *vcontainer;
+		vcontainer = kzalloc(sizeof(*vcontainer), GFP_KERNEL);
+		if (!vcontainer) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		vgroup->container = vcontainer;
+		vgroup->mm = current->mm;
+	} else if (current->mm != vgroup->mm) {
+		ret = -EBUSY;
+		goto out;
+	}
+	filep->private_data = vgroup;
+	vgroup->refcnt++;
+	vgroup->container->refcnt++;
+out:
+	mutex_unlock(&vfio.group_lock);
+
+	return ret;
+}
+
+/*
+ * release() on a group file.  For any but the last opener this only
+ * drops the group and container reference counts.  The last opener also
+ * leaves a shared (merged) container or tears down a private one, and
+ * destroys the group if it has no devices left.
+ *
+ * Always returns 0.
+ * NOTE(review): ret is set to -EBUSY (or an iommu close error) on some
+ * paths but is never returned, and those paths skip the refcount
+ * decrements - confirm whether the group can get stuck open.
+ */
+static int vfio_group_release(struct inode *inode, struct file *filep)
+{
+	struct vfio_group *vgroup = filep->private_data;
+	/* Read before group_lock is taken */
+	struct vfio_container *vcontainer = vgroup->container;
+	struct list_head *pos;
+	int ret = 0;
+
+	mutex_lock(&vfio.group_lock);
+
+	/* Not the last opener: just drop the references */
+	if (vgroup->refcnt > 1) {
+		vgroup->refcnt--;
+		vcontainer->refcnt--;
+		goto out;
+	}
+
+	list_for_each(pos, &vgroup->device_list) {
+		struct vfio_device *vdev;
+		vdev = list_entry(pos, struct vfio_device, next);
+		if (vdev->refcnt) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	/* Merged group? */
+	if (vcontainer->refcnt > 1) {
+		/* Container lives on; only detach this group's devices */
+		if (vcontainer->iommu) {
+			list_for_each(pos, &vgroup->device_list) {
+				struct vfio_device *vdev;
+				vdev = list_entry(pos,
+						  struct vfio_device, next);
+				iommu_detach_device(vcontainer->iommu->domain,
+						    vdev->dev);
+				vdev->iommu = NULL;
+			}
+		}
+		vcontainer->refcnt--;
+		vfio_container_reset_read(vcontainer);
+	} else {
+		/* Sole user of the container: close the iommu and free it */
+		if (vcontainer->iommu && vcontainer->iommu->refcnt) {
+			ret = -EBUSY;
+			goto out;
+		}
+
+		ret = __vfio_close_iommu(vcontainer);
+		if (ret)
+			goto out;
+
+		kfree(vcontainer->read_buf);
+		kfree(vcontainer);
+	}
+
+	vgroup->refcnt--;
+	vgroup->mm = NULL;
+	vgroup->container = NULL;
+
+	/* Possible we had the group open while device members were removed */
+	if (list_empty(&vgroup->device_list)) {
+		device_destroy(vfio.class, vgroup->devt);
+		idr_remove(&vfio.idr, MINOR(vgroup->devt));
+		list_del(&vgroup->next);
+		kfree(vgroup);
+	}
+out:
+	mutex_unlock(&vfio.group_lock);
+	return 0;
+}
+
+/*
+ * Build the text returned by read() on a group file: one "group: N" line
+ * per group in @vcontainer, each followed by a "device: NAME" line for
+ * every device in that group.  The NUL-terminated result is stored in
+ * vcontainer->read_buf.
+ *
+ * Invariant while building: buf has room for at least off + MAX_PATH
+ * bytes, and everything from buf[off] onwards is zero.
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing is leaked and
+ * read_buf is left untouched.
+ */
+static int __vfio_container_create_read_buf(struct vfio_container *vcontainer)
+{
+	struct vfio_group *vgroup;
+	struct vfio_device *vdev;
+	char *buf, *tmp;
+	int off = 0;
+
+	buf = kzalloc(MAX_PATH, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	list_for_each_entry(vgroup, &vfio.group_list, next) {
+		if (vgroup->container != vcontainer)
+			continue;
+
+		/*
+		 * scnprintf() returns what was actually written, so off
+		 * never runs ahead of the text even if a line is truncated.
+		 */
+		off += scnprintf(buf + off, MAX_PATH,
+				 "group: %u\n", vgroup->group);
+		/* Keep the old pointer so it can be freed if this fails */
+		tmp = krealloc(buf, off + MAX_PATH, GFP_KERNEL);
+		if (!tmp)
+			goto err_free;
+		buf = tmp;
+		memset(buf + off, 0, MAX_PATH);
+
+		list_for_each_entry(vdev, &vgroup->device_list, next) {
+			off += scnprintf(buf + off, MAX_PATH,
+					 "device: %s\n", dev_name(vdev->dev));
+			tmp = krealloc(buf, off + MAX_PATH, GFP_KERNEL);
+			if (!tmp)
+				goto err_free;
+			buf = tmp;
+			memset(buf + off, 0, MAX_PATH);
+		}
+	}
+
+	/* Trim to the text plus its terminating NUL */
+	tmp = krealloc(buf, off + 1, GFP_KERNEL);
+	if (!tmp)
+		goto err_free;
+
+	vcontainer->read_buf = tmp;
+	return 0;
+
+err_free:
+	kfree(buf);
+	return -ENOMEM;
+}
+
+/*
+ * read() on a group file: copy out the container's group/device listing,
+ * building it on first use.  The terminating NUL counts as part of the
+ * readable data.
+ *
+ * Returns the number of bytes copied, 0 at or past the end, -EINVAL if
+ * the group has no container, -EFAULT on a bad user buffer, or the error
+ * from building the listing.
+ */
+static ssize_t vfio_group_read(struct file *filep, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct vfio_group *vgroup = filep->private_data;
+	struct vfio_container *vcontainer;
+	ssize_t ret = 0;
+	size_t len;
+
+	mutex_lock(&vfio.group_lock);
+
+	vcontainer = vgroup->container;
+	if (!vcontainer) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!vcontainer->read_buf) {
+		ret = __vfio_container_create_read_buf(vcontainer);
+		if (ret)
+			goto out;
+	}
+
+	/* Readable length includes the trailing NUL */
+	len = strlen(vcontainer->read_buf) + 1;
+
+	if (*ppos >= len) {
+		ret = 0;
+		goto out;
+	}
+
+	/* Clamp the request to what is left */
+	if (*ppos + count > len)
+		count = len - *ppos;
+
+	if (copy_to_user(buf, vcontainer->read_buf + *ppos, count)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = count;
+	*ppos += count;
+out:
+	mutex_unlock(&vfio.group_lock);
+	return ret;
+}
+
+/*
+ * File operations for the group character devices registered in
+ * vfio_init().  The merge/unmerge code also compares a file's f_op
+ * against this table to recognise vfio group files.
+ */
+static const struct file_operations vfio_group_fops = {
+	.owner		= THIS_MODULE,
+	.open		= vfio_group_open,
+	.release	= vfio_group_release,
+	.read		= vfio_group_read,
+	.unlocked_ioctl	= vfio_group_unl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vfio_group_compat_ioctl,
+#endif
+};
+
+/*
+ * kref release callback for vfio.kref: destroy the device class and
+ * clear the pointer.  @kref itself is not used.
+ */
+static void vfio_class_release(struct kref *kref)
+{
+	class_destroy(vfio.class);
+	vfio.class = NULL;
+}
+
+/*
+ * Class devnode callback: name the device node "vfio/<device name>".
+ * Returns a kasprintf()-allocated string (NULL on allocation failure);
+ * @mode is left untouched.
+ */
+static char *vfio_devnode(struct device *dev, mode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
+}
+
+/*
+ * Module init: set up the global vfio state, create the "vfio" device
+ * class, reserve a character device region and register the group cdev.
+ *
+ * Returns 0 on success or the errno of the step that failed; earlier
+ * steps are undone through the err_* labels in reverse order.
+ */
+static int __init vfio_init(void)
+{
+	int ret;
+
+	idr_init(&vfio.idr);
+	mutex_init(&vfio.group_lock);
+	INIT_LIST_HEAD(&vfio.group_list);
+
+	kref_init(&vfio.kref);
+	vfio.class = class_create(THIS_MODULE, "vfio");
+	if (IS_ERR(vfio.class)) {
+		ret = PTR_ERR(vfio.class);
+		goto err_class;
+	}
+
+	vfio.class->devnode = vfio_devnode;
+
+	/* FIXME - how many minors to allocate... all of them! */
+	/*
+	 * NOTE(review): a count of MINORMASK covers minors 0..MINORMASK-1,
+	 * while vfio_group_add_dev() only rejects id > MINORMASK - confirm
+	 * the two limits are meant to agree.
+	 */
+	ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio");
+	if (ret)
+		goto err_chrdev;
+
+	cdev_init(&vfio.cdev, &vfio_group_fops);
+	ret = cdev_add(&vfio.cdev, vfio.devt, MINORMASK);
+	if (ret)
+		goto err_cdev;
+
+	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+
+	return 0;
+
+err_cdev:
+	unregister_chrdev_region(vfio.devt, MINORMASK);
+err_chrdev:
+	/* Dropping the only kref destroys the class */
+	kref_put(&vfio.kref, vfio_class_release);
+err_class:
+	return ret;
+}
+
+/*
+ * Module exit: remove every device from every group, then release the
+ * idr, the cdev, the chrdev region and the class reference.
+ *
+ * The lists are walked without holding group_lock; vfio_group_del_dev()
+ * takes the lock itself for each device.
+ */
+static void __exit vfio_cleanup(void)
+{
+	struct list_head *gpos, *gppos;
+
+	list_for_each_safe(gpos, gppos, &vfio.group_list) {
+		struct vfio_group *vgroup;
+		struct list_head *dpos, *dppos;
+
+		vgroup = list_entry(gpos, struct vfio_group, next);
+
+		/*
+		 * NOTE(review): vfio_group_del_dev() frees vgroup once its
+		 * last device is removed and the group is not open, yet this
+		 * loop still steps through vgroup->device_list afterwards -
+		 * looks like a possible use-after-free; confirm.
+		 */
+		list_for_each_safe(dpos, dppos, &vgroup->device_list) {
+			struct vfio_device *vdev;
+
+			vdev = list_entry(dpos, struct vfio_device, next);
+			vfio_group_del_dev(vdev->dev);
+		}
+	}
+
+	idr_destroy(&vfio.idr);
+	cdev_del(&vfio.cdev);
+	unregister_chrdev_region(vfio.devt, MINORMASK);
+	kref_put(&vfio.kref, vfio_class_release);
+}
+
+module_init(vfio_init);
+module_exit(vfio_cleanup);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/vfio_private.h b/drivers/vfio/vfio_private.h
new file mode 100644
index 0000000..2cc300c
--- /dev/null
+++ b/drivers/vfio/vfio_private.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+
+#ifndef VFIO_PRIVATE_H
+#define VFIO_PRIVATE_H
+
+extern const struct file_operations vfio_iommu_fops;
+extern const struct file_operations vfio_device_fops;
+
+/* Global driver state; a single instance lives in vfio_main.c. */
+struct vfio {
+	dev_t			devt;		/* base of the chrdev region */
+	struct cdev		cdev;		/* cdev serving group files */
+	struct list_head	group_list;	/* all struct vfio_group */
+	struct mutex		group_lock;	/* guards groups, devices, refcnts */
+	struct kref		kref;		/* final put destroys the class */
+	struct class		*class;		/* "vfio" device class */
+	struct idr		idr;		/* minor number -> vfio_group */
+};
+
+/*
+ * Callbacks supplied for a device when it is registered with
+ * vfio_group_add_dev().
+ */
+struct vfio_device_ops {
+	/* Allocate a vfio_device for a device; ERR_PTR on failure */
+	struct vfio_device	*(* new)(struct device *);
+	/* Free a vfio_device returned by new */
+	void			(* free)(struct vfio_device *);
+	/* read, write, unlocked_ioctl and mmap are used by vfio_device_fops */
+	struct file_operations	fops;
+};
+
+/* IOMMU domain shared by all groups in one container. */
+struct vfio_iommu {
+	struct iommu_domain	*domain;	/* from iommu_domain_alloc() */
+	struct vfio		*vfio;		/* back pointer to global state */
+	int			refcnt;		/* iommu and device fds handed out */
+	struct file		*file;		/* file associated with the iommu fd */
+};
+
+/* One device registered with a vfio group. */
+struct vfio_device {
+	struct device		*dev;		/* underlying device */
+	struct list_head	next;		/* entry in vfio_group.device_list */
+	struct file		*file;		/* file associated with the device fd */
+	struct vfio_device_ops	*ops;		/* callbacks given at registration */
+	struct vfio		*vfio;		/* back pointer to global state */
+	struct vfio_iommu	*iommu;		/* set while attached to a domain */
+	int			refcnt;		/* device fds handed out */
+};
+
+/*
+ * State shared by one group, or by several merged groups, while they are
+ * open.
+ */
+struct vfio_container {
+	struct vfio_iommu	*iommu;		/* NULL until the iommu is opened */
+	char			*read_buf;	/* cached read() text, or NULL */
+	int			refcnt;		/* opens and merges using this container */
+};
+
+/* One iommu group exposed as a character device. */
+struct vfio_group {
+	dev_t			devt;		/* device number of the group node */
+	unsigned int		group;		/* id from iommu_device_group() */
+	int			refcnt;		/* number of opens of the group file */
+	struct mm_struct	*mm;		/* mm of the first opener; NULL if closed */
+	struct vfio_container	*container;	/* set while the group is open */
+	struct list_head	device_list;	/* struct vfio_device entries */
+	struct list_head	next;		/* entry in vfio.group_list */
+};
+
+extern int vfio_group_add_dev(struct device *dev, void *data);
+extern void vfio_group_del_dev(struct device *dev);
+
+#endif /* VFIO_PRIVATE_H */

  parent reply	other threads:[~2011-09-01 19:51 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-01 19:50 [Qemu-devel] [RFC PATCH 0/5] VFIO-NG group/device/iommu framework Alex Williamson
2011-09-01 19:50 ` [RFC PATCH 1/5] iommu: Add iommu_device_group callback and iommu_group sysfs entry Alex Williamson
2011-09-01 19:50   ` [Qemu-devel] " Alex Williamson
2011-09-01 19:50 ` [RFC PATCH 2/5] intel-iommu: Implement iommu_device_group Alex Williamson
2011-09-01 19:50   ` [Qemu-devel] " Alex Williamson
2011-09-01 19:50 ` Alex Williamson [this message]
2011-09-07 14:52   ` [RFC PATCH 3/5] VFIO: Base framework for new VFIO driver Konrad Rzeszutek Wilk
2011-09-07 14:52     ` [Qemu-devel] " Konrad Rzeszutek Wilk
2011-09-19 16:42     ` Alex Williamson
2011-09-19 16:42       ` [Qemu-devel] " Alex Williamson
2011-09-01 19:50 ` Alex Williamson
2011-09-01 19:50 ` [RFC PATCH 4/5] VFIO: Add PCI device support Alex Williamson
2011-09-01 19:50 ` [Qemu-devel] " Alex Williamson
2011-09-07 18:55   ` Konrad Rzeszutek Wilk
2011-09-07 18:55     ` [Qemu-devel] " Konrad Rzeszutek Wilk
2011-09-08  7:52     ` Avi Kivity
2011-09-08  7:52       ` [Qemu-devel] " Avi Kivity
2011-09-08 21:52       ` Alex Williamson
2011-09-08 21:52         ` [Qemu-devel] " Alex Williamson
2011-09-01 19:50 ` [Qemu-devel] [RFC PATCH 5/5] VFIO: Simple test tool Alex Williamson
2011-09-01 19:50 ` Alex Williamson
2011-09-07 11:58 ` [RFC PATCH 0/5] VFIO-NG group/device/iommu framework Alexander Graf
2011-09-07 11:58   ` [Qemu-devel] " Alexander Graf
2011-09-08 21:54   ` Alex Williamson
2011-09-08 21:54     ` [Qemu-devel] " Alex Williamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110901195043.2391.31843.stgit@s20.home \
    --to=alex.williamson@redhat.com \
    --cc=B07421@freescale.com \
    --cc=B08248@freescale.com \
    --cc=aafabbri@cisco.com \
    --cc=agraf@suse.de \
    --cc=aik@au1.ibm.com \
    --cc=avi@redhat.com \
    --cc=benve@cisco.com \
    --cc=chrisw@sous-sol.org \
    --cc=dwg@au1.ibm.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=joerg.roedel@amd.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=pmac@au1.ibm.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.