From: Eric Auger
Date: Wed, 9 Apr 2014 16:33:06 +0100
Message-Id: <1397057589-11779-4-git-send-email-eric.auger@linaro.org>
In-Reply-To: <1397057589-11779-1-git-send-email-eric.auger@linaro.org>
References: <1397057589-11779-1-git-send-email-eric.auger@linaro.org>
Subject: [Qemu-devel] [RFC v2 3/6] vfio: add vfio-platform support
To: eric.auger@st.com, christoffer.dall@linaro.org, qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, kim.phillips@freescale.com, Kim Phillips,
    agraf@suse.de, stuart.yoder@freescale.com, alex.williamson@redhat.com,
    christophe.barnichon@st.com, a.motakis@virtualopensystems.com,
    kvmarm@lists.cs.columbia.edu

From: Kim Phillips

Functions that the PCI and platform device support share are moved into
common.c.

The common vfio_{get,put}_group() take an additional argument, a pointer to
a reset handler, which is passed on to qemu_register_reset() only if it is
non-NULL (the platform device code currently passes NULL as its reset
handler).

The platform device code uses SysBusDevice instead of PCIDevice. Since
realize() returns void, unlike PCIDevice's initfn, error codes are moved
into the error message text with %m.

Only MMIO access is supported at this time.

The perceived path for future QEMU development is:
- add support for interrupts
- verify and test the platform device unmap path
- test the existing PCI path for regressions
- add support for creating platform devices on the qemu command line
  - currently the device address specification is hardcoded for test
    development on Calxeda Midway's fff51000.ethernet device
- reset is not supported and registration of reset functions is bypassed
  for platform devices
  - there is no standard means of resetting a platform device; it is
    unclear whether handling it at device-VFIO binding time suffices

Signed-off-by: Kim Phillips

[1] http://www.spinics.net/lists/kvm-arm/msg08195.html
---
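Note for reviewers: the reset-handler argument amounts to roughly the
following call-site pattern, condensed from the hunks below (nothing here
beyond what the patch itself adds):

    /* common API, declared in vfio-common.h */
    VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler);
    void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler);

    /* PCI keeps registering/unregistering its reset handler */
    group = vfio_get_group(groupid, vfio_pci_reset_handler);
    ...
    vfio_put_group(group, vfio_pci_reset_handler);

    /* the platform device has no reset handler yet; passing NULL skips
     * qemu_register_reset()/qemu_unregister_reset() entirely */
    group = vfio_get_group(groupid, NULL);
    ...
    vfio_put_group(group, NULL);
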
 hw/vfio/Makefile.objs |   2 +
 hw/vfio/common.c      | 486 ++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.c         | 480 ++-----------------------------------------------
 hw/vfio/platform.c    | 381 +++++++++++++++++++++++++++++++++++++++
 hw/vfio/vfio-common.h |  55 ++++++
 5 files changed, 937 insertions(+), 467 deletions(-)
 create mode 100644 hw/vfio/common.c
 create mode 100644 hw/vfio/platform.c
 create mode 100644 hw/vfio/vfio-common.h

diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index 31c7dab..c5c76fe 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -1,3 +1,5 @@
 ifeq ($(CONFIG_LINUX), y)
+obj-$(CONFIG_SOFTMMU) += common.o
 obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_SOFTMMU) += platform.o
 endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
new file mode 100644
index 0000000..9d1f723
--- /dev/null
+++ b/hw/vfio/common.c
@@ -0,0 +1,486 @@
+/*
+ * vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Alex Williamson
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ *  Adapted for KVM by Qumranet.
+ *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "qemu/range.h"
+#include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
+
+#include "vfio-common.h"
+
+#define DEBUG_VFIO
+#ifdef DEBUG_VFIO
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+static QLIST_HEAD(, VFIOContainer)
+    container_list = QLIST_HEAD_INITIALIZER(container_list);
+
+QLIST_HEAD(, VFIOGroup)
+    group_list = QLIST_HEAD_INITIALIZER(group_list);
+
+
+struct VFIODevice;
+
+#ifdef CONFIG_KVM
+/*
+ * We have a single VFIO pseudo device per KVM VM.  Once created it lives
+ * for the life of the VM.  Closing the file descriptor only drops our
+ * reference to it and the device's reference to kvm.  Therefore once
+ * initialized, this file descriptor is only released on QEMU exit and
+ * we'll re-use it should another vfio device be attached before then.
+ */ +static int vfio_kvm_device_fd = -1; +#endif + +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +static int vfio_dma_unmap(VFIOContainer *container, + hwaddr iova, ram_addr_t size) +{ + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, + .iova = iova, + .size = size, + }; + + if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno); + return -errno; + } + + return 0; +} + +static int vfio_dma_map(VFIOContainer *container, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +{ + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, + .vaddr = (__u64)(uintptr_t)vaddr, + .iova = iova, + .size = size, + }; + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; + } + + /* + * Try the mapping, if it fails with EBUSY, unmap the region and try + * again. This shouldn't be necessary, but we sometimes see it in + * the the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || + (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } + + DPRINTF("VFIO_MAP_DMA: %d\n", -errno); + return -errno; +} + +static bool vfio_listener_skipped_section(MemoryRegionSection *section) +{ + return !memory_region_is_ram(section->mr) || + /* + * Sizing an enabled 64-bit BAR can cause spurious mappings to + * addresses in the upper part of the 64-bit address space. These + * are never accessed by the CPU and beyond the address width of + * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. + */ + section->offset_within_address_space & (1ULL << 63); +} + +static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + hwaddr iova, end; + void *vaddr; + int ret; + + assert(!memory_region_is_iommu(section->mr)); + + if (vfio_listener_skipped_section(section)) { + DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(int128_sub(section->size, int128_one()))); + return; + } + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { + error_report("%s received unaligned region", __func__); + return; + } + + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + end = (section->offset_within_address_space + int128_get64(section->size)) & + TARGET_PAGE_MASK; + + if (iova >= end) { + return; + } + + vaddr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region + + (iova - section->offset_within_address_space); + + DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", + iova, end - 1, vaddr); + + memory_region_ref(section->mr); + ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); + if (ret) { + error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%m)", + container, iova, end - iova, vaddr, ret); + + /* + * On the initfn path, store the first error in the container so we + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. 
+ */ + if (!container->iommu_data.type1.initialized) { + if (!container->iommu_data.type1.error) { + container->iommu_data.type1.error = ret; + } + } else { + hw_error("vfio: DMA mapping failed, unable to continue"); + } + } +} + +static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + hwaddr iova, end; + int ret; + + if (vfio_listener_skipped_section(section)) { + DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(int128_sub(section->size, int128_one()))); + return; + } + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { + error_report("%s received unaligned region", __func__); + return; + } + + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + end = (section->offset_within_address_space + int128_get64(section->size)) & + TARGET_PAGE_MASK; + + if (iova >= end) { + return; + } + + DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", + iova, end - 1); + + ret = vfio_dma_unmap(container, iova, end - iova); + memory_region_unref(section->mr); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", + container, iova, end - iova, ret); + } +} + +static MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, +}; + +static void vfio_listener_release(VFIOContainer *container) +{ + memory_listener_unregister(&container->iommu_data.type1.listener); +} + +static void vfio_kvm_device_add_group(VFIOGroup *group) +{ +#ifdef CONFIG_KVM + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_ADD, + .addr = (uint64_t)(unsigned long)&group->fd, + }; + + if (!kvm_enabled()) { + return; + } + + if (vfio_kvm_device_fd < 0) { + struct kvm_create_device cd = { + .type = KVM_DEV_TYPE_VFIO, + }; + + if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { + DPRINTF("KVM_CREATE_DEVICE: %m\n"); + return; + } + + vfio_kvm_device_fd = cd.fd; + } + + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_report("Failed to add group %d to KVM VFIO device: %m", + group->groupid); + } +#endif +} + +static void vfio_kvm_device_del_group(VFIOGroup *group) +{ +#ifdef CONFIG_KVM + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_DEL, + .addr = (uint64_t)(unsigned long)&group->fd, + }; + + if (vfio_kvm_device_fd < 0) { + return; + } + + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_report("Failed to remove group %d from KVM VFIO device: %m", + group->groupid); + } +#endif +} + +static int vfio_connect_container(VFIOGroup *group) +{ + VFIOContainer *container; + int ret, fd; + + if (group->container) { + return 0; + } + + QLIST_FOREACH(container, &container_list, next) { + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + return 0; + } + } + + fd = qemu_open("/dev/vfio/vfio", O_RDWR); + if (fd < 0) { + error_report("vfio: failed to open /dev/vfio/vfio: %m"); + return -errno; + } + + ret = ioctl(fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + error_report("vfio: supported vfio version: %d, " + "reported version: %d", 
VFIO_API_VERSION, ret); + close(fd); + return -EINVAL; + } + + container = g_malloc0(sizeof(*container)); + container->fd = fd; + + if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); + if (ret) { + error_report("vfio: failed to set group container: %m"); + g_free(container); + close(fd); + return -errno; + } + + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); + if (ret) { + error_report("vfio: failed to set iommu for container: %m"); + g_free(container); + close(fd); + return -errno; + } + + container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.release = vfio_listener_release; + + memory_listener_register(&container->iommu_data.type1.listener, + &address_space_memory); + + if (container->iommu_data.type1.error) { + ret = container->iommu_data.type1.error; + vfio_listener_release(container); + g_free(container); + close(fd); + error_report("vfio: memory listener initialization failed for container"); + return ret; + } + + container->iommu_data.type1.initialized = true; + + } else { + error_report("vfio: No available IOMMU models"); + g_free(container); + close(fd); + return -EINVAL; + } + + QLIST_INIT(&container->group_list); + QLIST_INSERT_HEAD(&container_list, container, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + + return 0; +} + +static void vfio_disconnect_container(VFIOGroup *group) +{ + VFIOContainer *container = group->container; + + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from container", + group->groupid); + } + + QLIST_REMOVE(group, container_next); + group->container = NULL; + + if (QLIST_EMPTY(&container->group_list)) { + if (container->iommu_data.release) { + container->iommu_data.release(container); + } + QLIST_REMOVE(container, next); + DPRINTF("vfio_disconnect_container: close container->fd\n"); + close(container->fd); + g_free(container); + } +} + +VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler) +{ + VFIOGroup *group; + char path[32]; + struct vfio_group_status status = { .argsz = sizeof(status) }; + + QLIST_FOREACH(group, &group_list, next) { + if (group->groupid == groupid) { + return group; + } + } + + group = g_malloc0(sizeof(*group)); + + snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); + group->fd = qemu_open(path, O_RDWR); + if (group->fd < 0) { + error_report("vfio: error opening %s: %m", path); + g_free(group); + return NULL; + } + + if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { + error_report("vfio: error getting group status: %m"); + close(group->fd); + g_free(group); + return NULL; + } + + if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + error_report("vfio: error, group %d is not viable, please ensure " + "all devices within the iommu_group are bound to their " + "vfio bus driver.", groupid); + close(group->fd); + g_free(group); + return NULL; + } + + group->groupid = groupid; + QLIST_INIT(&group->device_list); + + if (vfio_connect_container(group)) { + error_report("vfio: failed to setup container for group %d", groupid); + close(group->fd); + g_free(group); + return NULL; + } + + if (QLIST_EMPTY(&group_list) && reset_handler) { + qemu_register_reset(reset_handler, NULL); + } + + QLIST_INSERT_HEAD(&group_list, group, next); + + vfio_kvm_device_add_group(group); + + return group; +} + +void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler) +{ + if 
(!QLIST_EMPTY(&group->device_list)) { + return; + } + + vfio_kvm_device_del_group(group); + vfio_disconnect_container(group); + QLIST_REMOVE(group, next); + DPRINTF("vfio_put_group: close group->fd\n"); + close(group->fd); + g_free(group); + + if (QLIST_EMPTY(&group_list) && reset_handler) { + qemu_unregister_reset(reset_handler, NULL); + } +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 9cf5b84..9e70d68 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1,5 +1,5 @@ /* - * vfio based device assignment support + * vfio based device assignment support - PCI devices * * Copyright Red Hat, Inc. 2012 * @@ -40,6 +40,8 @@ #include "sysemu/kvm.h" #include "sysemu/sysemu.h" +#include "vfio-common.h" + /* #define DEBUG_VFIO */ #ifdef DEBUG_VFIO #define DPRINTF(fmt, ...) \ @@ -55,6 +57,8 @@ #define VFIO_ALLOW_KVM_MSI 1 #define VFIO_ALLOW_KVM_MSIX 1 +extern QLIST_HEAD(, VFIOGroup) group_list; + struct VFIODevice; typedef struct VFIOQuirk { @@ -135,25 +139,6 @@ enum { struct VFIOGroup; -typedef struct VFIOType1 { - MemoryListener listener; - int error; - bool initialized; -} VFIOType1; - -typedef struct VFIOContainer { - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - struct { - /* enable abstraction to support various iommu backends */ - union { - VFIOType1 type1; - }; - void (*release)(struct VFIOContainer *); - } iommu_data; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_ENTRY(VFIOContainer) next; -} VFIOContainer; - /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ typedef struct VFIOMSIXInfo { uint8_t table_bar; @@ -200,15 +185,6 @@ typedef struct VFIODevice { bool rom_read_failed; } VFIODevice; -typedef struct VFIOGroup { - int fd; - int groupid; - VFIOContainer *container; - QLIST_HEAD(, VFIODevice) device_list; - QLIST_ENTRY(VFIOGroup) next; - QLIST_ENTRY(VFIOGroup) container_next; -} VFIOGroup; - typedef struct VFIORomBlacklistEntry { uint16_t vendor_id; uint16_t device_id; @@ -234,23 +210,6 @@ static const VFIORomBlacklistEntry romblacklist[] = { #define MSIX_CAP_LENGTH 12 -static QLIST_HEAD(, VFIOContainer) - container_list = QLIST_HEAD_INITIALIZER(container_list); - -static QLIST_HEAD(, VFIOGroup) - group_list = QLIST_HEAD_INITIALIZER(group_list); - -#ifdef CONFIG_KVM -/* - * We have a single VFIO pseudo device per KVM VM. Once created it lives - * for the life of the VM. Closing the file descriptor only drops our - * reference to it and the device's reference to kvm. Therefore once - * initialized, this file descriptor is only released on QEMU exit and - * we'll re-use it should another vfio device be attached before then. 
- */ -static int vfio_kvm_device_fd = -1; -#endif - static void vfio_disable_interrupts(VFIODevice *vdev); static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, @@ -2180,183 +2139,6 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, } /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_dma_unmap(VFIOContainer *container, - hwaddr iova, ram_addr_t size) -{ - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, - .iova = iova, - .size = size, - }; - - if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno); - return -errno; - } - - return 0; -} - -static int vfio_dma_map(VFIOContainer *container, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) -{ - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, - .vaddr = (__u64)(uintptr_t)vaddr, - .iova = iova, - .size = size, - }; - - if (!readonly) { - map.flags |= VFIO_DMA_MAP_FLAG_WRITE; - } - - /* - * Try the mapping, if it fails with EBUSY, unmap the region and try - * again. This shouldn't be necessary, but we sometimes see it in - * the the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || - (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } - - DPRINTF("VFIO_MAP_DMA: %d\n", -errno); - return -errno; -} - -static bool vfio_listener_skipped_section(MemoryRegionSection *section) -{ - return !memory_region_is_ram(section->mr) || - /* - * Sizing an enabled 64-bit BAR can cause spurious mappings to - * addresses in the upper part of the 64-bit address space. These - * are never accessed by the CPU and beyond the address width of - * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. 
- */ - section->offset_within_address_space & (1ULL << 63); -} - -static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); - hwaddr iova, end; - void *vaddr; - int ret; - - assert(!memory_region_is_iommu(section->mr)); - - if (vfio_listener_skipped_section(section)) { - DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(int128_sub(section->size, int128_one()))); - return; - } - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { - error_report("%s received unaligned region", __func__); - return; - } - - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - end = (section->offset_within_address_space + int128_get64(section->size)) & - TARGET_PAGE_MASK; - - if (iova >= end) { - return; - } - - vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + - (iova - section->offset_within_address_space); - - DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", - iova, end - 1, vaddr); - - memory_region_ref(section->mr); - ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); - if (ret) { - error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%m)", - container, iova, end - iova, vaddr, ret); - - /* - * On the initfn path, store the first error in the container so we - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. - */ - if (!container->iommu_data.type1.initialized) { - if (!container->iommu_data.type1.error) { - container->iommu_data.type1.error = ret; - } - } else { - hw_error("vfio: DMA mapping failed, unable to continue"); - } - } -} - -static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); - hwaddr iova, end; - int ret; - - if (vfio_listener_skipped_section(section)) { - DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(int128_sub(section->size, int128_one()))); - return; - } - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { - error_report("%s received unaligned region", __func__); - return; - } - - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - end = (section->offset_within_address_space + int128_get64(section->size)) & - TARGET_PAGE_MASK; - - if (iova >= end) { - return; - } - - DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", - iova, end - 1); - - ret = vfio_dma_unmap(container, iova, end - iova); - memory_region_unref(section->mr); - if (ret) { - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%m)", - container, iova, end - iova, ret); - } -} - -static MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, -}; - -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->iommu_data.type1.listener); -} - -/* * Interrupt setup */ static void vfio_disable_interrupts(VFIODevice *vdev) @@ -3221,244 +3003,8 @@ static void 
vfio_pci_reset_handler(void *opaque) } } -static void vfio_kvm_device_add_group(VFIOGroup *group) -{ -#ifdef CONFIG_KVM - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_ADD, - .addr = (uint64_t)(unsigned long)&group->fd, - }; - - if (!kvm_enabled()) { - return; - } - - if (vfio_kvm_device_fd < 0) { - struct kvm_create_device cd = { - .type = KVM_DEV_TYPE_VFIO, - }; - - if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { - DPRINTF("KVM_CREATE_DEVICE: %m\n"); - return; - } - - vfio_kvm_device_fd = cd.fd; - } - - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("Failed to add group %d to KVM VFIO device: %m", - group->groupid); - } -#endif -} - -static void vfio_kvm_device_del_group(VFIOGroup *group) -{ -#ifdef CONFIG_KVM - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_DEL, - .addr = (uint64_t)(unsigned long)&group->fd, - }; - - if (vfio_kvm_device_fd < 0) { - return; - } - - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("Failed to remove group %d from KVM VFIO device: %m", - group->groupid); - } -#endif -} - -static int vfio_connect_container(VFIOGroup *group) -{ - VFIOContainer *container; - int ret, fd; - - if (group->container) { - return 0; - } - - QLIST_FOREACH(container, &container_list, next) { - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - return 0; - } - } - - fd = qemu_open("/dev/vfio/vfio", O_RDWR); - if (fd < 0) { - error_report("vfio: failed to open /dev/vfio/vfio: %m"); - return -errno; - } - - ret = ioctl(fd, VFIO_GET_API_VERSION); - if (ret != VFIO_API_VERSION) { - error_report("vfio: supported vfio version: %d, " - "reported version: %d", VFIO_API_VERSION, ret); - close(fd); - return -EINVAL; - } - - container = g_malloc0(sizeof(*container)); - container->fd = fd; - - if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { - ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); - if (ret) { - error_report("vfio: failed to set group container: %m"); - g_free(container); - close(fd); - return -errno; - } - - ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); - if (ret) { - error_report("vfio: failed to set iommu for container: %m"); - g_free(container); - close(fd); - return -errno; - } - - container->iommu_data.type1.listener = vfio_memory_listener; - container->iommu_data.release = vfio_listener_release; - - memory_listener_register(&container->iommu_data.type1.listener, - &address_space_memory); - - if (container->iommu_data.type1.error) { - ret = container->iommu_data.type1.error; - vfio_listener_release(container); - g_free(container); - close(fd); - error_report("vfio: memory listener initialization failed for container"); - return ret; - } - - container->iommu_data.type1.initialized = true; - - } else { - error_report("vfio: No available IOMMU models"); - g_free(container); - close(fd); - return -EINVAL; - } - - QLIST_INIT(&container->group_list); - QLIST_INSERT_HEAD(&container_list, container, next); - - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - - return 0; -} - -static void vfio_disconnect_container(VFIOGroup *group) -{ - VFIOContainer *container = group->container; - - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { - error_report("vfio: error disconnecting group %d from container", - group->groupid); - } - - 
QLIST_REMOVE(group, container_next); - group->container = NULL; - - if (QLIST_EMPTY(&container->group_list)) { - if (container->iommu_data.release) { - container->iommu_data.release(container); - } - QLIST_REMOVE(container, next); - DPRINTF("vfio_disconnect_container: close container->fd\n"); - close(container->fd); - g_free(container); - } -} - -static VFIOGroup *vfio_get_group(int groupid) -{ - VFIOGroup *group; - char path[32]; - struct vfio_group_status status = { .argsz = sizeof(status) }; - - QLIST_FOREACH(group, &group_list, next) { - if (group->groupid == groupid) { - return group; - } - } - - group = g_malloc0(sizeof(*group)); - - snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open(path, O_RDWR); - if (group->fd < 0) { - error_report("vfio: error opening %s: %m", path); - g_free(group); - return NULL; - } - - if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { - error_report("vfio: error getting group status: %m"); - close(group->fd); - g_free(group); - return NULL; - } - - if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { - error_report("vfio: error, group %d is not viable, please ensure " - "all devices within the iommu_group are bound to their " - "vfio bus driver.", groupid); - close(group->fd); - g_free(group); - return NULL; - } - - group->groupid = groupid; - QLIST_INIT(&group->device_list); - - if (vfio_connect_container(group)) { - error_report("vfio: failed to setup container for group %d", groupid); - close(group->fd); - g_free(group); - return NULL; - } - - if (QLIST_EMPTY(&group_list)) { - qemu_register_reset(vfio_pci_reset_handler, NULL); - } - - QLIST_INSERT_HEAD(&group_list, group, next); - - vfio_kvm_device_add_group(group); - - return group; -} - -static void vfio_put_group(VFIOGroup *group) -{ - if (!QLIST_EMPTY(&group->device_list)) { - return; - } - - vfio_kvm_device_del_group(group); - vfio_disconnect_container(group); - QLIST_REMOVE(group, next); - DPRINTF("vfio_put_group: close group->fd\n"); - close(group->fd); - g_free(group); - - if (QLIST_EMPTY(&group_list)) { - qemu_unregister_reset(vfio_pci_reset_handler, NULL); - } -} - -static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) +static int vfio_get_device(VFIOGroup *group, const char *name, + struct VFIODevice *vdev) { struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; @@ -3485,7 +3031,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) goto error; } - DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name, + DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name, dev_info.flags, dev_info.num_regions, dev_info.num_irqs); if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) { @@ -3768,7 +3314,7 @@ static int vfio_initfn(PCIDevice *pdev) DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, groupid); - group = vfio_get_group(groupid); + group = vfio_get_group(groupid, vfio_pci_reset_handler); if (!group) { error_report("vfio: failed to get group %d", groupid); return -ENOENT; @@ -3785,7 +3331,7 @@ static int vfio_initfn(PCIDevice *pdev) pvdev->host.function == vdev->host.function) { error_report("vfio: error: device %s is already attached", path); - vfio_put_group(group); + vfio_put_group(group, vfio_pci_reset_handler); return -EBUSY; } } @@ -3793,7 +3339,7 @@ static int vfio_initfn(PCIDevice *pdev) ret = vfio_get_device(group, path, vdev); if (ret) { 
error_report("vfio: failed to get device %s", path); - vfio_put_group(group); + vfio_put_group(group, vfio_pci_reset_handler); return ret; } @@ -3879,7 +3425,7 @@ out_teardown: out_put: g_free(vdev->emulated_config_bits); vfio_put_device(vdev); - vfio_put_group(group); + vfio_put_group(group, vfio_pci_reset_handler); return ret; } @@ -3899,7 +3445,7 @@ static void vfio_exitfn(PCIDevice *pdev) g_free(vdev->emulated_config_bits); g_free(vdev->rom); vfio_put_device(vdev); - vfio_put_group(group); + vfio_put_group(group, vfio_pci_reset_handler); } static void vfio_pci_reset(DeviceState *dev) diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c new file mode 100644 index 0000000..138fb13 --- /dev/null +++ b/hw/vfio/platform.c @@ -0,0 +1,381 @@ +/* + * vfio based device assignment support - platform devices + * + * Copyright Linaro Limited, 2014 + * + * Authors: + * Kim Phillips + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on vfio based PCI device assignment support: + * Copyright Red Hat, Inc. 2012 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "exec/address-spaces.h" +#include "exec/memory.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/event_notifier.h" +#include "qemu/queue.h" +#include "qemu/range.h" +#include "sysemu/kvm.h" +#include "sysemu/sysemu.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/hw.h" +#include "hw/sysbus.h" + +#include "vfio-common.h" + +#define DEBUG_VFIO +#ifdef DEBUG_VFIO +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +/* Extra debugging, trap acceleration paths for more logging */ +#define VFIO_ALLOW_MMAP 1 + +#define TYPE_VFIO_PLATFORM "vfio-platform" + +typedef struct VFIORegion { + off_t fd_offset; /* offset of region within device fd */ + int fd; /* device fd, allows us to pass VFIORegion as opaque data */ + MemoryRegion mem; /* slow, read/write access */ + MemoryRegion mmap_mem; /* direct mapped access */ + void *mmap; + size_t size; + uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint8_t nr; /* cache the region number for debug */ +} VFIORegion; + +typedef struct VFIODevice { + SysBusDevice sbdev; + int fd; + int num_regions; + VFIORegion *regions; + QLIST_ENTRY(VFIODevice) next; + struct VFIOGroup *group; + char *name; +} VFIODevice; + +static int vfio_mmap_region(VFIODevice *vdev, VFIORegion *region, + MemoryRegion *mem, MemoryRegion *submem, + void **map, size_t size, off_t offset, + const char *name) +{ + int ret = 0; + + if (VFIO_ALLOW_MMAP && size && region->flags & VFIO_REGION_INFO_FLAG_MMAP) { + int prot = 0; + ret = 0; + + if (region->flags & VFIO_REGION_INFO_FLAG_READ) { + prot |= PROT_READ; + } + + if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { + prot |= PROT_WRITE; + } + + *map = mmap(NULL, size, prot, MAP_SHARED, + region->fd, region->fd_offset + offset); + if (*map == MAP_FAILED) { + ret = -errno; + *map = NULL; + goto error; + } + + memory_region_init_ram_ptr(submem, OBJECT(vdev), name, size, *map); + } + + memory_region_add_subregion(mem, offset, submem); + +error: + return ret; +} + +/* + * IO Port/MMIO - Beware of the endians, VFIO is always little endian + */ +static void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIORegion *region = opaque; + union { + uint8_t 
byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + + switch (size) { + case 1: + buf.byte = data; + break; + case 2: + buf.word = data; + break; + case 4: + buf.dword = data; + break; + default: + hw_error("vfio: unsupported write size, %d bytes\n", size); + break; + } + + if (pwrite(region->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m", + __func__, addr, data, size); + } + + DPRINTF("%s(region %d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n", + __func__, region->nr, addr, data, size); +} + +static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size) +{ + VFIORegion *region = opaque; + union { + uint8_t byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + uint64_t data = 0; + + if (pread(region->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m", + __func__, addr, size); + return (uint64_t)-1; + } + + switch (size) { + case 1: + data = buf.byte; + break; + case 2: + data = buf.word; + break; + case 4: + data = buf.dword; + break; + default: + hw_error("vfio: unsupported read size, %d bytes\n", size); + break; + } + + DPRINTF("%s(region %d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n", + __func__, region->nr, addr, size, data); + + return data; +} + +static const MemoryRegionOps vfio_region_ops = { + .read = vfio_region_read, + .write = vfio_region_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void vfio_map_region(VFIODevice *vdev, int nr) +{ + VFIORegion *region = &vdev->regions[nr]; + unsigned size = region->size; + char name[64]; + + snprintf(name, sizeof(name), "VFIO %s region %d", vdev->name, nr); + + /* A "slow" read/write mapping underlies all regions */ + memory_region_init_io(®ion->mem, OBJECT(vdev), &vfio_region_ops, + region, name, size); + + strncat(name, " mmap", sizeof(name) - strlen(name) - 1); + if (vfio_mmap_region(vdev, region, ®ion->mem, + ®ion->mmap_mem, ®ion->mmap, size, 0, name)) { + error_report("%s unsupported. 
Performance may be slow", name); + } +} + +static int vfio_get_device(VFIOGroup *group, const char *name, + struct VFIODevice *vdev) +{ + struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; + struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; + int ret, i; + + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (ret < 0) { + error_report("vfio: error getting device %s from group %d: %m", + name, group->groupid); + error_printf("Verify all devices in group %d are bound to the vfio " + "platform driver and are not already in use\n", + group->groupid); + return ret; + } + + vdev->fd = ret; + vdev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vdev, next); + + /* Sanity check device */ + ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info); + if (ret) { + error_report("vfio: error getting device info: %m"); + goto error; + } + + DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name, + dev_info.flags, dev_info.num_regions, dev_info.num_irqs); + + vdev->regions = g_malloc0(sizeof(VFIORegion) * dev_info.num_regions); + if (!vdev->regions) { + error_report("vfio: Error allocating space for %d regions", + dev_info.num_regions); + ret = -ENOMEM; + goto error; + } + + vdev->num_regions = dev_info.num_regions; + + for (i = 0; i < dev_info.num_regions; i++) { + reg_info.index = i; + + ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + if (ret) { + error_report("vfio: Error getting region %d info: %m", i); + goto error; + } + + DPRINTF("Device %s region %d:\n", name, i); + DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", + (unsigned long)reg_info.size, (unsigned long)reg_info.offset, + (unsigned long)reg_info.flags); + + vdev->regions[i].flags = reg_info.flags; + vdev->regions[i].size = reg_info.size; + vdev->regions[i].fd_offset = reg_info.offset; + vdev->regions[i].fd = vdev->fd; + vdev->regions[i].nr = i; + } + +error: + if (ret) { + g_free(vdev->regions); + QLIST_REMOVE(vdev, next); + vdev->group = NULL; + close(vdev->fd); + } + return ret; +} + +static void vfio_platform_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); + VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev); + VFIOGroup *group; + char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + ssize_t len; + struct stat st; + int groupid, i, ret; + + /* TODO: pass device name on command line */ + vdev->name = malloc(PATH_MAX); + strcpy(vdev->name, "fff51000.ethernet"); + + /* Check that the host device exists */ + snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", vdev->name); + if (stat(path, &st) < 0) { + error_report("vfio: error: no such host device: %s", path); + return; + } + + strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); + + len = readlink(path, iommu_group_path, PATH_MAX); + if (len <= 0) { + error_report("vfio: error no iommu_group for device"); + return; + } + + iommu_group_path[len] = 0; + group_name = basename(iommu_group_path); + + if (sscanf(group_name, "%d", &groupid) != 1) { + error_report("vfio: error reading %s: %m", path); + return; + } + + DPRINTF("%s(%s) group %d\n", __func__, vdev->name, groupid); + + group = vfio_get_group(groupid, NULL); + if (!group) { + error_report("vfio: failed to get group %d", groupid); + return; + } + + snprintf(path, sizeof(path), "%s", vdev->name); + + QLIST_FOREACH(pvdev, &group->device_list, next) { + if (strcmp(pvdev->name, vdev->name) == 0) { + error_report("vfio: 
error: device %s is already attached", path); + vfio_put_group(group, NULL); + return; + } + } + + ret = vfio_get_device(group, path, vdev); + if (ret) { + error_report("vfio: failed to get device %s", path); + vfio_put_group(group, NULL); + return; + } + + for (i = 0; i < vdev->num_regions; i++) { + vfio_map_region(vdev, i); + sysbus_init_mmio(sbdev, &vdev->regions[i].mem); + } +} + +static const VMStateDescription vfio_platform_vmstate = { + .name = TYPE_VFIO_PLATFORM, + .unmigratable = 1, +}; + +static void vfio_platform_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = vfio_platform_realize; + dc->vmsd = &vfio_platform_vmstate; + dc->desc = "VFIO-based platform device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static const TypeInfo vfio_platform_dev_info = { + .name = TYPE_VFIO_PLATFORM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(VFIODevice), + .class_init = vfio_platform_dev_class_init, +}; + +static void register_vfio_platform_dev_type(void) +{ + type_register_static(&vfio_platform_dev_info); +} + +type_init(register_vfio_platform_dev_type) diff --git a/hw/vfio/vfio-common.h b/hw/vfio/vfio-common.h new file mode 100644 index 0000000..21148ef --- /dev/null +++ b/hw/vfio/vfio-common.h @@ -0,0 +1,55 @@ +/* + * common header for vfio based device assignment support + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +struct VFIODevice; + +struct VFIOGroup; + +typedef struct VFIOType1 { + MemoryListener listener; + int error; + bool initialized; +} VFIOType1; + +typedef struct VFIOContainer { + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + struct { + /* enable abstraction to support various iommu backends */ + union { + VFIOType1 type1; + }; + void (*release)(struct VFIOContainer *); + } iommu_data; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_ENTRY(VFIOContainer) next; +} VFIOContainer; + +typedef struct VFIOGroup { + int fd; + int groupid; + VFIOContainer *container; + QLIST_HEAD(, VFIODevice) device_list; + QLIST_ENTRY(VFIOGroup) next; + QLIST_ENTRY(VFIOGroup) container_next; +} VFIOGroup; + + +VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler); +void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler); -- 1.8.3.2