All of lore.kernel.org
 help / color / mirror / Atom feed
From: Blue Swirl <blauwirbel@gmail.com>
To: Jan Kiszka <jan.kiszka@web.de>
Cc: kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	qemu-devel@nongnu.org,
	Alex Williamson <alex.williamson@redhat.com>,
	Avi Kivity <avi@redhat.com>
Subject: Re: [PATCH 4/4] kvm: i386: Add classic PCI device assignment
Date: Mon, 27 Aug 2012 18:56:38 +0000	[thread overview]
Message-ID: <CAAu8pHu7XZ4OStiKG78X2TuHd2RaJ08FTq5coDPRWbDbi9UBBQ@mail.gmail.com> (raw)
In-Reply-To: <825e653c9cfe9d8e26185917cbe1f1dd7ae299e2.1346048917.git.jan.kiszka@web.de>

On Mon, Aug 27, 2012 at 6:28 AM, Jan Kiszka <jan.kiszka@web.de> wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
>
> This adds PCI device assignment for i386 targets using the classic KVM
> interfaces. This version is 100% identical to what is being maintained
> in qemu-kvm for several years and is supported by libvirt as well. It is
> expected to remain relevant for another couple of years until kernels
> without full-features and performance-wise equivalent VFIO support are
> obsolete.
>
> A refactoring to-do that should be done in-tree is to model MSI and
> MSI-X support via the generic PCI layer, similar to what VFIO is already
> doing for MSI-X. This should improve the correctness and clean up the
> code from duplicate logic.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  hw/kvm/Makefile.objs |    2 +-
>  hw/kvm/pci-assign.c  | 1929 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 1930 insertions(+), 1 deletions(-)
>  create mode 100644 hw/kvm/pci-assign.c
>
> diff --git a/hw/kvm/Makefile.objs b/hw/kvm/Makefile.objs
> index 226497a..f620d7f 100644
> --- a/hw/kvm/Makefile.objs
> +++ b/hw/kvm/Makefile.objs
> @@ -1 +1 @@
> -obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o
> +obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o
> diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
> new file mode 100644
> index 0000000..9cce02c
> --- /dev/null
> +++ b/hw/kvm/pci-assign.c
> @@ -0,0 +1,1929 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + *
> + *  Assign a PCI device from the host to a guest VM.
> + *
> + *  Adapted for KVM by Qumranet.
> + *
> + *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
> + *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
> + *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
> + *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
> + *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
> + */
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <sys/io.h>
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include "hw/hw.h"
> +#include "hw/pc.h"
> +#include "qemu-error.h"
> +#include "console.h"
> +#include "hw/loader.h"
> +#include "monitor.h"
> +#include "range.h"
> +#include "sysemu.h"
> +#include "hw/pci.h"
> +#include "hw/msi.h"
> +#include "kvm_i386.h"
> +
> +#define MSIX_PAGE_SIZE 0x1000
> +
> +/* From linux/ioport.h */
> +#define IORESOURCE_IO       0x00000100  /* Resource type */
> +#define IORESOURCE_MEM      0x00000200
> +#define IORESOURCE_IRQ      0x00000400
> +#define IORESOURCE_DMA      0x00000800
> +#define IORESOURCE_PREFETCH 0x00002000  /* No side effects */
> +
> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
> +
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +#define DEBUG(fmt, ...)                                       \
> +    do {                                                      \
> +        fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);  \
> +    } while (0)
> +#else
> +#define DEBUG(fmt, ...)
> +#endif
> +
> +typedef struct {
> +    int type;           /* Memory or port I/O */
> +    int valid;
> +    uint32_t base_addr;
> +    uint32_t size;    /* size of the region */
> +    int resource_fd;
> +} PCIRegion;

Please don't use anonymous structs.

> +
> +typedef struct {
> +    uint8_t bus, dev, func; /* Bus inside domain, device and function */
> +    int irq;                /* IRQ number */
> +    uint16_t region_number; /* number of active regions */
> +
> +    /* Port I/O or MMIO Regions */
> +    PCIRegion regions[PCI_NUM_REGIONS - 1];
> +    int config_fd;
> +} PCIDevRegions;
> +
> +typedef struct {
> +    MemoryRegion container;
> +    MemoryRegion real_iomem;
> +    union {
> +        void *r_virtbase;    /* mmapped access address for memory regions */
> +        uint32_t r_baseport; /* the base guest port for I/O regions */
> +    } u;
> +    pcibus_t e_size;    /* emulated size of region in bytes */
> +    pcibus_t r_size;    /* real size of region in bytes */
> +    PCIRegion *region;
> +} AssignedDevRegion;
> +
> +#define ASSIGNED_DEVICE_PREFER_MSI_BIT  0
> +#define ASSIGNED_DEVICE_SHARE_INTX_BIT  1
> +
> +#define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT)
> +#define ASSIGNED_DEVICE_SHARE_INTX_MASK (1 << ASSIGNED_DEVICE_SHARE_INTX_BIT)
> +
> +typedef struct {
> +    uint32_t addr_lo;
> +    uint32_t addr_hi;
> +    uint32_t data;
> +    uint32_t ctrl;
> +} MSIXTableEntry;
> +
> +typedef enum AssignedIRQType {
> +    ASSIGNED_IRQ_NONE = 0,
> +    ASSIGNED_IRQ_INTX_HOST_INTX,
> +    ASSIGNED_IRQ_INTX_HOST_MSI,
> +    ASSIGNED_IRQ_MSI,
> +    ASSIGNED_IRQ_MSIX
> +} AssignedIRQType;
> +
> +typedef struct AssignedDevice {
> +    PCIDevice dev;
> +    PCIHostDeviceAddress host;
> +    uint32_t dev_id;
> +    uint32_t features;
> +    int intpin;
> +    AssignedDevRegion v_addrs[PCI_NUM_REGIONS - 1];
> +    PCIDevRegions real_device;
> +    PCIINTxRoute intx_route;
> +    AssignedIRQType assigned_irq_type;
> +    struct {
> +#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
> +#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1)
> +        uint32_t available;
> +#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
> +#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1)
> +#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
> +        uint32_t state;
> +    } cap;
> +    uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE];
> +    uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE];
> +    int msi_virq_nr;
> +    int *msi_virq;
> +    MSIXTableEntry *msix_table;
> +    target_phys_addr_t msix_table_addr;
> +    uint16_t msix_max;
> +    MemoryRegion mmio;
> +    char *configfd_name;

const? Not if this would mean more casts.

> +    int32_t bootindex;
> +} AssignedDevice;
> +
> +static void assigned_dev_update_irq_routing(PCIDevice *dev);
> +
> +static void assigned_dev_load_option_rom(AssignedDevice *dev);
> +
> +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
> +
> +static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
> +                                       target_phys_addr_t addr, int size,
> +                                       uint64_t *data)
> +{
> +    uint64_t val = 0;
> +    int fd = dev_region->region->resource_fd;
> +
> +    if (fd >= 0) {
> +        if (data) {
> +            DEBUG("pwrite data=%lx, size=%d, e_phys=%lx, addr=%lx\n",

PRIx64 for data, TARGET_FMT_Plx for e_phys and addr.

> +                  *data, size, addr, addr);
> +            if (pwrite(fd, data, size, addr) != size) {
> +                fprintf(stderr, "%s - pwrite failed %s\n",
> +                        __func__, strerror(errno));
> +            }
> +        } else {
> +            if (pread(fd, &val, size, addr) != size) {
> +                fprintf(stderr, "%s - pread failed %s\n",
> +                        __func__, strerror(errno));
> +                val = (1UL << (size * 8)) - 1;
> +            }
> +            DEBUG("pread val=%lx, size=%d, e_phys=%lx, addr=%lx\n",

Also here. Please fix all DEBUG statements.

> +                  val, size, addr, addr);
> +        }
> +    } else {
> +        uint32_t port = addr + dev_region->u.r_baseport;
> +
> +        if (data) {
> +            DEBUG("out data=%lx, size=%d, e_phys=%lx, host=%x\n",
> +                  *data, size, addr, port);
> +            switch (size) {
> +            case 1:
> +                outb(*data, port);
> +                break;
> +            case 2:
> +                outw(*data, port);
> +                break;
> +            case 4:
> +                outl(*data, port);
> +                break;

Maybe add case 8: and default: with abort(), also below.

> +            }
> +        } else {
> +            switch (size) {
> +            case 1:
> +                val = inb(port);
> +                break;
> +            case 2:
> +                val = inw(port);
> +                break;
> +            case 4:
> +                val = inl(port);
> +                break;
> +            }
> +            DEBUG("in data=%lx, size=%d, e_phys=%lx, host=%x\n",
> +                  val, size, addr, port);
> +        }
> +    }
> +    return val;
> +}
> +
> +static void assigned_dev_ioport_write(void *opaque, target_phys_addr_t addr,
> +                                      uint64_t data, unsigned size)
> +{
> +    assigned_dev_ioport_rw(opaque, addr, size, &data);
> +}
> +
> +static uint64_t assigned_dev_ioport_read(void *opaque,
> +                                         target_phys_addr_t addr, unsigned size)
> +{
> +    return assigned_dev_ioport_rw(opaque, addr, size, NULL);
> +}
> +
> +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint8_t *in = d->u.r_virtbase + addr;

Don't perform arithmetic with void pointers.

> +    uint32_t r;
> +
> +    r = *in;
> +    DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
> +
> +    return r;
> +}
> +
> +static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint16_t *in = d->u.r_virtbase + addr;
> +    uint32_t r;
> +
> +    r = *in;
> +    DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
> +
> +    return r;
> +}
> +
> +static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint32_t *in = d->u.r_virtbase + addr;
> +    uint32_t r;
> +
> +    r = *in;
> +    DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
> +
> +    return r;
> +}
> +
> +static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint8_t *out = d->u.r_virtbase + addr;
> +
> +    DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val);
> +    *out = val;
> +}
> +
> +static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint16_t *out = d->u.r_virtbase + addr;
> +
> +    DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val);
> +    *out = val;
> +}
> +
> +static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint32_t *out = d->u.r_virtbase + addr;
> +
> +    DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val);
> +    *out = val;
> +}
> +
> +static const MemoryRegionOps slow_bar_ops = {
> +    .old_mmio = {
> +        .read = { slow_bar_readb, slow_bar_readw, slow_bar_readl, },
> +        .write = { slow_bar_writeb, slow_bar_writew, slow_bar_writel, },
> +    },
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +};
> +
> +static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num,
> +                                     pcibus_t e_size)
> +{
> +    AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> +    PCIRegion *real_region = &r_dev->real_device.regions[region_num];
> +
> +    if (e_size > 0) {
> +        memory_region_init(&region->container, "assigned-dev-container",
> +                           e_size);
> +        memory_region_add_subregion(&region->container, 0, &region->real_iomem);
> +
> +        /* deal with MSI-X MMIO page */
> +        if (real_region->base_addr <= r_dev->msix_table_addr &&
> +                real_region->base_addr + real_region->size >
> +                r_dev->msix_table_addr) {
> +            int offset = r_dev->msix_table_addr - real_region->base_addr;
> +
> +            memory_region_add_subregion_overlap(&region->container,
> +                                                offset,
> +                                                &r_dev->mmio,
> +                                                1);
> +        }
> +    }
> +}
> +
> +static const MemoryRegionOps assigned_dev_ioport_ops = {
> +    .read = assigned_dev_ioport_read,
> +    .write = assigned_dev_ioport_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +};
> +
> +static void assigned_dev_ioport_setup(PCIDevice *pci_dev, int region_num,
> +                                      pcibus_t size)
> +{
> +    AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> +
> +    region->e_size = size;
> +    memory_region_init(&region->container, "assigned-dev-container", size);
> +    memory_region_init_io(&region->real_iomem, &assigned_dev_ioport_ops,
> +                          r_dev->v_addrs + region_num,
> +                          "assigned-dev-iomem", size);
> +    memory_region_add_subregion(&region->container, 0, &region->real_iomem);
> +}
> +
> +static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len)
> +{
> +    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
> +    uint32_t val;
> +    ssize_t ret;
> +    int fd = pci_dev->real_device.config_fd;
> +
> +again:
> +    ret = pread(fd, &val, len, pos);
> +    if (ret != len) {
> +        if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +
> +        hw_error("pci read failed, ret = %zd errno = %d\n", ret, errno);
> +    }
> +
> +    return val;
> +}
> +
> +static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos)
> +{
> +    return (uint8_t)assigned_dev_pci_read(d, pos, 1);
> +}
> +
> +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len)
> +{
> +    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
> +    ssize_t ret;
> +    int fd = pci_dev->real_device.config_fd;
> +
> +again:
> +    ret = pwrite(fd, &val, len, pos);
> +    if (ret != len) {
> +        if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +
> +        hw_error("pci write failed, ret = %zd errno = %d\n", ret, errno);
> +    }
> +}
> +
> +static void assigned_dev_emulate_config_read(AssignedDevice *dev,
> +                                             uint32_t offset, uint32_t len)
> +{
> +    memset(dev->emulate_config_read + offset, 0xff, len);
> +}
> +
> +static void assigned_dev_direct_config_read(AssignedDevice *dev,
> +                                            uint32_t offset, uint32_t len)
> +{
> +    memset(dev->emulate_config_read + offset, 0, len);
> +}
> +
> +static void assigned_dev_direct_config_write(AssignedDevice *dev,
> +                                             uint32_t offset, uint32_t len)
> +{
> +    memset(dev->emulate_config_write + offset, 0, len);
> +}
> +
> +static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start)
> +{
> +    int id;
> +    int max_cap = 48;
> +    int pos = start ? start : PCI_CAPABILITY_LIST;
> +    int status;
> +
> +    status = assigned_dev_pci_read_byte(d, PCI_STATUS);
> +    if ((status & PCI_STATUS_CAP_LIST) == 0) {
> +        return 0;
> +    }
> +
> +    while (max_cap--) {
> +        pos = assigned_dev_pci_read_byte(d, pos);
> +        if (pos < 0x40) {
> +            break;
> +        }
> +
> +        pos &= ~3;
> +        id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID);
> +
> +        if (id == 0xff) {
> +            break;
> +        }
> +        if (id == cap) {
> +            return pos;
> +        }
> +
> +        pos += PCI_CAP_LIST_NEXT;
> +    }
> +    return 0;
> +}
> +
> +static int assigned_dev_register_regions(PCIRegion *io_regions,
> +                                         unsigned long regions_num,
> +                                         AssignedDevice *pci_dev)
> +{
> +    uint32_t i;
> +    PCIRegion *cur_region = io_regions;
> +
> +    for (i = 0; i < regions_num; i++, cur_region++) {
> +        if (!cur_region->valid) {
> +            continue;
> +        }
> +
> +        /* handle memory io regions */
> +        if (cur_region->type & IORESOURCE_MEM) {
> +            int t = cur_region->type & IORESOURCE_PREFETCH
> +                ? PCI_BASE_ADDRESS_MEM_PREFETCH
> +                : PCI_BASE_ADDRESS_SPACE_MEMORY;
> +
> +            /* map physical memory */
> +            pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size,
> +                                                    PROT_WRITE | PROT_READ,
> +                                                    MAP_SHARED,
> +                                                    cur_region->resource_fd,
> +                                                    (off_t)0);
> +
> +            if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
> +                pci_dev->v_addrs[i].u.r_virtbase = NULL;
> +                fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
> +                        "\n", __func__,
> +                        (uint32_t) (cur_region->base_addr));

Why truncate with cast?

> +                return -1;
> +            }
> +
> +            pci_dev->v_addrs[i].r_size = cur_region->size;
> +            pci_dev->v_addrs[i].e_size = 0;
> +
> +            /* add offset */
> +            pci_dev->v_addrs[i].u.r_virtbase +=
> +                (cur_region->base_addr & 0xFFF);
> +
> +            if (cur_region->size & 0xFFF) {
> +                fprintf(stderr, "PCI region %d at address 0x%llx "

PRIx64 ...

> +                        "has size 0x%x, which is not a multiple of 4K. "
> +                        "You might experience some performance hit "
> +                        "due to that.\n",
> +                        i, (unsigned long long)cur_region->base_addr,

... and remove cast.

> +                        cur_region->size);
> +                memory_region_init_io(&pci_dev->v_addrs[i].real_iomem,
> +                                      &slow_bar_ops, &pci_dev->v_addrs[i],
> +                                      "assigned-dev-slow-bar",
> +                                      cur_region->size);
> +            } else {
> +                void *virtbase = pci_dev->v_addrs[i].u.r_virtbase;
> +                char name[32];
> +                snprintf(name, sizeof(name), "%s.bar%d",
> +                         object_get_typename(OBJECT(pci_dev)), i);
> +                memory_region_init_ram_ptr(&pci_dev->v_addrs[i].real_iomem,
> +                                           name, cur_region->size,
> +                                           virtbase);
> +                vmstate_register_ram(&pci_dev->v_addrs[i].real_iomem,
> +                                     &pci_dev->dev.qdev);
> +            }
> +
> +            assigned_dev_iomem_setup(&pci_dev->dev, i, cur_region->size);
> +            pci_register_bar((PCIDevice *) pci_dev, i, t,
> +                             &pci_dev->v_addrs[i].container);
> +            continue;
> +        } else {
> +            /* handle port io regions */
> +            uint32_t val;
> +            int ret;
> +
> +            /* Test kernel support for ioport resource read/write.  Old
> +             * kernels return EIO.  New kernels only allow 1/2/4 byte reads
> +             * so should return EINVAL for a 3 byte read */
> +            ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
> +            if (ret >= 0) {
> +                fprintf(stderr, "Unexpected return from I/O port read: %d\n",
> +                        ret);
> +                abort();
> +            } else if (errno != EINVAL) {
> +                fprintf(stderr, "Kernel doesn't support ioport resource "
> +                                "access, hiding this region.\n");
> +                close(pci_dev->v_addrs[i].region->resource_fd);
> +                cur_region->valid = 0;
> +                continue;
> +            }
> +
> +            pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
> +            pci_dev->v_addrs[i].r_size = cur_region->size;
> +            pci_dev->v_addrs[i].e_size = 0;
> +
> +            assigned_dev_ioport_setup(&pci_dev->dev, i, cur_region->size);
> +            pci_register_bar((PCIDevice *) pci_dev, i,
> +                             PCI_BASE_ADDRESS_SPACE_IO,
> +                             &pci_dev->v_addrs[i].container);
> +        }
> +    }
> +
> +    /* success */
> +    return 0;
> +}
> +
> +static int get_real_id(const char *devpath, const char *idname, uint16_t *val)
> +{
> +    FILE *f;
> +    char name[128];
> +    long id;
> +
> +    snprintf(name, sizeof(name), "%s%s", devpath, idname);
> +    f = fopen(name, "r");
> +    if (f == NULL) {
> +        fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +        return -1;
> +    }
> +    if (fscanf(f, "%li\n", &id) == 1) {
> +        *val = id;
> +    } else {
> +        return -1;
> +    }
> +    fclose(f);
> +
> +    return 0;
> +}
> +
> +static int get_real_vendor_id(const char *devpath, uint16_t *val)
> +{
> +    return get_real_id(devpath, "vendor", val);
> +}
> +
> +static int get_real_device_id(const char *devpath, uint16_t *val)
> +{
> +    return get_real_id(devpath, "device", val);
> +}
> +
> +static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
> +                           uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
> +{
> +    char dir[128], name[128];
> +    int fd, r = 0, v;
> +    FILE *f;
> +    unsigned long long start, end, size, flags;

uint64_t

> +    uint16_t id;
> +    PCIRegion *rp;
> +    PCIDevRegions *dev = &pci_dev->real_device;
> +
> +    dev->region_number = 0;
> +
> +    snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
> +             r_seg, r_bus, r_dev, r_func);
> +
> +    snprintf(name, sizeof(name), "%sconfig", dir);
> +
> +    if (pci_dev->configfd_name && *pci_dev->configfd_name) {
> +        if (qemu_isdigit(pci_dev->configfd_name[0])) {
> +            dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0);
> +        } else {
> +            dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name);
> +            if (dev->config_fd < 0) {
> +                fprintf(stderr, "%s: (%s) unkown\n", __func__,
> +                        pci_dev->configfd_name);
> +                return 1;
> +            }
> +        }
> +    } else {
> +        dev->config_fd = open(name, O_RDWR);
> +
> +        if (dev->config_fd == -1) {
> +            fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +            return 1;
> +        }
> +    }
> +again:
> +    r = read(dev->config_fd, pci_dev->dev.config,
> +             pci_config_size(&pci_dev->dev));
> +    if (r < 0) {
> +        if (errno == EINTR || errno == EAGAIN) {
> +            goto again;
> +        }
> +        fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
> +    }
> +
> +    /* Restore or clear multifunction, this is always controlled by qemu */
> +    if (pci_dev->dev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> +        pci_dev->dev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    } else {
> +        pci_dev->dev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    }
> +
> +    /* Clear host resource mapping info.  If we choose not to register a
> +     * BAR, such as might be the case with the option ROM, we can get
> +     * confusing, unwritable, residual addresses from the host here. */
> +    memset(&pci_dev->dev.config[PCI_BASE_ADDRESS_0], 0, 24);
> +    memset(&pci_dev->dev.config[PCI_ROM_ADDRESS], 0, 4);
> +
> +    snprintf(name, sizeof(name), "%sresource", dir);
> +
> +    f = fopen(name, "r");
> +    if (f == NULL) {
> +        fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +        return 1;
> +    }
> +
> +    for (r = 0; r < PCI_ROM_SLOT; r++) {
> +        if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3) {

SCNi64

> +            break;
> +        }
> +
> +        rp = dev->regions + r;
> +        rp->valid = 0;
> +        rp->resource_fd = -1;
> +        size = end - start + 1;
> +        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
> +        if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) {
> +            continue;
> +        }
> +        if (flags & IORESOURCE_MEM) {
> +            flags &= ~IORESOURCE_IO;
> +        } else {
> +            flags &= ~IORESOURCE_PREFETCH;
> +        }
> +        snprintf(name, sizeof(name), "%sresource%d", dir, r);
> +        fd = open(name, O_RDWR);
> +        if (fd == -1) {
> +            continue;
> +        }
> +        rp->resource_fd = fd;
> +
> +        rp->type = flags;
> +        rp->valid = 1;
> +        rp->base_addr = start;
> +        rp->size = size;
> +        pci_dev->v_addrs[r].region = rp;
> +        DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
> +              r, rp->size, start, rp->type, rp->resource_fd);
> +    }
> +
> +    fclose(f);
> +
> +    /* read and fill vendor ID */
> +    v = get_real_vendor_id(dir, &id);
> +    if (v) {
> +        return 1;
> +    }
> +    pci_dev->dev.config[0] = id & 0xff;
> +    pci_dev->dev.config[1] = (id & 0xff00) >> 8;
> +
> +    /* read and fill device ID */
> +    v = get_real_device_id(dir, &id);
> +    if (v) {
> +        return 1;
> +    }
> +    pci_dev->dev.config[2] = id & 0xff;
> +    pci_dev->dev.config[3] = (id & 0xff00) >> 8;
> +
> +    pci_word_test_and_clear_mask(pci_dev->emulate_config_write + PCI_COMMAND,
> +                                 PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
> +
> +    dev->region_number = r;
> +    return 0;
> +}

Pretty long function, how about refactoring?

> +
> +static void free_msi_virqs(AssignedDevice *dev)
> +{
> +    int i;
> +
> +    for (i = 0; i < dev->msi_virq_nr; i++) {
> +        if (dev->msi_virq[i] >= 0) {
> +            kvm_irqchip_release_virq(kvm_state, dev->msi_virq[i]);
> +            dev->msi_virq[i] = -1;
> +        }
> +    }
> +    g_free(dev->msi_virq);
> +    dev->msi_virq = NULL;
> +    dev->msi_virq_nr = 0;
> +}
> +
> +static void free_assigned_device(AssignedDevice *dev)
> +{
> +    int i;
> +
> +    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> +        assigned_dev_unregister_msix_mmio(dev);
> +    }
> +    for (i = 0; i < dev->real_device.region_number; i++) {
> +        PCIRegion *pci_region = &dev->real_device.regions[i];
> +        AssignedDevRegion *region = &dev->v_addrs[i];
> +
> +        if (!pci_region->valid) {
> +            continue;
> +        }
> +        if (pci_region->type & IORESOURCE_IO) {
> +            if (region->u.r_baseport) {
> +                memory_region_del_subregion(&region->container,
> +                                            &region->real_iomem);
> +                memory_region_destroy(&region->real_iomem);
> +                memory_region_destroy(&region->container);
> +            }
> +        } else if (pci_region->type & IORESOURCE_MEM) {
> +            if (region->u.r_virtbase) {
> +                memory_region_del_subregion(&region->container,
> +                                            &region->real_iomem);
> +
> +                /* Remove MSI-X table subregion */
> +                if (pci_region->base_addr <= dev->msix_table_addr &&
> +                    pci_region->base_addr + pci_region->size >
> +                    dev->msix_table_addr) {
> +                    memory_region_del_subregion(&region->container,
> +                                                &dev->mmio);
> +                }
> +
> +                memory_region_destroy(&region->real_iomem);
> +                memory_region_destroy(&region->container);
> +                if (munmap(region->u.r_virtbase,
> +                           (pci_region->size + 0xFFF) & 0xFFFFF000)) {
> +                    fprintf(stderr,
> +                            "Failed to unmap assigned device region: %s\n",
> +                            strerror(errno));
> +                }
> +            }
> +        }
> +        if (pci_region->resource_fd >= 0) {
> +            close(pci_region->resource_fd);
> +        }
> +    }
> +
> +    if (dev->real_device.config_fd >= 0) {
> +        close(dev->real_device.config_fd);
> +    }
> +
> +    free_msi_virqs(dev);
> +}
> +
> +static void assign_failed_examine(AssignedDevice *dev)
> +{
> +    char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
> +    uint16_t vendor_id, device_id;
> +    int r;
> +
> +    sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
> +            dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function);

Please use snprintf(), also below.

> +
> +    sprintf(name, "%sdriver", dir);
> +
> +    r = readlink(name, driver, sizeof(driver));
> +    if ((r <= 0) || r >= sizeof(driver)) {
> +        goto fail;
> +    }
> +
> +    ns = strrchr(driver, '/');
> +    if (!ns) {
> +        goto fail;
> +    }
> +
> +    ns++;
> +
> +    if (get_real_vendor_id(dir, &vendor_id) ||
> +        get_real_device_id(dir, &device_id)) {
> +        goto fail;
> +    }
> +
> +    fprintf(stderr, "*** The driver '%s' is occupying your device "
> +                    "%04x:%02x:%02x.%x.\n",
> +            ns, dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function);
> +    fprintf(stderr, "***\n");
> +    fprintf(stderr, "*** You can try the following commands to free it:\n");
> +    fprintf(stderr, "***\n");
> +    fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
> +                    "new_id\n", vendor_id, device_id);
> +    fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
> +                    "%s/unbind\n",
> +            dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function, ns);
> +    fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
> +                    "pci-stub/bind\n",
> +            dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function);
> +    fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
> +                    "/remove_id\n", vendor_id, device_id);
> +    fprintf(stderr, "***\n");
> +
> +    return;
> +
> +fail:
> +    fprintf(stderr, "Couldn't find out why.\n");
> +}
> +
> +static int assign_device(AssignedDevice *dev)
> +{
> +    uint32_t flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;
> +    int r;
> +
> +    /* Only pass non-zero PCI segment to capable module */
> +    if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) &&
> +        dev->host.domain) {
> +        fprintf(stderr, "Can't assign device inside non-zero PCI segment "
> +                "as this KVM module doesn't support it.\n");
> +        return -ENODEV;
> +    }
> +
> +    if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) {
> +        fprintf(stderr, "No IOMMU found.  Unable to assign device \"%s\"\n",
> +                dev->dev.qdev.id);
> +        return -ENODEV;
> +    }
> +
> +    if (dev->features & ASSIGNED_DEVICE_SHARE_INTX_MASK &&
> +        kvm_has_intx_set_mask()) {
> +        flags |= KVM_DEV_ASSIGN_PCI_2_3;
> +    }
> +
> +    r = kvm_device_pci_assign(kvm_state, &dev->host, flags, &dev->dev_id);
> +    if (r < 0) {
> +        fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
> +                dev->dev.qdev.id, strerror(-r));
> +
> +        switch (r) {
> +        case -EBUSY:
> +            assign_failed_examine(dev);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +    return r;
> +}
> +
> +static bool check_irqchip_in_kernel(void)
> +{
> +    if (kvm_irqchip_in_kernel()) {
> +        return true;
> +    }
> +    error_report("pci-assign: error: requires KVM with in-kernel irqchip "
> +                 "enabled");
> +    return false;
> +}
> +
> +static int assign_intx(AssignedDevice *dev)
> +{
> +    AssignedIRQType new_type;
> +    PCIINTxRoute intx_route;
> +    bool intx_host_msi;
> +    int r;
> +
> +    /* Interrupt PIN 0 means don't use INTx */
> +    if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0) {
> +        pci_device_set_intx_routing_notifier(&dev->dev, NULL);
> +        return 0;
> +    }
> +
> +    if (!check_irqchip_in_kernel()) {
> +        return -ENOTSUP;
> +    }
> +
> +    pci_device_set_intx_routing_notifier(&dev->dev,
> +                                         assigned_dev_update_irq_routing);
> +
> +    intx_route = pci_device_route_intx_to_irq(&dev->dev, 0);
> +    assert(intx_route.mode != PCI_INTX_INVERTED);
> +
> +    if (dev->intx_route.mode == intx_route.mode &&
> +        dev->intx_route.irq == intx_route.irq) {
> +        return 0;
> +    }
> +
> +    switch (dev->assigned_irq_type) {
> +    case ASSIGNED_IRQ_INTX_HOST_INTX:
> +    case ASSIGNED_IRQ_INTX_HOST_MSI:
> +        intx_host_msi = dev->assigned_irq_type == ASSIGNED_IRQ_INTX_HOST_MSI;
> +        r = kvm_device_intx_deassign(kvm_state, dev->dev_id, intx_host_msi);
> +        break;
> +    case ASSIGNED_IRQ_MSI:
> +        r = kvm_device_msi_deassign(kvm_state, dev->dev_id);
> +        break;
> +    case ASSIGNED_IRQ_MSIX:
> +        r = kvm_device_msix_deassign(kvm_state, dev->dev_id);
> +        break;
> +    default:
> +        r = 0;
> +        break;
> +    }
> +    if (r) {
> +        perror("assign_intx: deassign");

This error message is not very clear.

> +    }
> +    dev->assigned_irq_type = ASSIGNED_IRQ_NONE;
> +
> +    if (intx_route.mode == PCI_INTX_DISABLED) {
> +        dev->intx_route = intx_route;
> +        return 0;
> +    }
> +
> +retry:
> +    if (dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK &&
> +        dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +        intx_host_msi = true;
> +        new_type = ASSIGNED_IRQ_INTX_HOST_MSI;
> +    } else {
> +        intx_host_msi = false;
> +        new_type = ASSIGNED_IRQ_INTX_HOST_INTX;
> +    }
> +
> +    r = kvm_device_intx_assign(kvm_state, dev->dev_id, intx_host_msi,
> +                               intx_route.irq);
> +    if (r < 0) {
> +        if (r == -EIO && !(dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK) &&
> +            dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +            /* Retry with host-side MSI. There might be an IRQ conflict and
> +             * either the kernel or the device doesn't support sharing. */
> +            fprintf(stderr,
> +                    "Host-side INTx sharing not supported, "
> +                    "using MSI instead.\n"
> +                    "Some devices do not to work properly in this mode.\n");
> +            dev->features |= ASSIGNED_DEVICE_PREFER_MSI_MASK;
> +            goto retry;
> +        }
> +        fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
> +                dev->dev.qdev.id, strerror(-r));
> +        fprintf(stderr, "Perhaps you are assigning a device "
> +                "that shares an IRQ with another device?\n");
> +        return r;
> +    }
> +
> +    dev->intx_route = intx_route;
> +    dev->assigned_irq_type = new_type;
> +    return r;
> +}
> +
> +static void deassign_device(AssignedDevice *dev)
> +{
> +    int r;
> +
> +    r = kvm_device_pci_deassign(kvm_state, dev->dev_id);
> +    if (r < 0) {
> +        fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
> +                dev->dev.qdev.id, strerror(-r));
> +    }
> +}
> +
> +/* The pci config space got updated. Check if irq numbers have changed
> + * for our devices
> + */
> +static void assigned_dev_update_irq_routing(PCIDevice *dev)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, dev);
> +    Error *err = NULL;
> +    int r;
> +
> +    r = assign_intx(assigned_dev);
> +    if (r < 0) {
> +        qdev_unplug(&dev->qdev, &err);
> +        assert(!err);
> +    }
> +}
> +
> +static void assigned_dev_update_msi(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap +
> +                                     PCI_MSI_FLAGS);
> +    int r;
> +
> +    /* Some guests gratuitously disable MSI even if they're not using it,
> +     * try to catch this by only deassigning irqs if the guest is using
> +     * MSI or intends to start. */
> +    if (assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSI ||
> +        (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) {
> +        r = kvm_device_msi_deassign(kvm_state, assigned_dev->dev_id);
> +        /* -ENXIO means no assigned irq */
> +        if (r && r != -ENXIO) {
> +            perror("assigned_dev_update_msi: deassign irq");
> +        }
> +
> +        free_msi_virqs(assigned_dev);
> +
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE;
> +        pci_device_set_intx_routing_notifier(pci_dev, NULL);
> +    }
> +
> +    if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
> +        uint8_t *pos = pci_dev->config + pci_dev->msi_cap;
> +        MSIMessage msg;
> +        int virq;
> +
> +        msg.address = pci_get_long(pos + PCI_MSI_ADDRESS_LO);
> +        msg.data = pci_get_word(pos + PCI_MSI_DATA_32);
> +        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        if (virq < 0) {
> +            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
> +            return;
> +        }
> +
> +        assigned_dev->msi_virq = g_malloc(sizeof(*assigned_dev->msi_virq));

Is this ever freed?

> +        assigned_dev->msi_virq_nr = 1;
> +        assigned_dev->msi_virq[0] = virq;
> +        if (kvm_device_msi_assign(kvm_state, assigned_dev->dev_id, virq) < 0) {
> +            perror("assigned_dev_update_msi: kvm_device_msi_assign");
> +        }
> +
> +        assigned_dev->intx_route.mode = PCI_INTX_DISABLED;
> +        assigned_dev->intx_route.irq = -1;
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSI;
> +    } else {
> +        assign_intx(assigned_dev);
> +    }
> +}
> +
> +static bool msix_masked(MSIXTableEntry *entry)
> +{
> +    return (entry->ctrl & cpu_to_le32(0x1)) != 0;
> +}
> +
> +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint16_t entries_nr = 0;
> +    int i, r = 0;
> +    MSIXTableEntry *entry = adev->msix_table;
> +    MSIMessage msg;
> +
> +    /* Get the usable entry number for allocating */
> +    for (i = 0; i < adev->msix_max; i++, entry++) {
> +        if (msix_masked(entry)) {
> +            continue;
> +        }
> +        entries_nr++;
> +    }
> +
> +    DEBUG("MSI-X entries: %d\n", entries_nr);
> +
> +    /* It's valid to enable MSI-X with all entries masked */
> +    if (!entries_nr) {
> +        return 0;
> +    }
> +
> +    r = kvm_device_msix_init_vectors(kvm_state, adev->dev_id, entries_nr);
> +    if (r != 0) {
> +        fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
> +                strerror(-r));
> +        return r;
> +    }
> +
> +    free_msi_virqs(adev);
> +
> +    adev->msi_virq_nr = adev->msix_max;
> +    adev->msi_virq = g_malloc(adev->msix_max * sizeof(*adev->msi_virq));
> +
> +    entry = adev->msix_table;
> +    for (i = 0; i < adev->msix_max; i++, entry++) {
> +        adev->msi_virq[i] = -1;
> +
> +        if (msix_masked(entry)) {
> +            continue;
> +        }
> +
> +        msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32);
> +        msg.data = entry->data;
> +        r = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        if (r < 0) {
> +            return r;
> +        }
> +        adev->msi_virq[i] = r;
> +
> +        DEBUG("MSI-X vector %d, gsi %d, addr %08x_%08x, data %08x\n", i,
> +              r, entry->addr_hi, entry->addr_lo, entry->data);
> +
> +        r = kvm_device_msix_set_vector(kvm_state, adev->dev_id, i,
> +                                       adev->msi_virq[i]);
> +        if (r) {
> +            fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
> +            break;
> +        }
> +    }
> +
> +    return r;
> +}
> +
> +static void assigned_dev_update_msix(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint16_t ctrl_word = pci_get_word(pci_dev->config + pci_dev->msix_cap +
> +                                      PCI_MSIX_FLAGS);
> +    int r;
> +
> +    /* Some guests gratuitously disable MSIX even if they're not using it,
> +     * try to catch this by only deassigning irqs if the guest is using
> +     * MSIX or intends to start. */
> +    if ((assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSIX) ||
> +        (ctrl_word & PCI_MSIX_FLAGS_ENABLE)) {
> +        r = kvm_device_msix_deassign(kvm_state, assigned_dev->dev_id);
> +        /* -ENXIO means no assigned irq */
> +        if (r && r != -ENXIO) {
> +            perror("assigned_dev_update_msix: deassign irq");
> +        }
> +
> +        free_msi_virqs(assigned_dev);
> +
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE;
> +        pci_device_set_intx_routing_notifier(pci_dev, NULL);
> +    }
> +
> +    if (ctrl_word & PCI_MSIX_FLAGS_ENABLE) {
> +        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
> +            perror("assigned_dev_update_msix_mmio");
> +            return;
> +        }
> +
> +        if (assigned_dev->msi_virq_nr > 0) {
> +            if (kvm_device_msix_assign(kvm_state, assigned_dev->dev_id) < 0) {
> +                perror("assigned_dev_enable_msix: assign irq");
> +                return;
> +            }
> +        }
> +        assigned_dev->intx_route.mode = PCI_INTX_DISABLED;
> +        assigned_dev->intx_route.irq = -1;
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSIX;
> +    } else {
> +        assign_intx(assigned_dev);
> +    }
> +}
> +
> +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev,
> +                                             uint32_t address, int len)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint32_t virt_val = pci_default_read_config(pci_dev, address, len);
> +    uint32_t real_val, emulate_mask, full_emulation_mask;
> +
> +    emulate_mask = 0;
> +    memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len);
> +    emulate_mask = le32_to_cpu(emulate_mask);
> +
> +    full_emulation_mask = 0xffffffff >> (32 - len * 8);
> +
> +    if (emulate_mask != full_emulation_mask) {
> +        real_val = assigned_dev_pci_read(pci_dev, address, len);
> +        return (virt_val & emulate_mask) | (real_val & ~emulate_mask);
> +    } else {
> +        return virt_val;
> +    }
> +}
> +
> +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address,
> +                                          uint32_t val, int len)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint16_t old_cmd = pci_get_word(pci_dev->config + PCI_COMMAND);
> +    uint32_t emulate_mask, full_emulation_mask;
> +    int ret;
> +
> +    pci_default_write_config(pci_dev, address, val, len);
> +
> +    if (kvm_has_intx_set_mask() &&
> +        range_covers_byte(address, len, PCI_COMMAND + 1)) {
> +        bool intx_masked = (pci_get_word(pci_dev->config + PCI_COMMAND) &
> +                            PCI_COMMAND_INTX_DISABLE);
> +
> +        if (intx_masked != !!(old_cmd & PCI_COMMAND_INTX_DISABLE)) {
> +            ret = kvm_device_intx_set_mask(kvm_state, assigned_dev->dev_id,
> +                                           intx_masked);
> +            if (ret) {
> +                perror("assigned_dev_pci_write_config: set intx mask");
> +            }
> +        }
> +    }
> +    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +        if (range_covers_byte(address, len,
> +                              pci_dev->msi_cap + PCI_MSI_FLAGS)) {
> +            assigned_dev_update_msi(pci_dev);
> +        }
> +    }
> +    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> +        if (range_covers_byte(address, len,
> +                              pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) {
> +            assigned_dev_update_msix(pci_dev);
> +        }
> +    }
> +
> +    emulate_mask = 0;
> +    memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len);
> +    emulate_mask = le32_to_cpu(emulate_mask);
> +
> +    full_emulation_mask = 0xffffffff >> (32 - len * 8);
> +
> +    if (emulate_mask != full_emulation_mask) {
> +        if (emulate_mask) {
> +            val &= ~emulate_mask;
> +            val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask;
> +        }
> +        assigned_dev_pci_write(pci_dev, address, val, len);
> +    }
> +}
> +
> +static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset,
> +                                        uint32_t len)
> +{
> +    assigned_dev_direct_config_read(dev, offset, len);
> +    assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1);
> +}
> +
> +static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    PCIRegion *pci_region = dev->real_device.regions;
> +    int ret, pos;
> +
> +    /* Clear initial capabilities pointer and status copied from hw */
> +    pci_set_byte(pci_dev->config + PCI_CAPABILITY_LIST, 0);
> +    pci_set_word(pci_dev->config + PCI_STATUS,
> +                 pci_get_word(pci_dev->config + PCI_STATUS) &
> +                 ~PCI_STATUS_CAP_LIST);
> +
> +    /* Expose MSI capability
> +     * MSI capability is the 1st capability in capability config */
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0);
> +    if (pos != 0 && kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) {
> +        if (!check_irqchip_in_kernel()) {
> +            return -ENOTSUP;
> +        }
> +        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
> +        /* Only 32-bit/no-mask currently supported */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +        pci_dev->msi_cap = pos;
> +
> +        pci_set_word(pci_dev->config + pos + PCI_MSI_FLAGS,
> +                     pci_get_word(pci_dev->config + pos + PCI_MSI_FLAGS) &
> +                     PCI_MSI_FLAGS_QMASK);
> +        pci_set_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO, 0);
> +        pci_set_word(pci_dev->config + pos + PCI_MSI_DATA_32, 0);
> +
> +        /* Set writable fields */
> +        pci_set_word(pci_dev->wmask + pos + PCI_MSI_FLAGS,
> +                     PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
> +        pci_set_long(pci_dev->wmask + pos + PCI_MSI_ADDRESS_LO, 0xfffffffc);
> +        pci_set_word(pci_dev->wmask + pos + PCI_MSI_DATA_32, 0xffff);
> +    }
> +    /* Expose MSI-X capability */
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0);
> +    if (pos != 0 && kvm_device_msix_supported(kvm_state)) {
> +        int bar_nr;
> +        uint32_t msix_table_entry;
> +
> +        if (!check_irqchip_in_kernel()) {
> +            return -ENOTSUP;
> +        }
> +        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, 12);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +        pci_dev->msix_cap = pos;
> +
> +        pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS,
> +                     pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
> +                     PCI_MSIX_FLAGS_QSIZE);
> +
> +        /* Only enable and function mask bits are writable */
> +        pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS,
> +                     PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
> +
> +        msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
> +        bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
> +        msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
> +        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
> +        dev->msix_max = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS);
> +        dev->msix_max &= PCI_MSIX_FLAGS_QSIZE;
> +        dev->msix_max += 1;
> +    }
> +
> +    /* Minimal PM support, nothing writable, device appears to NAK changes */
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0);
> +    if (pos) {
> +        uint16_t pmc;
> +
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF);
> +
> +        pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
> +        pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
> +        pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
> +
> +        /* assign_device will bring the device up to D0, so we don't need
> +         * to worry about doing that ourselves here. */
> +        pci_set_word(pci_dev->config + pos + PCI_PM_CTRL,
> +                     PCI_PM_CTRL_NO_SOFT_RESET);
> +
> +        pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0);
> +        pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0);
> +    }
> +
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0);
> +    if (pos) {
> +        uint8_t version, size = 0;
> +        uint16_t type, devctl, lnksta;
> +        uint32_t devcap, lnkcap;
> +
> +        version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS);
> +        version &= PCI_EXP_FLAGS_VERS;
> +        if (version == 1) {
> +            size = 0x14;
> +        } else if (version == 2) {
> +            /*
> +             * Check for non-std size, accept reduced size to 0x34,
> +             * which is what bcm5761 implemented, violating the
> +             * PCIe v3.0 spec that regs should exist and be read as 0,
> +             * not optionally provided and shorten the struct size.
> +             */
> +            size = MIN(0x3c, PCI_CONFIG_SPACE_SIZE - pos);
> +            if (size < 0x34) {
> +                fprintf(stderr,
> +                        "%s: Invalid size PCIe cap-id 0x%x\n",
> +                        __func__, PCI_CAP_ID_EXP);
> +                return -EINVAL;
> +            } else if (size != 0x3c) {
> +                fprintf(stderr,
> +                        "WARNING, %s: PCIe cap-id 0x%x has "
> +                        "non-standard size 0x%x; std size should be 0x3c\n",
> +                         __func__, PCI_CAP_ID_EXP, size);
> +            }
> +        } else if (version == 0) {
> +            uint16_t vid, did;
> +            vid = pci_get_word(pci_dev->config + PCI_VENDOR_ID);
> +            did = pci_get_word(pci_dev->config + PCI_DEVICE_ID);
> +            if (vid == PCI_VENDOR_ID_INTEL && did == 0x10ed) {
> +                /*
> +                 * quirk for Intel 82599 VF with invalid PCIe capability
> +                 * version, should really be version 2 (same as PF)
> +                 */
> +                size = 0x3c;
> +            }
> +        }
> +
> +        if (size == 0) {
> +            fprintf(stderr,
> +                    "%s: Unsupported PCI express capability version %d\n",
> +                    __func__, version);
> +            return -EINVAL;
> +        }
> +
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP, pos, size);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, size);
> +
> +        type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS);
> +        type = (type & PCI_EXP_FLAGS_TYPE) >> 4;
> +        if (type != PCI_EXP_TYPE_ENDPOINT &&
> +            type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) {
> +            fprintf(stderr,
> +                    "Device assignment only supports endpoint assignment, "
> +                    "device type %d\n", type);
> +            return -EINVAL;
> +        }
> +
> +        /* capabilities, pass existing read-only copy
> +         * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */
> +
> +        /* device capabilities: hide FLR */
> +        devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP);
> +        devcap &= ~PCI_EXP_DEVCAP_FLR;
> +        pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap);
> +
> +        /* device control: clear all error reporting enable bits, leaving
> +         *                 only a few host values.  Note, these are
> +         *                 all writable, but not passed to hw.
> +         */
> +        devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL);
> +        devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) |
> +                  PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN;
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl);
> +        devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME;
> +        pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl);
> +
> +        /* Clear device status */
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0);
> +
> +        /* Link capabilities, expose links and latencues, clear reporting */
> +        lnkcap = pci_get_long(pci_dev->config + pos + PCI_EXP_LNKCAP);
> +        lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW |
> +                   PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL |
> +                   PCI_EXP_LNKCAP_L1EL);
> +        pci_set_long(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap);
> +
> +        /* Link control, pass existing read-only copy.  Should be writable? */
> +
> +        /* Link status, only expose current speed and width */
> +        lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA);
> +        lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta);
> +
> +        if (version >= 2) {
> +            /* Slot capabilities, control, status - not needed for endpoints */
> +            pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0);
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0);
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0);
> +
> +            /* Root control, capabilities, status - not needed for endpoints */
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0);
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0);
> +            pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0);
> +
> +            /* Device capabilities/control 2, pass existing read-only copy */
> +            /* Link control 2, pass existing read-only copy */
> +        }
> +    }
> +
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0);
> +    if (pos) {
> +        uint16_t cmd;
> +        uint32_t status;
> +
> +        /* Only expose the minimum, 8 byte capability */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, 8);
> +
> +        /* Command register, clear upper bits, including extended modes */
> +        cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
> +        cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
> +                PCI_X_CMD_MAX_SPLIT);
> +        pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd);
> +
> +        /* Status register, update with emulated PCI bus location, clear
> +         * error bits, leave the rest. */
> +        status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
> +        status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
> +        status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn;
> +        status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
> +                    PCI_X_STATUS_SPL_ERR);
> +        pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
> +    }
> +
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0);
> +    if (pos) {
> +        /* Direct R/W passthrough */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, 8);
> +
> +        /* direct write for cap content */
> +        assigned_dev_direct_config_write(dev, pos + 2, 6);
> +    }
> +
> +    /* Devices can have multiple vendor capabilities, get them all */
> +    for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
> +        pos += PCI_CAP_LIST_NEXT) {
> +        uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS);
> +        /* Direct R/W passthrough */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, pos, len);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, len);
> +
> +        /* direct write for cap content */
> +        assigned_dev_direct_config_write(dev, pos + 2, len - 2);
> +    }
> +
> +    /* If real and virtual capability list status bits differ, virtualize the
> +     * access. */
> +    if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) !=
> +        (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) &
> +         PCI_STATUS_CAP_LIST)) {
> +        dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> +    }
> +
> +    return 0;
> +}

Also this function is way too long.

> +
> +static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
> +                               unsigned size)
> +{
> +    AssignedDevice *adev = opaque;
> +    uint64_t val;
> +
> +    memcpy(&val, (void *)((uint8_t *)adev->msix_table + addr), size);
> +
> +    return val;
> +}
> +
> +static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> +                            uint64_t val, unsigned size)
> +{
> +    AssignedDevice *adev = opaque;
> +    PCIDevice *pdev = &adev->dev;
> +    uint16_t ctrl;
> +    MSIXTableEntry orig;
> +    int i = addr >> 4;
> +
> +    if (i >= adev->msix_max) {
> +        return; /* Drop write */
> +    }
> +
> +    ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> +
> +    DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> +
> +    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> +        orig = adev->msix_table[i];
> +    }
> +
> +    memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);

Cast to void * shouldn't be necessary.

> +
> +    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> +        MSIXTableEntry *entry = &adev->msix_table[i];
> +
> +        if (!msix_masked(&orig) && msix_masked(entry)) {
> +            /*
> +             * Vector masked, disable it
> +             *
> +             * XXX It's not clear if we can or should actually attempt
> +             * to mask or disable the interrupt.  KVM doesn't have
> +             * support for pending bits and kvm_assign_set_msix_entry
> +             * doesn't modify the device hardware mask.  Interrupts
> +             * while masked are simply not injected to the guest, so
> +             * are lost.  Can we get away with always injecting an
> +             * interrupt on unmask?
> +             */
> +        } else if (msix_masked(&orig) && !msix_masked(entry)) {
> +            /* Vector unmasked */
> +            if (i >= adev->msi_virq_nr || adev->msi_virq[i] < 0) {
> +                /* Previously unassigned vector, start from scratch */
> +                assigned_dev_update_msix(pdev);
> +                return;
> +            } else {
> +                /* Update an existing, previously masked vector */
> +                MSIMessage msg;
> +                int ret;
> +
> +                msg.address = entry->addr_lo |
> +                    ((uint64_t)entry->addr_hi << 32);
> +                msg.data = entry->data;
> +
> +                ret = kvm_irqchip_update_msi_route(kvm_state,
> +                                                   adev->msi_virq[i], msg);
> +                if (ret) {
> +                    fprintf(stderr,
> +                            "Error updating irq routing entry (%d)\n", ret);
> +                }
> +            }
> +        }
> +    }
> +}
> +
> +static const MemoryRegionOps msix_mmio_ops = {
> +    .read = msix_mmio_read,
> +    .write = msix_mmio_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 4,
> +        .max_access_size = 8,
> +    },
> +};
> +
> +static void msix_reset(AssignedDevice *dev)
> +{
> +    MSIXTableEntry *entry;
> +    int i;
> +
> +    if (!dev->msix_table) {
> +        return;
> +    }
> +
> +    memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
> +
> +    for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
> +        entry->ctrl = cpu_to_le32(0x1); /* Masked */
> +    }
> +}
> +
> +static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
> +{
> +    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE,
> +                           MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> +    if (dev->msix_table == MAP_FAILED) {
> +        fprintf(stderr, "fail allocate msix_table! %s\n", strerror(errno));
> +        return -EFAULT;
> +    }
> +
> +    msix_reset(dev);
> +
> +    memory_region_init_io(&dev->mmio, &msix_mmio_ops, dev,
> +                          "assigned-dev-msix", MSIX_PAGE_SIZE);
> +    return 0;
> +}
> +
> +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
> +{
> +    if (!dev->msix_table) {
> +        return;
> +    }
> +
> +    memory_region_destroy(&dev->mmio);
> +
> +    if (munmap(dev->msix_table, MSIX_PAGE_SIZE) == -1) {
> +        fprintf(stderr, "error unmapping msix_table! %s\n",
> +                strerror(errno));
> +    }
> +    dev->msix_table = NULL;
> +}
> +
> +static const VMStateDescription vmstate_assigned_device = {
> +    .name = "pci-assign",
> +    .unmigratable = 1,
> +};
> +
> +static void reset_assigned_device(DeviceState *dev)
> +{
> +    PCIDevice *pci_dev = DO_UPCAST(PCIDevice, qdev, dev);
> +    AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    char reset_file[64];
> +    const char reset[] = "1";
> +    int fd, ret;
> +
> +    /*
> +     * If a guest is reset without being shutdown, MSI/MSI-X can still
> +     * be running.  We want to return the device to a known state on
> +     * reset, so disable those here.  We especially do not want MSI-X
> +     * enabled since it lives in MMIO space, which is about to get
> +     * disabled.
> +     */
> +    if (adev->assigned_irq_type == ASSIGNED_IRQ_MSIX) {
> +        uint16_t ctrl = pci_get_word(pci_dev->config +
> +                                     pci_dev->msix_cap + PCI_MSIX_FLAGS);
> +
> +        pci_set_word(pci_dev->config + pci_dev->msix_cap + PCI_MSIX_FLAGS,
> +                     ctrl & ~PCI_MSIX_FLAGS_ENABLE);
> +        assigned_dev_update_msix(pci_dev);
> +    } else if (adev->assigned_irq_type == ASSIGNED_IRQ_MSI) {
> +        uint8_t ctrl = pci_get_byte(pci_dev->config +
> +                                    pci_dev->msi_cap + PCI_MSI_FLAGS);
> +
> +        pci_set_byte(pci_dev->config + pci_dev->msi_cap + PCI_MSI_FLAGS,
> +                     ctrl & ~PCI_MSI_FLAGS_ENABLE);
> +        assigned_dev_update_msi(pci_dev);
> +    }
> +
> +    snprintf(reset_file, sizeof(reset_file),
> +             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset",
> +             adev->host.domain, adev->host.bus, adev->host.slot,
> +             adev->host.function);
> +
> +    /*
> +     * Issue a device reset via pci-sysfs.  Note that we use write(2) here
> +     * and ignore the return value because some kernels have a bug that
> +     * returns 0 rather than bytes written on success, sending us into an
> +     * infinite retry loop using other write mechanisms.
> +     */
> +    fd = open(reset_file, O_WRONLY);
> +    if (fd != -1) {
> +        ret = write(fd, reset, strlen(reset));
> +        (void)ret;
> +        close(fd);
> +    }
> +
> +    /*
> +     * When a 0 is written to the bus master register, the device is logically
> +     * disconnected from the PCI bus. This avoids further DMA transfers.
> +     */
> +    assigned_dev_pci_write_config(pci_dev, PCI_COMMAND, 0, 1);
> +}
> +
> +static int assigned_initfn(struct PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint8_t e_intx;
> +    int r;
> +
> +    if (!kvm_enabled()) {
> +        error_report("pci-assign: error: requires KVM support");
> +        return -1;
> +    }
> +
> +    if (!dev->host.domain && !dev->host.bus && !dev->host.slot &&
> +        !dev->host.function) {
> +        error_report("pci-assign: error: no host device specified");
> +        return -1;
> +    }
> +
> +    /*
> +     * Set up basic config space access control. Will be further refined during
> +     * device initialization.
> +     */
> +    assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE);
> +    assigned_dev_direct_config_read(dev, PCI_STATUS, 2);
> +    assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1);
> +    assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3);
> +    assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1);
> +    assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1);
> +    assigned_dev_direct_config_read(dev, PCI_BIST, 1);
> +    assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4);
> +    assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2);
> +    assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2);
> +    assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7);
> +    assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1);
> +    assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1);
> +    memcpy(dev->emulate_config_write, dev->emulate_config_read,
> +           sizeof(dev->emulate_config_read));
> +
> +    if (get_real_device(dev, dev->host.domain, dev->host.bus,
> +                        dev->host.slot, dev->host.function)) {
> +        error_report("pci-assign: Error: Couldn't get real device (%s)!",
> +                     dev->dev.qdev.id);
> +        goto out;
> +    }
> +
> +    if (assigned_device_pci_cap_init(pci_dev) < 0) {
> +        goto out;
> +    }
> +
> +    /* intercept MSI-X entry page in the MMIO */
> +    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> +        if (assigned_dev_register_msix_mmio(dev)) {
> +            goto out;
> +        }
> +    }
> +
> +    /* handle real device's MMIO/PIO BARs */
> +    if (assigned_dev_register_regions(dev->real_device.regions,
> +                                      dev->real_device.region_number,
> +                                      dev)) {
> +        goto out;
> +    }
> +
> +    /* handle interrupt routing */
> +    e_intx = dev->dev.config[0x3d] - 1;
> +    dev->intpin = e_intx;
> +    dev->intx_route.mode = PCI_INTX_DISABLED;
> +    dev->intx_route.irq = -1;
> +
> +    /* assign device to guest */
> +    r = assign_device(dev);
> +    if (r < 0) {
> +        goto out;
> +    }
> +
> +    /* assign legacy INTx to the device */
> +    r = assign_intx(dev);
> +    if (r < 0) {
> +        goto assigned_out;
> +    }
> +
> +    assigned_dev_load_option_rom(dev);
> +
> +    add_boot_device_path(dev->bootindex, &pci_dev->qdev, NULL);
> +
> +    return 0;
> +
> +assigned_out:
> +    deassign_device(dev);
> +out:
> +    free_assigned_device(dev);
> +    return -1;
> +}
> +
> +static void assigned_exitfn(struct PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +
> +    deassign_device(dev);
> +    free_assigned_device(dev);
> +}
> +
> +static Property da_properties[] = {

const?

> +    DEFINE_PROP_PCI_HOST_DEVADDR("host", AssignedDevice, host),
> +    DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features,
> +                    ASSIGNED_DEVICE_PREFER_MSI_BIT, false),
> +    DEFINE_PROP_BIT("share_intx", AssignedDevice, features,
> +                    ASSIGNED_DEVICE_SHARE_INTX_BIT, true),
> +    DEFINE_PROP_INT32("bootindex", AssignedDevice, bootindex, -1),
> +    DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void assign_class_init(ObjectClass *klass, void *data)
> +{
> +    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    k->init         = assigned_initfn;
> +    k->exit         = assigned_exitfn;
> +    k->config_read  = assigned_dev_pci_read_config;
> +    k->config_write = assigned_dev_pci_write_config;
> +    dc->props       = da_properties;
> +    dc->vmsd        = &vmstate_assigned_device;
> +    dc->reset       = reset_assigned_device;
> +}
> +
> +static TypeInfo assign_info = {

const?

> +    .name               = "pci-assign",
> +    .parent             = TYPE_PCI_DEVICE,
> +    .instance_size      = sizeof(AssignedDevice),
> +    .class_init         = assign_class_init,
> +};
> +
> +static void assign_register_types(void)
> +{
> +    type_register_static(&assign_info);
> +}
> +
> +type_init(assign_register_types)
> +
> +/*
> + * Scan the assigned devices for the devices that have an option ROM, and then
> + * load the corresponding ROM data to RAM. If an error occurs while loading an
> + * option ROM, we just ignore that option ROM and continue with the next one.
> + */
> +static void assigned_dev_load_option_rom(AssignedDevice *dev)
> +{
> +    char name[32], rom_file[64];
> +    FILE *fp;
> +    uint8_t val;
> +    struct stat st;
> +    void *ptr;
> +
> +    /* If loading ROM from file, pci handles it */
> +    if (dev->dev.romfile || !dev->dev.rom_bar) {
> +        return;
> +    }
> +
> +    snprintf(rom_file, sizeof(rom_file),
> +             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
> +             dev->host.domain, dev->host.bus, dev->host.slot,
> +             dev->host.function);
> +
> +    if (stat(rom_file, &st)) {
> +        return;
> +    }
> +
> +    if (access(rom_file, F_OK)) {
> +        fprintf(stderr, "pci-assign: Insufficient privileges for %s\n",
> +                rom_file);
> +        return;
> +    }
> +
> +    /* Write "1" to the ROM file to enable it */
> +    fp = fopen(rom_file, "r+");
> +    if (fp == NULL) {
> +        return;
> +    }
> +    val = 1;
> +    if (fwrite(&val, 1, 1, fp) != 1) {
> +        goto close_rom;
> +    }
> +    fseek(fp, 0, SEEK_SET);
> +
> +    snprintf(name, sizeof(name), "%s.rom",
> +            object_get_typename(OBJECT(dev)));
> +    memory_region_init_ram(&dev->dev.rom, name, st.st_size);
> +    vmstate_register_ram(&dev->dev.rom, &dev->dev.qdev);
> +    ptr = memory_region_get_ram_ptr(&dev->dev.rom);
> +    memset(ptr, 0xff, st.st_size);
> +
> +    if (!fread(ptr, 1, st.st_size, fp)) {
> +        fprintf(stderr, "pci-assign: Cannot read from host %s\n"
> +                "\tDevice option ROM contents are probably invalid "
> +                "(check dmesg).\n\tSkip option ROM probe with rombar=0, "
> +                "or load from file with romfile=\n", rom_file);
> +        memory_region_destroy(&dev->dev.rom);
> +        goto close_rom;
> +    }
> +
> +    pci_register_bar(&dev->dev, PCI_ROM_SLOT, 0, &dev->dev.rom);
> +    dev->dev.has_rom = true;
> +close_rom:
> +    /* Write "0" to disable ROM */
> +    fseek(fp, 0, SEEK_SET);
> +    val = 0;
> +    if (!fwrite(&val, 1, 1, fp)) {
> +        DEBUG("%s\n", "Failed to disable pci-sysfs rom file");
> +    }
> +    fclose(fp);
> +}
> --
> 1.7.3.4
>
>

WARNING: multiple messages have this Message-ID (diff)
From: Blue Swirl <blauwirbel@gmail.com>
To: Jan Kiszka <jan.kiszka@web.de>
Cc: kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	qemu-devel@nongnu.org,
	Alex Williamson <alex.williamson@redhat.com>,
	Avi Kivity <avi@redhat.com>
Subject: Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment
Date: Mon, 27 Aug 2012 18:56:38 +0000	[thread overview]
Message-ID: <CAAu8pHu7XZ4OStiKG78X2TuHd2RaJ08FTq5coDPRWbDbi9UBBQ@mail.gmail.com> (raw)
In-Reply-To: <825e653c9cfe9d8e26185917cbe1f1dd7ae299e2.1346048917.git.jan.kiszka@web.de>

On Mon, Aug 27, 2012 at 6:28 AM, Jan Kiszka <jan.kiszka@web.de> wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
>
> This adds PCI device assignment for i386 targets using the classic KVM
> interfaces. This version is 100% identical to what is being maintained
> in qemu-kvm for several years and is supported by libvirt as well. It is
> expected to remain relevant for another couple of years until kernels
> without full-features and performance-wise equivalent VFIO support are
> obsolete.
>
> A refactoring to-do that should be done in-tree is to model MSI and
> MSI-X support via the generic PCI layer, similar to what VFIO is already
> doing for MSI-X. This should improve the correctness and clean up the
> code from duplicate logic.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  hw/kvm/Makefile.objs |    2 +-
>  hw/kvm/pci-assign.c  | 1929 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 1930 insertions(+), 1 deletions(-)
>  create mode 100644 hw/kvm/pci-assign.c
>
> diff --git a/hw/kvm/Makefile.objs b/hw/kvm/Makefile.objs
> index 226497a..f620d7f 100644
> --- a/hw/kvm/Makefile.objs
> +++ b/hw/kvm/Makefile.objs
> @@ -1 +1 @@
> -obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o
> +obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o
> diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
> new file mode 100644
> index 0000000..9cce02c
> --- /dev/null
> +++ b/hw/kvm/pci-assign.c
> @@ -0,0 +1,1929 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + *
> + *  Assign a PCI device from the host to a guest VM.
> + *
> + *  Adapted for KVM by Qumranet.
> + *
> + *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
> + *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
> + *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
> + *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
> + *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
> + */
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <sys/io.h>
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include "hw/hw.h"
> +#include "hw/pc.h"
> +#include "qemu-error.h"
> +#include "console.h"
> +#include "hw/loader.h"
> +#include "monitor.h"
> +#include "range.h"
> +#include "sysemu.h"
> +#include "hw/pci.h"
> +#include "hw/msi.h"
> +#include "kvm_i386.h"
> +
> +#define MSIX_PAGE_SIZE 0x1000
> +
> +/* From linux/ioport.h */
> +#define IORESOURCE_IO       0x00000100  /* Resource type */
> +#define IORESOURCE_MEM      0x00000200
> +#define IORESOURCE_IRQ      0x00000400
> +#define IORESOURCE_DMA      0x00000800
> +#define IORESOURCE_PREFETCH 0x00002000  /* No side effects */
> +
> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
> +
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +#define DEBUG(fmt, ...)                                       \
> +    do {                                                      \
> +        fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);  \
> +    } while (0)
> +#else
> +#define DEBUG(fmt, ...)
> +#endif
> +
> +typedef struct {
> +    int type;           /* Memory or port I/O */
> +    int valid;
> +    uint32_t base_addr;
> +    uint32_t size;    /* size of the region */
> +    int resource_fd;
> +} PCIRegion;

Please don't use anonymous structs.

> +
> +typedef struct {
> +    uint8_t bus, dev, func; /* Bus inside domain, device and function */
> +    int irq;                /* IRQ number */
> +    uint16_t region_number; /* number of active regions */
> +
> +    /* Port I/O or MMIO Regions */
> +    PCIRegion regions[PCI_NUM_REGIONS - 1];
> +    int config_fd;
> +} PCIDevRegions;
> +
> +typedef struct {
> +    MemoryRegion container;
> +    MemoryRegion real_iomem;
> +    union {
> +        void *r_virtbase;    /* mmapped access address for memory regions */
> +        uint32_t r_baseport; /* the base guest port for I/O regions */
> +    } u;
> +    pcibus_t e_size;    /* emulated size of region in bytes */
> +    pcibus_t r_size;    /* real size of region in bytes */
> +    PCIRegion *region;
> +} AssignedDevRegion;
> +
> +#define ASSIGNED_DEVICE_PREFER_MSI_BIT  0
> +#define ASSIGNED_DEVICE_SHARE_INTX_BIT  1
> +
> +#define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT)
> +#define ASSIGNED_DEVICE_SHARE_INTX_MASK (1 << ASSIGNED_DEVICE_SHARE_INTX_BIT)
> +
> +typedef struct {
> +    uint32_t addr_lo;
> +    uint32_t addr_hi;
> +    uint32_t data;
> +    uint32_t ctrl;
> +} MSIXTableEntry;
> +
> +typedef enum AssignedIRQType {
> +    ASSIGNED_IRQ_NONE = 0,
> +    ASSIGNED_IRQ_INTX_HOST_INTX,
> +    ASSIGNED_IRQ_INTX_HOST_MSI,
> +    ASSIGNED_IRQ_MSI,
> +    ASSIGNED_IRQ_MSIX
> +} AssignedIRQType;
> +
> +typedef struct AssignedDevice {
> +    PCIDevice dev;
> +    PCIHostDeviceAddress host;
> +    uint32_t dev_id;
> +    uint32_t features;
> +    int intpin;
> +    AssignedDevRegion v_addrs[PCI_NUM_REGIONS - 1];
> +    PCIDevRegions real_device;
> +    PCIINTxRoute intx_route;
> +    AssignedIRQType assigned_irq_type;
> +    struct {
> +#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
> +#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1)
> +        uint32_t available;
> +#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
> +#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1)
> +#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
> +        uint32_t state;
> +    } cap;
> +    uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE];
> +    uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE];
> +    int msi_virq_nr;
> +    int *msi_virq;
> +    MSIXTableEntry *msix_table;
> +    target_phys_addr_t msix_table_addr;
> +    uint16_t msix_max;
> +    MemoryRegion mmio;
> +    char *configfd_name;

const? Not if this would mean more casts.

> +    int32_t bootindex;
> +} AssignedDevice;
> +
> +static void assigned_dev_update_irq_routing(PCIDevice *dev);
> +
> +static void assigned_dev_load_option_rom(AssignedDevice *dev);
> +
> +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
> +
> +static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
> +                                       target_phys_addr_t addr, int size,
> +                                       uint64_t *data)
> +{
> +    uint64_t val = 0;
> +    int fd = dev_region->region->resource_fd;
> +
> +    if (fd >= 0) {
> +        if (data) {
> +            DEBUG("pwrite data=%lx, size=%d, e_phys=%lx, addr=%lx\n",

PRIx64 for data, TARGET_FMT_Plx for e_phys and addr.

> +                  *data, size, addr, addr);
> +            if (pwrite(fd, data, size, addr) != size) {
> +                fprintf(stderr, "%s - pwrite failed %s\n",
> +                        __func__, strerror(errno));
> +            }
> +        } else {
> +            if (pread(fd, &val, size, addr) != size) {
> +                fprintf(stderr, "%s - pread failed %s\n",
> +                        __func__, strerror(errno));
> +                val = (1UL << (size * 8)) - 1;
> +            }
> +            DEBUG("pread val=%lx, size=%d, e_phys=%lx, addr=%lx\n",

Also here. Please fix all DEBUG statements.

> +                  val, size, addr, addr);
> +        }
> +    } else {
> +        uint32_t port = addr + dev_region->u.r_baseport;
> +
> +        if (data) {
> +            DEBUG("out data=%lx, size=%d, e_phys=%lx, host=%x\n",
> +                  *data, size, addr, port);
> +            switch (size) {
> +            case 1:
> +                outb(*data, port);
> +                break;
> +            case 2:
> +                outw(*data, port);
> +                break;
> +            case 4:
> +                outl(*data, port);
> +                break;

Maybe add case 8: and default: with abort(), also below.

> +            }
> +        } else {
> +            switch (size) {
> +            case 1:
> +                val = inb(port);
> +                break;
> +            case 2:
> +                val = inw(port);
> +                break;
> +            case 4:
> +                val = inl(port);
> +                break;
> +            }
> +            DEBUG("in data=%lx, size=%d, e_phys=%lx, host=%x\n",
> +                  val, size, addr, port);
> +        }
> +    }
> +    return val;
> +}
> +
> +static void assigned_dev_ioport_write(void *opaque, target_phys_addr_t addr,
> +                                      uint64_t data, unsigned size)
> +{
> +    assigned_dev_ioport_rw(opaque, addr, size, &data);
> +}
> +
> +static uint64_t assigned_dev_ioport_read(void *opaque,
> +                                         target_phys_addr_t addr, unsigned size)
> +{
> +    return assigned_dev_ioport_rw(opaque, addr, size, NULL);
> +}
> +
> +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint8_t *in = d->u.r_virtbase + addr;

Don't perform arithmetic with void pointers.

> +    uint32_t r;
> +
> +    r = *in;
> +    DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
> +
> +    return r;
> +}
> +
> +static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint16_t *in = d->u.r_virtbase + addr;
> +    uint32_t r;
> +
> +    r = *in;
> +    DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
> +
> +    return r;
> +}
> +
> +static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint32_t *in = d->u.r_virtbase + addr;
> +    uint32_t r;
> +
> +    r = *in;
> +    DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
> +
> +    return r;
> +}
> +
> +static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint8_t *out = d->u.r_virtbase + addr;
> +
> +    DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val);
> +    *out = val;
> +}
> +
> +static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint16_t *out = d->u.r_virtbase + addr;
> +
> +    DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val);
> +    *out = val;
> +}
> +
> +static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +    AssignedDevRegion *d = opaque;
> +    uint32_t *out = d->u.r_virtbase + addr;
> +
> +    DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val);
> +    *out = val;
> +}
> +
> +static const MemoryRegionOps slow_bar_ops = {
> +    .old_mmio = {
> +        .read = { slow_bar_readb, slow_bar_readw, slow_bar_readl, },
> +        .write = { slow_bar_writeb, slow_bar_writew, slow_bar_writel, },
> +    },
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +};
> +
> +static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num,
> +                                     pcibus_t e_size)
> +{
> +    AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> +    PCIRegion *real_region = &r_dev->real_device.regions[region_num];
> +
> +    if (e_size > 0) {
> +        memory_region_init(&region->container, "assigned-dev-container",
> +                           e_size);
> +        memory_region_add_subregion(&region->container, 0, &region->real_iomem);
> +
> +        /* deal with MSI-X MMIO page */
> +        if (real_region->base_addr <= r_dev->msix_table_addr &&
> +                real_region->base_addr + real_region->size >
> +                r_dev->msix_table_addr) {
> +            int offset = r_dev->msix_table_addr - real_region->base_addr;
> +
> +            memory_region_add_subregion_overlap(&region->container,
> +                                                offset,
> +                                                &r_dev->mmio,
> +                                                1);
> +        }
> +    }
> +}
> +
> +static const MemoryRegionOps assigned_dev_ioport_ops = {
> +    .read = assigned_dev_ioport_read,
> +    .write = assigned_dev_ioport_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +};
> +
> +static void assigned_dev_ioport_setup(PCIDevice *pci_dev, int region_num,
> +                                      pcibus_t size)
> +{
> +    AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> +
> +    region->e_size = size;
> +    memory_region_init(&region->container, "assigned-dev-container", size);
> +    memory_region_init_io(&region->real_iomem, &assigned_dev_ioport_ops,
> +                          r_dev->v_addrs + region_num,
> +                          "assigned-dev-iomem", size);
> +    memory_region_add_subregion(&region->container, 0, &region->real_iomem);
> +}
> +
> +static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len)
> +{
> +    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
> +    uint32_t val;
> +    ssize_t ret;
> +    int fd = pci_dev->real_device.config_fd;
> +
> +again:
> +    ret = pread(fd, &val, len, pos);
> +    if (ret != len) {
> +        if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +
> +        hw_error("pci read failed, ret = %zd errno = %d\n", ret, errno);
> +    }
> +
> +    return val;
> +}
> +
> +static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos)
> +{
> +    return (uint8_t)assigned_dev_pci_read(d, pos, 1);
> +}
> +
> +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len)
> +{
> +    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
> +    ssize_t ret;
> +    int fd = pci_dev->real_device.config_fd;
> +
> +again:
> +    ret = pwrite(fd, &val, len, pos);
> +    if (ret != len) {
> +        if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +
> +        hw_error("pci write failed, ret = %zd errno = %d\n", ret, errno);
> +    }
> +}
> +
> +static void assigned_dev_emulate_config_read(AssignedDevice *dev,
> +                                             uint32_t offset, uint32_t len)
> +{
> +    memset(dev->emulate_config_read + offset, 0xff, len);
> +}
> +
> +static void assigned_dev_direct_config_read(AssignedDevice *dev,
> +                                            uint32_t offset, uint32_t len)
> +{
> +    memset(dev->emulate_config_read + offset, 0, len);
> +}
> +
> +static void assigned_dev_direct_config_write(AssignedDevice *dev,
> +                                             uint32_t offset, uint32_t len)
> +{
> +    memset(dev->emulate_config_write + offset, 0, len);
> +}
> +
> +static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start)
> +{
> +    int id;
> +    int max_cap = 48;
> +    int pos = start ? start : PCI_CAPABILITY_LIST;
> +    int status;
> +
> +    status = assigned_dev_pci_read_byte(d, PCI_STATUS);
> +    if ((status & PCI_STATUS_CAP_LIST) == 0) {
> +        return 0;
> +    }
> +
> +    while (max_cap--) {
> +        pos = assigned_dev_pci_read_byte(d, pos);
> +        if (pos < 0x40) {
> +            break;
> +        }
> +
> +        pos &= ~3;
> +        id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID);
> +
> +        if (id == 0xff) {
> +            break;
> +        }
> +        if (id == cap) {
> +            return pos;
> +        }
> +
> +        pos += PCI_CAP_LIST_NEXT;
> +    }
> +    return 0;
> +}
> +
> +static int assigned_dev_register_regions(PCIRegion *io_regions,
> +                                         unsigned long regions_num,
> +                                         AssignedDevice *pci_dev)
> +{
> +    uint32_t i;
> +    PCIRegion *cur_region = io_regions;
> +
> +    for (i = 0; i < regions_num; i++, cur_region++) {
> +        if (!cur_region->valid) {
> +            continue;
> +        }
> +
> +        /* handle memory io regions */
> +        if (cur_region->type & IORESOURCE_MEM) {
> +            int t = cur_region->type & IORESOURCE_PREFETCH
> +                ? PCI_BASE_ADDRESS_MEM_PREFETCH
> +                : PCI_BASE_ADDRESS_SPACE_MEMORY;
> +
> +            /* map physical memory */
> +            pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size,
> +                                                    PROT_WRITE | PROT_READ,
> +                                                    MAP_SHARED,
> +                                                    cur_region->resource_fd,
> +                                                    (off_t)0);
> +
> +            if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
> +                pci_dev->v_addrs[i].u.r_virtbase = NULL;
> +                fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
> +                        "\n", __func__,
> +                        (uint32_t) (cur_region->base_addr));

Why truncate with cast?

> +                return -1;
> +            }
> +
> +            pci_dev->v_addrs[i].r_size = cur_region->size;
> +            pci_dev->v_addrs[i].e_size = 0;
> +
> +            /* add offset */
> +            pci_dev->v_addrs[i].u.r_virtbase +=
> +                (cur_region->base_addr & 0xFFF);
> +
> +            if (cur_region->size & 0xFFF) {
> +                fprintf(stderr, "PCI region %d at address 0x%llx "

PRIx64 ...

> +                        "has size 0x%x, which is not a multiple of 4K. "
> +                        "You might experience some performance hit "
> +                        "due to that.\n",
> +                        i, (unsigned long long)cur_region->base_addr,

... and remove cast.

> +                        cur_region->size);
> +                memory_region_init_io(&pci_dev->v_addrs[i].real_iomem,
> +                                      &slow_bar_ops, &pci_dev->v_addrs[i],
> +                                      "assigned-dev-slow-bar",
> +                                      cur_region->size);
> +            } else {
> +                void *virtbase = pci_dev->v_addrs[i].u.r_virtbase;
> +                char name[32];
> +                snprintf(name, sizeof(name), "%s.bar%d",
> +                         object_get_typename(OBJECT(pci_dev)), i);
> +                memory_region_init_ram_ptr(&pci_dev->v_addrs[i].real_iomem,
> +                                           name, cur_region->size,
> +                                           virtbase);
> +                vmstate_register_ram(&pci_dev->v_addrs[i].real_iomem,
> +                                     &pci_dev->dev.qdev);
> +            }
> +
> +            assigned_dev_iomem_setup(&pci_dev->dev, i, cur_region->size);
> +            pci_register_bar((PCIDevice *) pci_dev, i, t,
> +                             &pci_dev->v_addrs[i].container);
> +            continue;
> +        } else {
> +            /* handle port io regions */
> +            uint32_t val;
> +            int ret;
> +
> +            /* Test kernel support for ioport resource read/write.  Old
> +             * kernels return EIO.  New kernels only allow 1/2/4 byte reads
> +             * so should return EINVAL for a 3 byte read */
> +            ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
> +            if (ret >= 0) {
> +                fprintf(stderr, "Unexpected return from I/O port read: %d\n",
> +                        ret);
> +                abort();
> +            } else if (errno != EINVAL) {
> +                fprintf(stderr, "Kernel doesn't support ioport resource "
> +                                "access, hiding this region.\n");
> +                close(pci_dev->v_addrs[i].region->resource_fd);
> +                cur_region->valid = 0;
> +                continue;
> +            }
> +
> +            pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
> +            pci_dev->v_addrs[i].r_size = cur_region->size;
> +            pci_dev->v_addrs[i].e_size = 0;
> +
> +            assigned_dev_ioport_setup(&pci_dev->dev, i, cur_region->size);
> +            pci_register_bar((PCIDevice *) pci_dev, i,
> +                             PCI_BASE_ADDRESS_SPACE_IO,
> +                             &pci_dev->v_addrs[i].container);
> +        }
> +    }
> +
> +    /* success */
> +    return 0;
> +}
> +
> +static int get_real_id(const char *devpath, const char *idname, uint16_t *val)
> +{
> +    FILE *f;
> +    char name[128];
> +    long id;
> +
> +    snprintf(name, sizeof(name), "%s%s", devpath, idname);
> +    f = fopen(name, "r");
> +    if (f == NULL) {
> +        fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +        return -1;
> +    }
> +    if (fscanf(f, "%li\n", &id) == 1) {
> +        *val = id;
> +    } else {
> +        return -1;
> +    }
> +    fclose(f);
> +
> +    return 0;
> +}
> +
> +static int get_real_vendor_id(const char *devpath, uint16_t *val)
> +{
> +    return get_real_id(devpath, "vendor", val);
> +}
> +
> +static int get_real_device_id(const char *devpath, uint16_t *val)
> +{
> +    return get_real_id(devpath, "device", val);
> +}
> +
> +static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
> +                           uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
> +{
> +    char dir[128], name[128];
> +    int fd, r = 0, v;
> +    FILE *f;
> +    unsigned long long start, end, size, flags;

uint64_t

> +    uint16_t id;
> +    PCIRegion *rp;
> +    PCIDevRegions *dev = &pci_dev->real_device;
> +
> +    dev->region_number = 0;
> +
> +    snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
> +             r_seg, r_bus, r_dev, r_func);
> +
> +    snprintf(name, sizeof(name), "%sconfig", dir);
> +
> +    if (pci_dev->configfd_name && *pci_dev->configfd_name) {
> +        if (qemu_isdigit(pci_dev->configfd_name[0])) {
> +            dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0);
> +        } else {
> +            dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name);
> +            if (dev->config_fd < 0) {
> +                fprintf(stderr, "%s: (%s) unkown\n", __func__,
> +                        pci_dev->configfd_name);
> +                return 1;
> +            }
> +        }
> +    } else {
> +        dev->config_fd = open(name, O_RDWR);
> +
> +        if (dev->config_fd == -1) {
> +            fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +            return 1;
> +        }
> +    }
> +again:
> +    r = read(dev->config_fd, pci_dev->dev.config,
> +             pci_config_size(&pci_dev->dev));
> +    if (r < 0) {
> +        if (errno == EINTR || errno == EAGAIN) {
> +            goto again;
> +        }
> +        fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
> +    }
> +
> +    /* Restore or clear multifunction, this is always controlled by qemu */
> +    if (pci_dev->dev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> +        pci_dev->dev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    } else {
> +        pci_dev->dev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    }
> +
> +    /* Clear host resource mapping info.  If we choose not to register a
> +     * BAR, such as might be the case with the option ROM, we can get
> +     * confusing, unwritable, residual addresses from the host here. */
> +    memset(&pci_dev->dev.config[PCI_BASE_ADDRESS_0], 0, 24);
> +    memset(&pci_dev->dev.config[PCI_ROM_ADDRESS], 0, 4);
> +
> +    snprintf(name, sizeof(name), "%sresource", dir);
> +
> +    f = fopen(name, "r");
> +    if (f == NULL) {
> +        fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +        return 1;
> +    }
> +
> +    for (r = 0; r < PCI_ROM_SLOT; r++) {
> +        if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3) {

SCNi64

> +            break;
> +        }
> +
> +        rp = dev->regions + r;
> +        rp->valid = 0;
> +        rp->resource_fd = -1;
> +        size = end - start + 1;
> +        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
> +        if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) {
> +            continue;
> +        }
> +        if (flags & IORESOURCE_MEM) {
> +            flags &= ~IORESOURCE_IO;
> +        } else {
> +            flags &= ~IORESOURCE_PREFETCH;
> +        }
> +        snprintf(name, sizeof(name), "%sresource%d", dir, r);
> +        fd = open(name, O_RDWR);
> +        if (fd == -1) {
> +            continue;
> +        }
> +        rp->resource_fd = fd;
> +
> +        rp->type = flags;
> +        rp->valid = 1;
> +        rp->base_addr = start;
> +        rp->size = size;
> +        pci_dev->v_addrs[r].region = rp;
> +        DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
> +              r, rp->size, start, rp->type, rp->resource_fd);
> +    }
> +
> +    fclose(f);
> +
> +    /* read and fill vendor ID */
> +    v = get_real_vendor_id(dir, &id);
> +    if (v) {
> +        return 1;
> +    }
> +    pci_dev->dev.config[0] = id & 0xff;
> +    pci_dev->dev.config[1] = (id & 0xff00) >> 8;
> +
> +    /* read and fill device ID */
> +    v = get_real_device_id(dir, &id);
> +    if (v) {
> +        return 1;
> +    }
> +    pci_dev->dev.config[2] = id & 0xff;
> +    pci_dev->dev.config[3] = (id & 0xff00) >> 8;
> +
> +    pci_word_test_and_clear_mask(pci_dev->emulate_config_write + PCI_COMMAND,
> +                                 PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
> +
> +    dev->region_number = r;
> +    return 0;
> +}

Pretty long function, how about refactoring?

> +
> +static void free_msi_virqs(AssignedDevice *dev)
> +{
> +    int i;
> +
> +    for (i = 0; i < dev->msi_virq_nr; i++) {
> +        if (dev->msi_virq[i] >= 0) {
> +            kvm_irqchip_release_virq(kvm_state, dev->msi_virq[i]);
> +            dev->msi_virq[i] = -1;
> +        }
> +    }
> +    g_free(dev->msi_virq);
> +    dev->msi_virq = NULL;
> +    dev->msi_virq_nr = 0;
> +}
> +
> +static void free_assigned_device(AssignedDevice *dev)
> +{
> +    int i;
> +
> +    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> +        assigned_dev_unregister_msix_mmio(dev);
> +    }
> +    for (i = 0; i < dev->real_device.region_number; i++) {
> +        PCIRegion *pci_region = &dev->real_device.regions[i];
> +        AssignedDevRegion *region = &dev->v_addrs[i];
> +
> +        if (!pci_region->valid) {
> +            continue;
> +        }
> +        if (pci_region->type & IORESOURCE_IO) {
> +            if (region->u.r_baseport) {
> +                memory_region_del_subregion(&region->container,
> +                                            &region->real_iomem);
> +                memory_region_destroy(&region->real_iomem);
> +                memory_region_destroy(&region->container);
> +            }
> +        } else if (pci_region->type & IORESOURCE_MEM) {
> +            if (region->u.r_virtbase) {
> +                memory_region_del_subregion(&region->container,
> +                                            &region->real_iomem);
> +
> +                /* Remove MSI-X table subregion */
> +                if (pci_region->base_addr <= dev->msix_table_addr &&
> +                    pci_region->base_addr + pci_region->size >
> +                    dev->msix_table_addr) {
> +                    memory_region_del_subregion(&region->container,
> +                                                &dev->mmio);
> +                }
> +
> +                memory_region_destroy(&region->real_iomem);
> +                memory_region_destroy(&region->container);
> +                if (munmap(region->u.r_virtbase,
> +                           (pci_region->size + 0xFFF) & 0xFFFFF000)) {
> +                    fprintf(stderr,
> +                            "Failed to unmap assigned device region: %s\n",
> +                            strerror(errno));
> +                }
> +            }
> +        }
> +        if (pci_region->resource_fd >= 0) {
> +            close(pci_region->resource_fd);
> +        }
> +    }
> +
> +    if (dev->real_device.config_fd >= 0) {
> +        close(dev->real_device.config_fd);
> +    }
> +
> +    free_msi_virqs(dev);
> +}
> +
> +static void assign_failed_examine(AssignedDevice *dev)
> +{
> +    char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
> +    uint16_t vendor_id, device_id;
> +    int r;
> +
> +    sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
> +            dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function);

Please use snprintf(), also below.

> +
> +    sprintf(name, "%sdriver", dir);
> +
> +    r = readlink(name, driver, sizeof(driver));
> +    if ((r <= 0) || r >= sizeof(driver)) {
> +        goto fail;
> +    }
> +
> +    ns = strrchr(driver, '/');
> +    if (!ns) {
> +        goto fail;
> +    }
> +
> +    ns++;
> +
> +    if (get_real_vendor_id(dir, &vendor_id) ||
> +        get_real_device_id(dir, &device_id)) {
> +        goto fail;
> +    }
> +
> +    fprintf(stderr, "*** The driver '%s' is occupying your device "
> +                    "%04x:%02x:%02x.%x.\n",
> +            ns, dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function);
> +    fprintf(stderr, "***\n");
> +    fprintf(stderr, "*** You can try the following commands to free it:\n");
> +    fprintf(stderr, "***\n");
> +    fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
> +                    "new_id\n", vendor_id, device_id);
> +    fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
> +                    "%s/unbind\n",
> +            dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function, ns);
> +    fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
> +                    "pci-stub/bind\n",
> +            dev->host.domain, dev->host.bus, dev->host.slot,
> +            dev->host.function);
> +    fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
> +                    "/remove_id\n", vendor_id, device_id);
> +    fprintf(stderr, "***\n");
> +
> +    return;
> +
> +fail:
> +    fprintf(stderr, "Couldn't find out why.\n");
> +}
> +
> +static int assign_device(AssignedDevice *dev)
> +{
> +    uint32_t flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;
> +    int r;
> +
> +    /* Only pass non-zero PCI segment to capable module */
> +    if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) &&
> +        dev->host.domain) {
> +        fprintf(stderr, "Can't assign device inside non-zero PCI segment "
> +                "as this KVM module doesn't support it.\n");
> +        return -ENODEV;
> +    }
> +
> +    if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) {
> +        fprintf(stderr, "No IOMMU found.  Unable to assign device \"%s\"\n",
> +                dev->dev.qdev.id);
> +        return -ENODEV;
> +    }
> +
> +    if (dev->features & ASSIGNED_DEVICE_SHARE_INTX_MASK &&
> +        kvm_has_intx_set_mask()) {
> +        flags |= KVM_DEV_ASSIGN_PCI_2_3;
> +    }
> +
> +    r = kvm_device_pci_assign(kvm_state, &dev->host, flags, &dev->dev_id);
> +    if (r < 0) {
> +        fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
> +                dev->dev.qdev.id, strerror(-r));
> +
> +        switch (r) {
> +        case -EBUSY:
> +            assign_failed_examine(dev);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +    return r;
> +}
> +
> +static bool check_irqchip_in_kernel(void)
> +{
> +    if (kvm_irqchip_in_kernel()) {
> +        return true;
> +    }
> +    error_report("pci-assign: error: requires KVM with in-kernel irqchip "
> +                 "enabled");
> +    return false;
> +}
> +
> +static int assign_intx(AssignedDevice *dev)
> +{
> +    AssignedIRQType new_type;
> +    PCIINTxRoute intx_route;
> +    bool intx_host_msi;
> +    int r;
> +
> +    /* Interrupt PIN 0 means don't use INTx */
> +    if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0) {
> +        pci_device_set_intx_routing_notifier(&dev->dev, NULL);
> +        return 0;
> +    }
> +
> +    if (!check_irqchip_in_kernel()) {
> +        return -ENOTSUP;
> +    }
> +
> +    pci_device_set_intx_routing_notifier(&dev->dev,
> +                                         assigned_dev_update_irq_routing);
> +
> +    intx_route = pci_device_route_intx_to_irq(&dev->dev, 0);
> +    assert(intx_route.mode != PCI_INTX_INVERTED);
> +
> +    if (dev->intx_route.mode == intx_route.mode &&
> +        dev->intx_route.irq == intx_route.irq) {
> +        return 0;
> +    }
> +
> +    switch (dev->assigned_irq_type) {
> +    case ASSIGNED_IRQ_INTX_HOST_INTX:
> +    case ASSIGNED_IRQ_INTX_HOST_MSI:
> +        intx_host_msi = dev->assigned_irq_type == ASSIGNED_IRQ_INTX_HOST_MSI;
> +        r = kvm_device_intx_deassign(kvm_state, dev->dev_id, intx_host_msi);
> +        break;
> +    case ASSIGNED_IRQ_MSI:
> +        r = kvm_device_msi_deassign(kvm_state, dev->dev_id);
> +        break;
> +    case ASSIGNED_IRQ_MSIX:
> +        r = kvm_device_msix_deassign(kvm_state, dev->dev_id);
> +        break;
> +    default:
> +        r = 0;
> +        break;
> +    }
> +    if (r) {
> +        perror("assign_intx: deassign");

This error message is not very clear.

> +    }
> +    dev->assigned_irq_type = ASSIGNED_IRQ_NONE;
> +
> +    if (intx_route.mode == PCI_INTX_DISABLED) {
> +        dev->intx_route = intx_route;
> +        return 0;
> +    }
> +
> +retry:
> +    if (dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK &&
> +        dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +        intx_host_msi = true;
> +        new_type = ASSIGNED_IRQ_INTX_HOST_MSI;
> +    } else {
> +        intx_host_msi = false;
> +        new_type = ASSIGNED_IRQ_INTX_HOST_INTX;
> +    }
> +
> +    r = kvm_device_intx_assign(kvm_state, dev->dev_id, intx_host_msi,
> +                               intx_route.irq);
> +    if (r < 0) {
> +        if (r == -EIO && !(dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK) &&
> +            dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +            /* Retry with host-side MSI. There might be an IRQ conflict and
> +             * either the kernel or the device doesn't support sharing. */
> +            fprintf(stderr,
> +                    "Host-side INTx sharing not supported, "
> +                    "using MSI instead.\n"
> +                    "Some devices do not to work properly in this mode.\n");
> +            dev->features |= ASSIGNED_DEVICE_PREFER_MSI_MASK;
> +            goto retry;
> +        }
> +        fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
> +                dev->dev.qdev.id, strerror(-r));
> +        fprintf(stderr, "Perhaps you are assigning a device "
> +                "that shares an IRQ with another device?\n");
> +        return r;
> +    }
> +
> +    dev->intx_route = intx_route;
> +    dev->assigned_irq_type = new_type;
> +    return r;
> +}
> +
> +static void deassign_device(AssignedDevice *dev)
> +{
> +    int r;
> +
> +    r = kvm_device_pci_deassign(kvm_state, dev->dev_id);
> +    if (r < 0) {
> +        fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
> +                dev->dev.qdev.id, strerror(-r));
> +    }
> +}
> +
> +/* The pci config space got updated. Check if irq numbers have changed
> + * for our devices
> + */
> +static void assigned_dev_update_irq_routing(PCIDevice *dev)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, dev);
> +    Error *err = NULL;
> +    int r;
> +
> +    r = assign_intx(assigned_dev);
> +    if (r < 0) {
> +        qdev_unplug(&dev->qdev, &err);
> +        assert(!err);
> +    }
> +}
> +
> +static void assigned_dev_update_msi(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap +
> +                                     PCI_MSI_FLAGS);
> +    int r;
> +
> +    /* Some guests gratuitously disable MSI even if they're not using it,
> +     * try to catch this by only deassigning irqs if the guest is using
> +     * MSI or intends to start. */
> +    if (assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSI ||
> +        (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) {
> +        r = kvm_device_msi_deassign(kvm_state, assigned_dev->dev_id);
> +        /* -ENXIO means no assigned irq */
> +        if (r && r != -ENXIO) {
> +            perror("assigned_dev_update_msi: deassign irq");
> +        }
> +
> +        free_msi_virqs(assigned_dev);
> +
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE;
> +        pci_device_set_intx_routing_notifier(pci_dev, NULL);
> +    }
> +
> +    if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
> +        uint8_t *pos = pci_dev->config + pci_dev->msi_cap;
> +        MSIMessage msg;
> +        int virq;
> +
> +        msg.address = pci_get_long(pos + PCI_MSI_ADDRESS_LO);
> +        msg.data = pci_get_word(pos + PCI_MSI_DATA_32);
> +        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        if (virq < 0) {
> +            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
> +            return;
> +        }
> +
> +        assigned_dev->msi_virq = g_malloc(sizeof(*assigned_dev->msi_virq));

Is this ever freed?

> +        assigned_dev->msi_virq_nr = 1;
> +        assigned_dev->msi_virq[0] = virq;
> +        if (kvm_device_msi_assign(kvm_state, assigned_dev->dev_id, virq) < 0) {
> +            perror("assigned_dev_update_msi: kvm_device_msi_assign");
> +        }
> +
> +        assigned_dev->intx_route.mode = PCI_INTX_DISABLED;
> +        assigned_dev->intx_route.irq = -1;
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSI;
> +    } else {
> +        assign_intx(assigned_dev);
> +    }
> +}
> +
> +static bool msix_masked(MSIXTableEntry *entry)
> +{
> +    return (entry->ctrl & cpu_to_le32(0x1)) != 0;
> +}
> +
> +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint16_t entries_nr = 0;
> +    int i, r = 0;
> +    MSIXTableEntry *entry = adev->msix_table;
> +    MSIMessage msg;
> +
> +    /* Get the usable entry number for allocating */
> +    for (i = 0; i < adev->msix_max; i++, entry++) {
> +        if (msix_masked(entry)) {
> +            continue;
> +        }
> +        entries_nr++;
> +    }
> +
> +    DEBUG("MSI-X entries: %d\n", entries_nr);
> +
> +    /* It's valid to enable MSI-X with all entries masked */
> +    if (!entries_nr) {
> +        return 0;
> +    }
> +
> +    r = kvm_device_msix_init_vectors(kvm_state, adev->dev_id, entries_nr);
> +    if (r != 0) {
> +        fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
> +                strerror(-r));
> +        return r;
> +    }
> +
> +    free_msi_virqs(adev);
> +
> +    adev->msi_virq_nr = adev->msix_max;
> +    adev->msi_virq = g_malloc(adev->msix_max * sizeof(*adev->msi_virq));
> +
> +    entry = adev->msix_table;
> +    for (i = 0; i < adev->msix_max; i++, entry++) {
> +        adev->msi_virq[i] = -1;
> +
> +        if (msix_masked(entry)) {
> +            continue;
> +        }
> +
> +        msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32);
> +        msg.data = entry->data;
> +        r = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        if (r < 0) {
> +            return r;
> +        }
> +        adev->msi_virq[i] = r;
> +
> +        DEBUG("MSI-X vector %d, gsi %d, addr %08x_%08x, data %08x\n", i,
> +              r, entry->addr_hi, entry->addr_lo, entry->data);
> +
> +        r = kvm_device_msix_set_vector(kvm_state, adev->dev_id, i,
> +                                       adev->msi_virq[i]);
> +        if (r) {
> +            fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
> +            break;
> +        }
> +    }
> +
> +    return r;
> +}
> +
> +static void assigned_dev_update_msix(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint16_t ctrl_word = pci_get_word(pci_dev->config + pci_dev->msix_cap +
> +                                      PCI_MSIX_FLAGS);
> +    int r;
> +
> +    /* Some guests gratuitously disable MSIX even if they're not using it,
> +     * try to catch this by only deassigning irqs if the guest is using
> +     * MSIX or intends to start. */
> +    if ((assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSIX) ||
> +        (ctrl_word & PCI_MSIX_FLAGS_ENABLE)) {
> +        r = kvm_device_msix_deassign(kvm_state, assigned_dev->dev_id);
> +        /* -ENXIO means no assigned irq */
> +        if (r && r != -ENXIO) {
> +            perror("assigned_dev_update_msix: deassign irq");
> +        }
> +
> +        free_msi_virqs(assigned_dev);
> +
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE;
> +        pci_device_set_intx_routing_notifier(pci_dev, NULL);
> +    }
> +
> +    if (ctrl_word & PCI_MSIX_FLAGS_ENABLE) {
> +        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
> +            perror("assigned_dev_update_msix_mmio");
> +            return;
> +        }
> +
> +        if (assigned_dev->msi_virq_nr > 0) {
> +            if (kvm_device_msix_assign(kvm_state, assigned_dev->dev_id) < 0) {
> +                perror("assigned_dev_enable_msix: assign irq");
> +                return;
> +            }
> +        }
> +        assigned_dev->intx_route.mode = PCI_INTX_DISABLED;
> +        assigned_dev->intx_route.irq = -1;
> +        assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSIX;
> +    } else {
> +        assign_intx(assigned_dev);
> +    }
> +}
> +
> +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev,
> +                                             uint32_t address, int len)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint32_t virt_val = pci_default_read_config(pci_dev, address, len);
> +    uint32_t real_val, emulate_mask, full_emulation_mask;
> +
> +    emulate_mask = 0;
> +    memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len);
> +    emulate_mask = le32_to_cpu(emulate_mask);
> +
> +    full_emulation_mask = 0xffffffff >> (32 - len * 8);
> +
> +    if (emulate_mask != full_emulation_mask) {
> +        real_val = assigned_dev_pci_read(pci_dev, address, len);
> +        return (virt_val & emulate_mask) | (real_val & ~emulate_mask);
> +    } else {
> +        return virt_val;
> +    }
> +}
> +
> +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address,
> +                                          uint32_t val, int len)
> +{
> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint16_t old_cmd = pci_get_word(pci_dev->config + PCI_COMMAND);
> +    uint32_t emulate_mask, full_emulation_mask;
> +    int ret;
> +
> +    pci_default_write_config(pci_dev, address, val, len);
> +
> +    if (kvm_has_intx_set_mask() &&
> +        range_covers_byte(address, len, PCI_COMMAND + 1)) {
> +        bool intx_masked = (pci_get_word(pci_dev->config + PCI_COMMAND) &
> +                            PCI_COMMAND_INTX_DISABLE);
> +
> +        if (intx_masked != !!(old_cmd & PCI_COMMAND_INTX_DISABLE)) {
> +            ret = kvm_device_intx_set_mask(kvm_state, assigned_dev->dev_id,
> +                                           intx_masked);
> +            if (ret) {
> +                perror("assigned_dev_pci_write_config: set intx mask");
> +            }
> +        }
> +    }
> +    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +        if (range_covers_byte(address, len,
> +                              pci_dev->msi_cap + PCI_MSI_FLAGS)) {
> +            assigned_dev_update_msi(pci_dev);
> +        }
> +    }
> +    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> +        if (range_covers_byte(address, len,
> +                              pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) {
> +            assigned_dev_update_msix(pci_dev);
> +        }
> +    }
> +
> +    emulate_mask = 0;
> +    memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len);
> +    emulate_mask = le32_to_cpu(emulate_mask);
> +
> +    full_emulation_mask = 0xffffffff >> (32 - len * 8);
> +
> +    if (emulate_mask != full_emulation_mask) {
> +        if (emulate_mask) {
> +            val &= ~emulate_mask;
> +            val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask;
> +        }
> +        assigned_dev_pci_write(pci_dev, address, val, len);
> +    }
> +}
> +
> +static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset,
> +                                        uint32_t len)
> +{
> +    assigned_dev_direct_config_read(dev, offset, len);
> +    assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1);
> +}
> +
> +static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    PCIRegion *pci_region = dev->real_device.regions;
> +    int ret, pos;
> +
> +    /* Clear initial capabilities pointer and status copied from hw */
> +    pci_set_byte(pci_dev->config + PCI_CAPABILITY_LIST, 0);
> +    pci_set_word(pci_dev->config + PCI_STATUS,
> +                 pci_get_word(pci_dev->config + PCI_STATUS) &
> +                 ~PCI_STATUS_CAP_LIST);
> +
> +    /* Expose MSI capability
> +     * MSI capability is the 1st capability in capability config */
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0);
> +    if (pos != 0 && kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) {
> +        if (!check_irqchip_in_kernel()) {
> +            return -ENOTSUP;
> +        }
> +        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
> +        /* Only 32-bit/no-mask currently supported */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +        pci_dev->msi_cap = pos;
> +
> +        pci_set_word(pci_dev->config + pos + PCI_MSI_FLAGS,
> +                     pci_get_word(pci_dev->config + pos + PCI_MSI_FLAGS) &
> +                     PCI_MSI_FLAGS_QMASK);
> +        pci_set_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO, 0);
> +        pci_set_word(pci_dev->config + pos + PCI_MSI_DATA_32, 0);
> +
> +        /* Set writable fields */
> +        pci_set_word(pci_dev->wmask + pos + PCI_MSI_FLAGS,
> +                     PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
> +        pci_set_long(pci_dev->wmask + pos + PCI_MSI_ADDRESS_LO, 0xfffffffc);
> +        pci_set_word(pci_dev->wmask + pos + PCI_MSI_DATA_32, 0xffff);
> +    }
> +    /* Expose MSI-X capability */
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0);
> +    if (pos != 0 && kvm_device_msix_supported(kvm_state)) {
> +        int bar_nr;
> +        uint32_t msix_table_entry;
> +
> +        if (!check_irqchip_in_kernel()) {
> +            return -ENOTSUP;
> +        }
> +        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, 12);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +        pci_dev->msix_cap = pos;
> +
> +        pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS,
> +                     pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
> +                     PCI_MSIX_FLAGS_QSIZE);
> +
> +        /* Only enable and function mask bits are writable */
> +        pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS,
> +                     PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
> +
> +        msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
> +        bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
> +        msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
> +        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
> +        dev->msix_max = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS);
> +        dev->msix_max &= PCI_MSIX_FLAGS_QSIZE;
> +        dev->msix_max += 1;
> +    }
> +
> +    /* Minimal PM support, nothing writable, device appears to NAK changes */
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0);
> +    if (pos) {
> +        uint16_t pmc;
> +
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF);
> +
> +        pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
> +        pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
> +        pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
> +
> +        /* assign_device will bring the device up to D0, so we don't need
> +         * to worry about doing that ourselves here. */
> +        pci_set_word(pci_dev->config + pos + PCI_PM_CTRL,
> +                     PCI_PM_CTRL_NO_SOFT_RESET);
> +
> +        pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0);
> +        pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0);
> +    }
> +
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0);
> +    if (pos) {
> +        uint8_t version, size = 0;
> +        uint16_t type, devctl, lnksta;
> +        uint32_t devcap, lnkcap;
> +
> +        version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS);
> +        version &= PCI_EXP_FLAGS_VERS;
> +        if (version == 1) {
> +            size = 0x14;
> +        } else if (version == 2) {
> +            /*
> +             * Check for non-std size, accept reduced size to 0x34,
> +             * which is what bcm5761 implemented, violating the
> +             * PCIe v3.0 spec that regs should exist and be read as 0,
> +             * not optionally provided and shorten the struct size.
> +             */
> +            size = MIN(0x3c, PCI_CONFIG_SPACE_SIZE - pos);
> +            if (size < 0x34) {
> +                fprintf(stderr,
> +                        "%s: Invalid size PCIe cap-id 0x%x\n",
> +                        __func__, PCI_CAP_ID_EXP);
> +                return -EINVAL;
> +            } else if (size != 0x3c) {
> +                fprintf(stderr,
> +                        "WARNING, %s: PCIe cap-id 0x%x has "
> +                        "non-standard size 0x%x; std size should be 0x3c\n",
> +                         __func__, PCI_CAP_ID_EXP, size);
> +            }
> +        } else if (version == 0) {
> +            uint16_t vid, did;
> +            vid = pci_get_word(pci_dev->config + PCI_VENDOR_ID);
> +            did = pci_get_word(pci_dev->config + PCI_DEVICE_ID);
> +            if (vid == PCI_VENDOR_ID_INTEL && did == 0x10ed) {
> +                /*
> +                 * quirk for Intel 82599 VF with invalid PCIe capability
> +                 * version, should really be version 2 (same as PF)
> +                 */
> +                size = 0x3c;
> +            }
> +        }
> +
> +        if (size == 0) {
> +            fprintf(stderr,
> +                    "%s: Unsupported PCI express capability version %d\n",
> +                    __func__, version);
> +            return -EINVAL;
> +        }
> +
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP, pos, size);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, size);
> +
> +        type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS);
> +        type = (type & PCI_EXP_FLAGS_TYPE) >> 4;
> +        if (type != PCI_EXP_TYPE_ENDPOINT &&
> +            type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) {
> +            fprintf(stderr,
> +                    "Device assignment only supports endpoint assignment, "
> +                    "device type %d\n", type);
> +            return -EINVAL;
> +        }
> +
> +        /* capabilities, pass existing read-only copy
> +         * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */
> +
> +        /* device capabilities: hide FLR */
> +        devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP);
> +        devcap &= ~PCI_EXP_DEVCAP_FLR;
> +        pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap);
> +
> +        /* device control: clear all error reporting enable bits, leaving
> +         *                 only a few host values.  Note, these are
> +         *                 all writable, but not passed to hw.
> +         */
> +        devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL);
> +        devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) |
> +                  PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN;
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl);
> +        devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME;
> +        pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl);
> +
> +        /* Clear device status */
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0);
> +
> +        /* Link capabilities, expose links and latencues, clear reporting */
> +        lnkcap = pci_get_long(pci_dev->config + pos + PCI_EXP_LNKCAP);
> +        lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW |
> +                   PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL |
> +                   PCI_EXP_LNKCAP_L1EL);
> +        pci_set_long(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap);
> +
> +        /* Link control, pass existing read-only copy.  Should be writable? */
> +
> +        /* Link status, only expose current speed and width */
> +        lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA);
> +        lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta);
> +
> +        if (version >= 2) {
> +            /* Slot capabilities, control, status - not needed for endpoints */
> +            pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0);
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0);
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0);
> +
> +            /* Root control, capabilities, status - not needed for endpoints */
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0);
> +            pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0);
> +            pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0);
> +
> +            /* Device capabilities/control 2, pass existing read-only copy */
> +            /* Link control 2, pass existing read-only copy */
> +        }
> +    }
> +
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0);
> +    if (pos) {
> +        uint16_t cmd;
> +        uint32_t status;
> +
> +        /* Only expose the minimum, 8 byte capability */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, 8);
> +
> +        /* Command register, clear upper bits, including extended modes */
> +        cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
> +        cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
> +                PCI_X_CMD_MAX_SPLIT);
> +        pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd);
> +
> +        /* Status register, update with emulated PCI bus location, clear
> +         * error bits, leave the rest. */
> +        status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
> +        status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
> +        status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn;
> +        status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
> +                    PCI_X_STATUS_SPL_ERR);
> +        pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
> +    }
> +
> +    pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0);
> +    if (pos) {
> +        /* Direct R/W passthrough */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, 8);
> +
> +        /* direct write for cap content */
> +        assigned_dev_direct_config_write(dev, pos + 2, 6);
> +    }
> +
> +    /* Devices can have multiple vendor capabilities, get them all */
> +    for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
> +        pos += PCI_CAP_LIST_NEXT) {
> +        uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS);
> +        /* Direct R/W passthrough */
> +        ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, pos, len);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        assigned_dev_setup_cap_read(dev, pos, len);
> +
> +        /* direct write for cap content */
> +        assigned_dev_direct_config_write(dev, pos + 2, len - 2);
> +    }
> +
> +    /* If real and virtual capability list status bits differ, virtualize the
> +     * access. */
> +    if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) !=
> +        (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) &
> +         PCI_STATUS_CAP_LIST)) {
> +        dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> +    }
> +
> +    return 0;
> +}

Also this function is way too long.

> +
> +static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
> +                               unsigned size)
> +{
> +    AssignedDevice *adev = opaque;
> +    uint64_t val;
> +
> +    memcpy(&val, (void *)((uint8_t *)adev->msix_table + addr), size);
> +
> +    return val;
> +}
> +
> +static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> +                            uint64_t val, unsigned size)
> +{
> +    AssignedDevice *adev = opaque;
> +    PCIDevice *pdev = &adev->dev;
> +    uint16_t ctrl;
> +    MSIXTableEntry orig;
> +    int i = addr >> 4;
> +
> +    if (i >= adev->msix_max) {
> +        return; /* Drop write */
> +    }
> +
> +    ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> +
> +    DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> +
> +    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> +        orig = adev->msix_table[i];
> +    }
> +
> +    memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);

Cast to void * shouldn't be necessary.

> +
> +    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> +        MSIXTableEntry *entry = &adev->msix_table[i];
> +
> +        if (!msix_masked(&orig) && msix_masked(entry)) {
> +            /*
> +             * Vector masked, disable it
> +             *
> +             * XXX It's not clear if we can or should actually attempt
> +             * to mask or disable the interrupt.  KVM doesn't have
> +             * support for pending bits and kvm_assign_set_msix_entry
> +             * doesn't modify the device hardware mask.  Interrupts
> +             * while masked are simply not injected to the guest, so
> +             * are lost.  Can we get away with always injecting an
> +             * interrupt on unmask?
> +             */
> +        } else if (msix_masked(&orig) && !msix_masked(entry)) {
> +            /* Vector unmasked */
> +            if (i >= adev->msi_virq_nr || adev->msi_virq[i] < 0) {
> +                /* Previously unassigned vector, start from scratch */
> +                assigned_dev_update_msix(pdev);
> +                return;
> +            } else {
> +                /* Update an existing, previously masked vector */
> +                MSIMessage msg;
> +                int ret;
> +
> +                msg.address = entry->addr_lo |
> +                    ((uint64_t)entry->addr_hi << 32);
> +                msg.data = entry->data;
> +
> +                ret = kvm_irqchip_update_msi_route(kvm_state,
> +                                                   adev->msi_virq[i], msg);
> +                if (ret) {
> +                    fprintf(stderr,
> +                            "Error updating irq routing entry (%d)\n", ret);
> +                }
> +            }
> +        }
> +    }
> +}
> +
> +static const MemoryRegionOps msix_mmio_ops = {
> +    .read = msix_mmio_read,
> +    .write = msix_mmio_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 4,
> +        .max_access_size = 8,
> +    },
> +};
> +
> +static void msix_reset(AssignedDevice *dev)
> +{
> +    MSIXTableEntry *entry;
> +    int i;
> +
> +    if (!dev->msix_table) {
> +        return;
> +    }
> +
> +    memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
> +
> +    for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
> +        entry->ctrl = cpu_to_le32(0x1); /* Masked */
> +    }
> +}
> +
> +static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
> +{
> +    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE,
> +                           MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> +    if (dev->msix_table == MAP_FAILED) {
> +        fprintf(stderr, "fail allocate msix_table! %s\n", strerror(errno));
> +        return -EFAULT;
> +    }
> +
> +    msix_reset(dev);
> +
> +    memory_region_init_io(&dev->mmio, &msix_mmio_ops, dev,
> +                          "assigned-dev-msix", MSIX_PAGE_SIZE);
> +    return 0;
> +}
> +
> +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
> +{
> +    if (!dev->msix_table) {
> +        return;
> +    }
> +
> +    memory_region_destroy(&dev->mmio);
> +
> +    if (munmap(dev->msix_table, MSIX_PAGE_SIZE) == -1) {
> +        fprintf(stderr, "error unmapping msix_table! %s\n",
> +                strerror(errno));
> +    }
> +    dev->msix_table = NULL;
> +}
> +
> +static const VMStateDescription vmstate_assigned_device = {
> +    .name = "pci-assign",
> +    .unmigratable = 1,
> +};
> +
> +static void reset_assigned_device(DeviceState *dev)
> +{
> +    PCIDevice *pci_dev = DO_UPCAST(PCIDevice, qdev, dev);
> +    AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    char reset_file[64];
> +    const char reset[] = "1";
> +    int fd, ret;
> +
> +    /*
> +     * If a guest is reset without being shutdown, MSI/MSI-X can still
> +     * be running.  We want to return the device to a known state on
> +     * reset, so disable those here.  We especially do not want MSI-X
> +     * enabled since it lives in MMIO space, which is about to get
> +     * disabled.
> +     */
> +    if (adev->assigned_irq_type == ASSIGNED_IRQ_MSIX) {
> +        uint16_t ctrl = pci_get_word(pci_dev->config +
> +                                     pci_dev->msix_cap + PCI_MSIX_FLAGS);
> +
> +        pci_set_word(pci_dev->config + pci_dev->msix_cap + PCI_MSIX_FLAGS,
> +                     ctrl & ~PCI_MSIX_FLAGS_ENABLE);
> +        assigned_dev_update_msix(pci_dev);
> +    } else if (adev->assigned_irq_type == ASSIGNED_IRQ_MSI) {
> +        uint8_t ctrl = pci_get_byte(pci_dev->config +
> +                                    pci_dev->msi_cap + PCI_MSI_FLAGS);
> +
> +        pci_set_byte(pci_dev->config + pci_dev->msi_cap + PCI_MSI_FLAGS,
> +                     ctrl & ~PCI_MSI_FLAGS_ENABLE);
> +        assigned_dev_update_msi(pci_dev);
> +    }
> +
> +    snprintf(reset_file, sizeof(reset_file),
> +             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset",
> +             adev->host.domain, adev->host.bus, adev->host.slot,
> +             adev->host.function);
> +
> +    /*
> +     * Issue a device reset via pci-sysfs.  Note that we use write(2) here
> +     * and ignore the return value because some kernels have a bug that
> +     * returns 0 rather than bytes written on success, sending us into an
> +     * infinite retry loop using other write mechanisms.
> +     */
> +    fd = open(reset_file, O_WRONLY);
> +    if (fd != -1) {
> +        ret = write(fd, reset, strlen(reset));
> +        (void)ret;
> +        close(fd);
> +    }
> +
> +    /*
> +     * When a 0 is written to the bus master register, the device is logically
> +     * disconnected from the PCI bus. This avoids further DMA transfers.
> +     */
> +    assigned_dev_pci_write_config(pci_dev, PCI_COMMAND, 0, 1);
> +}
> +
> +static int assigned_initfn(struct PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +    uint8_t e_intx;
> +    int r;
> +
> +    if (!kvm_enabled()) {
> +        error_report("pci-assign: error: requires KVM support");
> +        return -1;
> +    }
> +
> +    if (!dev->host.domain && !dev->host.bus && !dev->host.slot &&
> +        !dev->host.function) {
> +        error_report("pci-assign: error: no host device specified");
> +        return -1;
> +    }
> +
> +    /*
> +     * Set up basic config space access control. Will be further refined during
> +     * device initialization.
> +     */
> +    assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE);
> +    assigned_dev_direct_config_read(dev, PCI_STATUS, 2);
> +    assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1);
> +    assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3);
> +    assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1);
> +    assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1);
> +    assigned_dev_direct_config_read(dev, PCI_BIST, 1);
> +    assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4);
> +    assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2);
> +    assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2);
> +    assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7);
> +    assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1);
> +    assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1);
> +    memcpy(dev->emulate_config_write, dev->emulate_config_read,
> +           sizeof(dev->emulate_config_read));
> +
> +    if (get_real_device(dev, dev->host.domain, dev->host.bus,
> +                        dev->host.slot, dev->host.function)) {
> +        error_report("pci-assign: Error: Couldn't get real device (%s)!",
> +                     dev->dev.qdev.id);
> +        goto out;
> +    }
> +
> +    if (assigned_device_pci_cap_init(pci_dev) < 0) {
> +        goto out;
> +    }
> +
> +    /* intercept MSI-X entry page in the MMIO */
> +    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> +        if (assigned_dev_register_msix_mmio(dev)) {
> +            goto out;
> +        }
> +    }
> +
> +    /* handle real device's MMIO/PIO BARs */
> +    if (assigned_dev_register_regions(dev->real_device.regions,
> +                                      dev->real_device.region_number,
> +                                      dev)) {
> +        goto out;
> +    }
> +
> +    /* handle interrupt routing */
> +    e_intx = dev->dev.config[0x3d] - 1;
> +    dev->intpin = e_intx;
> +    dev->intx_route.mode = PCI_INTX_DISABLED;
> +    dev->intx_route.irq = -1;
> +
> +    /* assign device to guest */
> +    r = assign_device(dev);
> +    if (r < 0) {
> +        goto out;
> +    }
> +
> +    /* assign legacy INTx to the device */
> +    r = assign_intx(dev);
> +    if (r < 0) {
> +        goto assigned_out;
> +    }
> +
> +    assigned_dev_load_option_rom(dev);
> +
> +    add_boot_device_path(dev->bootindex, &pci_dev->qdev, NULL);
> +
> +    return 0;
> +
> +assigned_out:
> +    deassign_device(dev);
> +out:
> +    free_assigned_device(dev);
> +    return -1;
> +}
> +
> +static void assigned_exitfn(struct PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> +
> +    deassign_device(dev);
> +    free_assigned_device(dev);
> +}
> +
> +static Property da_properties[] = {

const?

> +    DEFINE_PROP_PCI_HOST_DEVADDR("host", AssignedDevice, host),
> +    DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features,
> +                    ASSIGNED_DEVICE_PREFER_MSI_BIT, false),
> +    DEFINE_PROP_BIT("share_intx", AssignedDevice, features,
> +                    ASSIGNED_DEVICE_SHARE_INTX_BIT, true),
> +    DEFINE_PROP_INT32("bootindex", AssignedDevice, bootindex, -1),
> +    DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void assign_class_init(ObjectClass *klass, void *data)
> +{
> +    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    k->init         = assigned_initfn;
> +    k->exit         = assigned_exitfn;
> +    k->config_read  = assigned_dev_pci_read_config;
> +    k->config_write = assigned_dev_pci_write_config;
> +    dc->props       = da_properties;
> +    dc->vmsd        = &vmstate_assigned_device;
> +    dc->reset       = reset_assigned_device;
> +}
> +
> +static TypeInfo assign_info = {

const?

> +    .name               = "pci-assign",
> +    .parent             = TYPE_PCI_DEVICE,
> +    .instance_size      = sizeof(AssignedDevice),
> +    .class_init         = assign_class_init,
> +};
> +
> +static void assign_register_types(void)
> +{
> +    type_register_static(&assign_info);
> +}
> +
> +type_init(assign_register_types)
> +
> +/*
> + * Scan the assigned devices for the devices that have an option ROM, and then
> + * load the corresponding ROM data to RAM. If an error occurs while loading an
> + * option ROM, we just ignore that option ROM and continue with the next one.
> + */
> +static void assigned_dev_load_option_rom(AssignedDevice *dev)
> +{
> +    char name[32], rom_file[64];
> +    FILE *fp;
> +    uint8_t val;
> +    struct stat st;
> +    void *ptr;
> +
> +    /* If loading ROM from file, pci handles it */
> +    if (dev->dev.romfile || !dev->dev.rom_bar) {
> +        return;
> +    }
> +
> +    snprintf(rom_file, sizeof(rom_file),
> +             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
> +             dev->host.domain, dev->host.bus, dev->host.slot,
> +             dev->host.function);
> +
> +    if (stat(rom_file, &st)) {
> +        return;
> +    }
> +
> +    if (access(rom_file, F_OK)) {
> +        fprintf(stderr, "pci-assign: Insufficient privileges for %s\n",
> +                rom_file);
> +        return;
> +    }
> +
> +    /* Write "1" to the ROM file to enable it */
> +    fp = fopen(rom_file, "r+");
> +    if (fp == NULL) {
> +        return;
> +    }
> +    val = 1;
> +    if (fwrite(&val, 1, 1, fp) != 1) {
> +        goto close_rom;
> +    }
> +    fseek(fp, 0, SEEK_SET);
> +
> +    snprintf(name, sizeof(name), "%s.rom",
> +            object_get_typename(OBJECT(dev)));
> +    memory_region_init_ram(&dev->dev.rom, name, st.st_size);
> +    vmstate_register_ram(&dev->dev.rom, &dev->dev.qdev);
> +    ptr = memory_region_get_ram_ptr(&dev->dev.rom);
> +    memset(ptr, 0xff, st.st_size);
> +
> +    if (!fread(ptr, 1, st.st_size, fp)) {
> +        fprintf(stderr, "pci-assign: Cannot read from host %s\n"
> +                "\tDevice option ROM contents are probably invalid "
> +                "(check dmesg).\n\tSkip option ROM probe with rombar=0, "
> +                "or load from file with romfile=\n", rom_file);
> +        memory_region_destroy(&dev->dev.rom);
> +        goto close_rom;
> +    }
> +
> +    pci_register_bar(&dev->dev, PCI_ROM_SLOT, 0, &dev->dev.rom);
> +    dev->dev.has_rom = true;
> +close_rom:
> +    /* Write "0" to disable ROM */
> +    fseek(fp, 0, SEEK_SET);
> +    val = 0;
> +    if (!fwrite(&val, 1, 1, fp)) {
> +        DEBUG("%s\n", "Failed to disable pci-sysfs rom file");
> +    }
> +    fclose(fp);
> +}
> --
> 1.7.3.4
>
>

  parent reply	other threads:[~2012-08-27 18:56 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-27  6:28 [PATCH 0/4] uq/master: Add classic PCI device assignment Jan Kiszka
2012-08-27  6:28 ` [Qemu-devel] " Jan Kiszka
2012-08-27  6:28 ` [PATCH 1/4] kvm: Introduce kvm_irqchip_update_msi_route Jan Kiszka
2012-08-27  6:28   ` [Qemu-devel] " Jan Kiszka
2012-08-27  6:28 ` [PATCH 2/4] kvm: Introduce kvm_has_intx_set_mask Jan Kiszka
2012-08-27  6:28   ` [Qemu-devel] " Jan Kiszka
2012-08-27  6:28 ` [PATCH 3/4] kvm: i386: Add services required for PCI device assignment Jan Kiszka
2012-08-27  6:28   ` [Qemu-devel] " Jan Kiszka
2012-08-27  6:28 ` [PATCH 4/4] kvm: i386: Add classic " Jan Kiszka
2012-08-27  6:28   ` [Qemu-devel] " Jan Kiszka
2012-08-27 12:07   ` Andreas Färber
2012-08-27 12:07     ` Andreas Färber
2012-08-27 12:15     ` Jan Kiszka
2012-08-27 12:15       ` [Qemu-devel] " Jan Kiszka
2012-08-28 21:26       ` Peter Maydell
2012-08-28 21:26         ` [Qemu-devel] " Peter Maydell
2012-08-29  8:47         ` Jan Kiszka
2012-08-29  8:47           ` [Qemu-devel] " Jan Kiszka
2012-08-29  8:49           ` Peter Maydell
2012-08-29  8:49             ` Peter Maydell
2012-08-29  8:50             ` Jan Kiszka
2012-08-29  8:50               ` Jan Kiszka
2012-09-03 15:59             ` Avi Kivity
2012-09-03 15:59               ` Avi Kivity
2012-09-04  3:31               ` Alex Williamson
2012-09-04  3:31                 ` [Qemu-devel] " Alex Williamson
2012-08-28 12:57     ` Anthony Liguori
2012-08-28 12:57       ` Anthony Liguori
2012-08-29 14:08       ` Andreas Färber
2012-08-29 14:08         ` Andreas Färber
2012-08-29 18:32         ` Anthony Liguori
2012-08-29 18:32           ` [Qemu-devel] " Anthony Liguori
2012-09-04 21:00         ` Anthony Liguori
2012-09-04 21:00           ` [Qemu-devel] " Anthony Liguori
2012-09-05 15:26           ` Avi Kivity
2012-09-05 15:26             ` [Qemu-devel] " Avi Kivity
2012-09-05 15:29             ` Michael S. Tsirkin
2012-09-05 15:29               ` Michael S. Tsirkin
2012-09-05 15:41             ` Anthony Liguori
2012-09-05 15:41               ` Anthony Liguori
2012-09-05 15:52               ` Avi Kivity
2012-09-05 15:52                 ` Avi Kivity
2012-09-05 19:04               ` Blue Swirl
2012-09-05 19:04                 ` Blue Swirl
2012-09-05 19:22                 ` Anthony Liguori
2012-09-05 19:22                   ` Anthony Liguori
2012-09-05 19:38                   ` Blue Swirl
2012-09-05 19:38                     ` Blue Swirl
2012-09-05 20:46                     ` Anthony Liguori
2012-09-05 20:46                       ` [Qemu-devel] " Anthony Liguori
2012-09-10 15:33                       ` Andreas Färber
2012-09-10 15:33                         ` Andreas Färber
2012-09-06  3:42                     ` [Qemu-ppc] " Alexander Graf
2012-09-06  3:42                       ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2012-09-08  7:54                       ` [Qemu-ppc] [Qemu-devel] " Blue Swirl
2012-09-08  7:54                         ` [Qemu-devel] [Qemu-ppc] " Blue Swirl
2012-09-05 19:24                 ` Eric Blake
2012-09-05 19:24                   ` [Qemu-devel] " Eric Blake
2012-09-05 19:43                   ` Blue Swirl
2012-09-05 19:43                     ` Blue Swirl
2012-09-06  8:44                 ` Avi Kivity
2012-09-06  8:44                   ` Avi Kivity
2012-09-08  8:06                   ` Blue Swirl
2012-09-08  8:06                     ` [Qemu-devel] " Blue Swirl
2012-09-08  9:28                     ` [Qemu-ppc] " Alexander Graf
2012-09-08  9:28                       ` [Qemu-devel] " Alexander Graf
2012-09-08 10:16                       ` [Qemu-ppc] [Qemu-devel] " Blue Swirl
2012-09-08 10:16                         ` [Qemu-devel] [Qemu-ppc] " Blue Swirl
2012-09-08 12:13                         ` Alexander Graf
2012-09-08 12:13                           ` [Qemu-devel] " Alexander Graf
2012-09-08 12:30                           ` Blue Swirl
2012-09-08 12:30                             ` [Qemu-devel] " Blue Swirl
2012-09-08 14:59                             ` Alexander Graf
2012-09-08 14:59                               ` [Qemu-devel] " Alexander Graf
2012-08-27 18:56   ` Blue Swirl [this message]
2012-08-27 18:56     ` [Qemu-devel] " Blue Swirl
2012-08-27 19:01     ` Michael S. Tsirkin
2012-08-27 19:01       ` Michael S. Tsirkin
2012-08-27 19:06       ` Blue Swirl
2012-08-27 19:06         ` [Qemu-devel] " Blue Swirl
2012-08-28  0:30     ` Jan Kiszka
2012-08-28  0:30       ` Jan Kiszka
2012-09-03 16:06       ` Avi Kivity
2012-09-03 16:06         ` Avi Kivity
2012-08-28  7:35     ` Michael Tokarev
2012-08-28  7:35       ` Michael Tokarev
2012-08-28 17:01       ` Blue Swirl
2012-08-28 17:01         ` Blue Swirl
2012-08-28 17:28         ` Michael S. Tsirkin
2012-08-28 17:28           ` Michael S. Tsirkin
2012-08-28 17:38           ` Blue Swirl
2012-08-28 17:38             ` Blue Swirl
2012-08-28 19:31             ` Anthony Liguori
2012-08-28 19:31               ` [Qemu-devel] " Anthony Liguori
2012-08-28 19:49               ` malc
2012-08-28 19:49                 ` [Qemu-devel] " malc
2012-08-28 20:06               ` Blue Swirl
2012-08-28 20:06                 ` Blue Swirl
2012-08-28 21:51                 ` Anthony Liguori
2012-08-28 21:51                   ` Anthony Liguori
2012-09-01  9:20                   ` Blue Swirl
2012-09-01  9:20                     ` [Qemu-devel] " Blue Swirl
2012-08-29  8:27               ` Markus Armbruster
2012-08-29  8:27                 ` [Qemu-devel] " Markus Armbruster
2012-09-03 16:14                 ` Avi Kivity
2012-09-03 16:14                   ` [Qemu-devel] " Avi Kivity
2012-09-03 19:32                   ` Blue Swirl
2012-09-03 19:32                     ` [Qemu-devel] " Blue Swirl
2012-09-04  8:32                     ` Avi Kivity
2012-09-04  8:32                       ` Avi Kivity
2012-09-04 19:27                       ` Blue Swirl
2012-09-04 19:27                         ` Blue Swirl
2012-09-04 21:28                         ` Michael S. Tsirkin
2012-09-04 21:28                           ` [Qemu-devel] " Michael S. Tsirkin
2012-09-05 19:09                           ` Blue Swirl
2012-09-05 19:09                             ` Blue Swirl
2012-08-28 11:02   ` [PATCH v2 " Jan Kiszka
2012-08-28 11:02     ` [Qemu-devel] " Jan Kiszka
2012-08-28 21:49     ` Michael S. Tsirkin
2012-08-28 21:49       ` [Qemu-devel] " Michael S. Tsirkin
2012-08-29  8:44       ` Jan Kiszka
2012-08-29  8:44         ` [Qemu-devel] " Jan Kiszka
2012-08-29 10:35         ` Michael S. Tsirkin
2012-08-29 10:35           ` [Qemu-devel] " Michael S. Tsirkin
2012-08-30 18:30     ` [PATCH v3 " Jan Kiszka
2012-08-30 18:30       ` [Qemu-devel] " Jan Kiszka
2012-09-06  8:44       ` Jan Kiszka
2012-09-06  8:44         ` [Qemu-devel] " Jan Kiszka
2012-09-06  8:49         ` Michael S. Tsirkin
2012-09-06  8:49           ` [Qemu-devel] " Michael S. Tsirkin
2012-09-06 16:06         ` Andreas Färber
2012-09-06 16:06           ` [Qemu-devel] " Andreas Färber
2012-09-06 16:16           ` Jan Kiszka
2012-09-06 16:16             ` [Qemu-devel] " Jan Kiszka
2012-09-08  7:55           ` Blue Swirl
2012-09-08  7:55             ` [Qemu-devel] " Blue Swirl
2012-09-09 14:13         ` Avi Kivity
2012-09-09 14:13           ` [Qemu-devel] " Avi Kivity
2012-09-10  9:26           ` Jan Kiszka
2012-09-10  9:26             ` [Qemu-devel] " Jan Kiszka
2012-09-10 12:30             ` Avi Kivity
2012-09-10 12:30               ` [Qemu-devel] " Avi Kivity
2012-08-28 15:04   ` [PATCH " Michael S. Tsirkin
2012-08-28 15:04     ` [Qemu-devel] " Michael S. Tsirkin
2012-08-29 10:50     ` Jan Kiszka
2012-08-29 10:50       ` [Qemu-devel] " Jan Kiszka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAAu8pHu7XZ4OStiKG78X2TuHd2RaJ08FTq5coDPRWbDbi9UBBQ@mail.gmail.com \
    --to=blauwirbel@gmail.com \
    --cc=alex.williamson@redhat.com \
    --cc=avi@redhat.com \
    --cc=jan.kiszka@web.de \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.