All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Auger <eric.auger@linaro.org>
To: eric.auger@st.com, christoffer.dall@linaro.org, qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, kim.phillips@freescale.com,
	Eric Auger <eric.auger@linaro.org>,
	agraf@suse.de, stuart.yoder@freescale.com,
	alex.williamson@redhat.com, christophe.barnichon@st.com,
	a.motakis@virtualopensystems.com, kvmarm@lists.cs.columbia.edu
Subject: [Qemu-devel] [RFC v2 4/6] vfio: Add initial IRQ support in QEMU platform device
Date: Wed,  9 Apr 2014 16:33:07 +0100	[thread overview]
Message-ID: <1397057589-11779-5-git-send-email-eric.auger@linaro.org> (raw)
In-Reply-To: <1397057589-11779-1-git-send-email-eric.auger@linaro.org>

This work is inspired of PCI INTx. The code was prepared to support
multiple IRQs but this was not tested at that stage. Similarly to what
is done on PCI, the device register space is RAM unmapped on IRQ hit
in order to trap the end of interrupt (EOI). On mmap timer hit, if the
EOI was trapped, the mmapping is restored. the physical interrupt is
unmasked on the first read/write with the MMIO register space.

Tested on Calxeda Midway xgmac which can be directly assigned to one
virt VM.

Acknowledgements:
- vfio device name, guest physical address and IRQ indexes are
  hardcoded. This will be improved in another patch
- currently tested on a single IRQ (xgmac main IRQ)
- a KVM patch is required to invalidate stage2 entries on RAM memory
  region destruction (see [PATCH] ARM: KVM: Handle IPA unmapping on
  memory region deletion)

Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
 hw/arm/virt.c      |  13 ++-
 hw/vfio/platform.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 304 insertions(+), 25 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5d43cf0..31ae7d2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -108,12 +108,13 @@ static const MemMapEntry a15memmap[] = {
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
     /* 0x10000000 .. 0x40000000 reserved for PCI */
     [VIRT_MEM] = { 0x40000000, 1ULL * 1024 * 1024 * 1024 },
-    [VIRT_ETHERNET] = { 0xfff51000, 0x1000 },
+    [VIRT_ETHERNET] = { 0xfff41000, 0x1000 },
 };
 
 static const int a15irqmap[] = {
     [VIRT_UART] = 1,
     [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
+    [VIRT_ETHERNET] = 77,
 };
 
 static VirtBoardInfo machines[] = {
@@ -299,8 +300,12 @@ static void create_ethernet(const VirtBoardInfo *vbi, qemu_irq *pic)
     hwaddr base = vbi->memmap[VIRT_ETHERNET].base;
     hwaddr size = vbi->memmap[VIRT_ETHERNET].size;
     const char compat[] = "calxeda,hb-xgmac";
+    int irqm = vbi->irqmap[VIRT_ETHERNET];
+    int irqp = irqm+1;
+    int irqlp = irqm+2;
 
-    sysbus_create_simple("vfio-platform", base, NULL);
+    sysbus_create_varargs("vfio-platform", base,
+                          pic[irqm], pic[irqp], pic[irqlp], NULL);
 
     nodename = g_strdup_printf("/ethernet@%" PRIx64, base);
     qemu_fdt_add_subnode(vbi->fdt, nodename);
@@ -308,6 +313,10 @@ static void create_ethernet(const VirtBoardInfo *vbi, qemu_irq *pic)
     /* Note that we can't use setprop_string because of the embedded NUL */
     qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
     qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
+    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+                                0x0, irqm, 0x4,
+                                0x0, irqp, 0x4,
+                                0x0, irqlp, 0x4);
 
     g_free(nodename);
 }
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 138fb13..f148edd 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -24,6 +24,7 @@
 #include "config.h"
 #include "exec/address-spaces.h"
 #include "exec/memory.h"
+
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "qemu/event_notifier.h"
@@ -31,6 +32,7 @@
 #include "qemu/range.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
+
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "hw/hw.h"
@@ -41,12 +43,15 @@
 #define DEBUG_VFIO
 #ifdef DEBUG_VFIO
 #define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
+    do { fprintf(stderr, "vfio: %s: " fmt, __func__, ## __VA_ARGS__); } \
+        while (0)
 #else
 #define DPRINTF(fmt, ...) \
     do { } while (0)
 #endif
 
+#define PLATFORM_NUM_REGIONS 10
+
 /* Extra debugging, trap acceleration paths for more logging */
 #define VFIO_ALLOW_MMAP 1
 
@@ -63,16 +68,240 @@ typedef struct VFIORegion {
     uint8_t nr; /* cache the region number for debug */
 } VFIORegion;
 
+
+#define VFIO_INT_INTp 4
+
+typedef struct VFIOINTp {
+    QLIST_ENTRY(VFIOINTp) next;
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+    qemu_irq qemuirq;
+    struct VFIODevice *vdev; /* back pointer to device */
+    bool pending; /* interrupt pending */
+    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+    uint8_t pin; /* index */
+    uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+    QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
+} VFIOINTp;
+
+
+
 typedef struct VFIODevice {
     SysBusDevice sbdev;
     int fd;
     int num_regions;
-    VFIORegion *regions;
+    int num_irqs;
+    int interrupt; /* type of the interrupt, might disappear */
+    char *name;
+    uint32_t mmap_timeout; /* mmap timeout value in ms */
+    VFIORegion regions[PLATFORM_NUM_REGIONS];
     QLIST_ENTRY(VFIODevice) next;
     struct VFIOGroup *group;
-    char *name;
+    QLIST_HEAD(, VFIOINTp) intp_list;
 } VFIODevice;
 
+
+
+static void vfio_unmask_intp(VFIODevice *vdev, int index)
+{
+    struct vfio_irq_set irq_set = {
+        .argsz = sizeof(irq_set),
+        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
+        .index = index,
+        .start = 0,
+        .count = 1,
+    };
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+}
+
+
+
+
+static void vfio_intp_mmap_enable(void *opaque)
+{
+    VFIOINTp * intp = (VFIOINTp *)opaque;
+    VFIODevice *vdev = intp->vdev;
+
+    if (intp->pending) {
+        DPRINTF("IRQ still pending, re-schedule the mmap timer\n");
+        timer_mod(intp->mmap_timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + intp->mmap_timeout);
+        return;
+    }
+
+    DPRINTF("IRQ EOI'ed sets mmap again\n");
+    VFIORegion *region = &vdev->regions[0];
+    memory_region_set_enabled(&region->mmap_mem, true);
+}
+
+
+
+static void vfio_intp_interrupt(void *opaque)
+{
+    int ret;
+    VFIOINTp *intp = (VFIOINTp *)opaque;
+    VFIODevice *vdev = intp->vdev;
+
+    DPRINTF("pin = %d fd = %d\n",
+            intp->pin, event_notifier_get_fd(&intp->interrupt));
+
+    ret = event_notifier_test_and_clear(&intp->interrupt);
+    if (!ret) {
+        DPRINTF("Error when clearing fd=%d\n",
+                event_notifier_get_fd(&intp->interrupt));
+    }
+
+    intp->pending = true;
+
+    /* TODO: fix this number of regions,
+     * currently a single region is handled
+     */
+
+    VFIORegion *region = &vdev->regions[0];
+    memory_region_set_enabled(&region->mmap_mem, false);
+
+    qemu_set_irq(intp->qemuirq, 1);
+
+    /* schedule the mmap timer which will restote mmap path after EOI*/
+    if (intp->mmap_timeout) {
+        timer_mod(intp->mmap_timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + intp->mmap_timeout);
+    }
+
+}
+
+
+
+static void vfio_irq_eoi(VFIODevice *vdev)
+{
+
+    VFIOINTp *intp;
+    bool eoi_done = false;
+
+    QLIST_FOREACH(intp, &vdev->intp_list, next) {
+        if (intp->pending) {
+            if (eoi_done) {
+                DPRINTF("several IRQ pending simultaneously: \
+                         this is not a supported case yet\n");
+            }
+            DPRINTF("EOI IRQ #%d fd=%d\n",
+                    intp->pin, event_notifier_get_fd(&intp->interrupt));
+            intp->pending = false;
+            qemu_set_irq(intp->qemuirq, 0);
+            vfio_unmask_intp(vdev, intp->pin);
+            eoi_done = true;
+        }
+    }
+
+    return;
+
+}
+
+
+
+#if 0
+static void vfio_list_intp(VFIODevice *vdev)
+{
+    VFIOINTp *intp;
+    int i = 0;
+    QLIST_FOREACH(intp, &vdev->intp_list, next) {
+        DPRINTF("IRQ #%d\n", i);
+        DPRINTF("- pin = %d\n", intp->pin);
+        DPRINTF("- fd = %d\n", event_notifier_get_fd(&intp->interrupt));
+        DPRINTF("- pending = %d\n", (int)intp->pending);
+        DPRINTF("- kvm_accel = %d\n", (int)intp->kvm_accel);
+        i++;
+    }
+}
+#endif
+
+static int vfio_enable_intp(VFIODevice *vdev, unsigned int index)
+{
+    struct vfio_irq_set *irq_set; /* irq structure passed to vfio kernel */
+    int32_t *pfd; /* pointer to the eventfd */
+    int ret, argsz;
+
+    int device = vdev->fd;
+    SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
+
+    vdev->interrupt = VFIO_INT_INTp;
+
+    /* allocate and populate a new VFIOINTp structure put in a queue list */
+    VFIOINTp *intp = g_malloc0(sizeof(*intp));
+    intp->vdev = vdev;
+    intp->pin = index;
+    intp->pending = false;
+    intp->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                    vfio_intp_mmap_enable, intp);
+    intp->mmap_timeout = 1100;
+    /* TO DO: timeout as parameter */
+
+    /* incr sysbus num_irq and sets sysbus->irqp[n] = &intp->qemuirq
+     * only the address of the qemu_irq is set here
+     */
+
+    sysbus_init_irq(sbdev, &intp->qemuirq);
+
+    /* content is set in sysbus_connect_irq (currently in machine definition) */
+
+    ret = event_notifier_init(&intp->interrupt, 0);
+    if (ret) {
+        error_report("vfio: Error: event_notifier_init failed ");
+        return ret;
+    }
+
+    /* build the irq_set to be passed to the vfio kernel driver */
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+    irq_set = g_malloc0(argsz);
+
+    if (!irq_set) {
+        DPRINTF("failure while allocating memory for irq\n");
+        qemu_log_mask(LOG_GUEST_ERROR,
+                 "VFIO platform: failure while allocating memory for irq");
+        return -errno;
+    }
+
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = index;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+
+    *pfd = event_notifier_get_fd(&intp->interrupt);
+
+    DPRINTF("register fd=%d/irq index=%d to kernel\n", *pfd, index);
+
+    qemu_set_fd_handler(*pfd, vfio_intp_interrupt, NULL, intp);
+
+    /* pass the index/fd binding to the kernel driver so that it
+     * triggers this fd on HW IRQ
+     */
+    ret = ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set);
+
+    g_free(irq_set);
+
+    if (ret) {
+        error_report("vfio: Error: Failed to pass Int fd to the driver: %m");
+        qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+        close(*pfd); /* TO DO : replace by event_notifier_cleanup */
+        return -errno;
+    }
+
+    /* store the new intp in qlist */
+
+    QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
+
+    return 0;
+
+}
+
+
+
+
 static int vfio_mmap_region(VFIODevice *vdev, VFIORegion *region,
                          MemoryRegion *mem, MemoryRegion *submem,
                          void **map, size_t size, off_t offset,
@@ -112,6 +341,7 @@ error:
 /*
  * IO Port/MMIO - Beware of the endians, VFIO is always little endian
  */
+
 static void vfio_region_write(void *opaque, hwaddr addr,
                               uint64_t data, unsigned size)
 {
@@ -139,12 +369,15 @@ static void vfio_region_write(void *opaque, hwaddr addr,
     }
 
     if (pwrite(region->fd, &buf, size, region->fd_offset + addr) != size) {
-        error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m",
-                     __func__, addr, data, size);
+        error_report("(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m",
+                     addr, data, size);
     }
 
-    DPRINTF("%s(region %d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n",
-             __func__, region->nr, addr, data, size);
+    DPRINTF("(region %d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n",
+            region->nr, addr, data, size);
+
+    vfio_irq_eoi(container_of(region, VFIODevice, regions[region->nr]));
+
 }
 
 static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size)
@@ -159,8 +392,8 @@ static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size)
     uint64_t data = 0;
 
     if (pread(region->fd, &buf, size, region->fd_offset + addr) != size) {
-        error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m",
-                     __func__, addr, size);
+        error_report("(,0x%"HWADDR_PRIx", %d) failed: %m",
+                     addr, size);
         return (uint64_t)-1;
     }
 
@@ -179,18 +412,22 @@ static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size)
         break;
     }
 
-    DPRINTF("%s(region %d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n",
-            __func__, region->nr, addr, size, data);
+    DPRINTF("(region %d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n",
+            region->nr, addr, size, data);
+
+    vfio_irq_eoi(container_of(region, VFIODevice, regions[region->nr]));
 
     return data;
 }
 
+
 static const MemoryRegionOps vfio_region_ops = {
     .read = vfio_region_read,
     .write = vfio_region_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+
 static void vfio_map_region(VFIODevice *vdev, int nr)
 {
     VFIORegion *region = &vdev->regions[nr];
@@ -242,14 +479,6 @@ static int vfio_get_device(VFIOGroup *group, const char *name,
     DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name,
             dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
 
-    vdev->regions = g_malloc0(sizeof(VFIORegion) * dev_info.num_regions);
-    if (!vdev->regions) {
-            error_report("vfio: Error allocating space for %d regions",
-                         dev_info.num_regions);
-            ret = -ENOMEM;
-            goto error;
-    }
-
     vdev->num_regions = dev_info.num_regions;
 
     for (i = 0; i < dev_info.num_regions; i++) {
@@ -273,6 +502,38 @@ static int vfio_get_device(VFIOGroup *group, const char *name,
         vdev->regions[i].nr = i;
     }
 
+   /* IRQ */
+
+    DPRINTF("Num IRQS: %d\n", dev_info.num_irqs);
+
+    vdev->num_irqs = dev_info.num_irqs;
+
+    for (i = 0; i < dev_info.num_irqs; i++) {
+
+        struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+
+        irq.index = i;
+
+        DPRINTF("Retrieve IRQ info from vfio platform driver ...\n");
+
+        ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+
+        if (ret) {
+            error_report("vfio: error getting device %s irq info",
+                         name);
+            error_printf("vfio: error getting device %s irq info",
+                         name);
+        }
+
+        DPRINTF("- IRQ index %d: count %d, flags=0x%x\n",
+                                irq.index,
+                                irq.count,
+                                irq.flags);
+
+        vfio_enable_intp(vdev, irq.index);
+
+    }
+
 error:
     if (ret) {
         g_free(vdev->regions);
@@ -286,13 +547,16 @@ error:
 static void vfio_platform_realize(DeviceState *dev, Error **errp)
 {
     SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
-    VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev);
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev);
+    VFIODevice *pvdev;
+
     VFIOGroup *group;
     char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
     ssize_t len;
     struct stat st;
     int groupid, i, ret;
 
+
     /* TODO: pass device name on command line */
     vdev->name = malloc(PATH_MAX);
     strcpy(vdev->name, "fff51000.ethernet");
@@ -320,24 +584,29 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    DPRINTF("%s(%s) group %d\n", __func__, vdev->name, groupid);
+    DPRINTF("%s belongs to VFIO group %d\n", vdev->name, groupid);
 
     group = vfio_get_group(groupid, NULL);
     if (!group) {
         error_report("vfio: failed to get group %d", groupid);
         return;
     }
-
     snprintf(path, sizeof(path), "%s", vdev->name);
 
     QLIST_FOREACH(pvdev, &group->device_list, next) {
+        DPRINTF("compare %s versus %s\n", pvdev->name, vdev->name);
         if (strcmp(pvdev->name, vdev->name) == 0) {
-            error_report("vfio: error: device %s is already attached", path);
+
+            DPRINTF("vfio device %s already is attached to group %d\n",
+                    vdev->name, groupid);
+
             vfio_put_group(group, NULL);
             return;
         }
     }
 
+    DPRINTF("Calling vfio_get_device ...\n");
+
     ret = vfio_get_device(group, path, vdev);
     if (ret) {
         error_report("vfio: failed to get device %s", path);
@@ -363,6 +632,7 @@ static void vfio_platform_dev_class_init(ObjectClass *klass, void *data)
     dc->realize = vfio_platform_realize;
     dc->vmsd = &vfio_platform_vmstate;
     dc->desc = "VFIO-based platform device assignment";
+    dc->cannot_instantiate_with_device_add_yet = true;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
-- 
1.8.3.2

  parent reply	other threads:[~2014-04-09 15:34 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-09 15:33 [Qemu-devel] [RFC v2 0/6] KVM platform device passthrough Eric Auger
2014-04-09 15:33 ` [Qemu-devel] [RFC v2 1/6] hw/arm/virt: add a xgmac device Eric Auger
2014-04-10 13:26   ` Peter Crosthwaite
2014-04-10 13:48     ` Alexander Graf
2014-04-11  5:41       ` Alistair Francis
2014-04-11  9:21         ` Alexander Graf
2014-04-09 15:33 ` [Qemu-devel] [RFC v2 2/6] vfio: move hw/misc/vfio.c to hw/vfio/pci.c Eric Auger
2014-04-09 15:33 ` [Qemu-devel] [RFC v2 3/6] vfio: add vfio-platform support Eric Auger
     [not found]   ` <1397832861.3060.93.camel@ul30vt.home>
2014-05-20  6:44     ` Eric Auger
2014-04-09 15:33 ` Eric Auger [this message]
2014-04-11  1:34   ` [Qemu-devel] [RFC v2 4/6] vfio: Add initial IRQ support in QEMU platform device Kim Phillips
2014-04-09 15:33 ` [Qemu-devel] [RFC v2 5/6] virt: Assign a VFIO platform device to a virt VM in QEMU command line Eric Auger
2014-04-09 15:33 ` [Qemu-devel] [RFC v2 6/6] vfio: add exit function and IRQ disable functions Eric Auger
2014-04-11  1:28 ` [Qemu-devel] [RFC v2 0/6] KVM platform device passthrough Kim Phillips

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1397057589-11779-5-git-send-email-eric.auger@linaro.org \
    --to=eric.auger@linaro.org \
    --cc=a.motakis@virtualopensystems.com \
    --cc=agraf@suse.de \
    --cc=alex.williamson@redhat.com \
    --cc=christoffer.dall@linaro.org \
    --cc=christophe.barnichon@st.com \
    --cc=eric.auger@st.com \
    --cc=kim.phillips@freescale.com \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stuart.yoder@freescale.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.