All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cam Macdonell <cam@cs.ualberta.ca>
To: kvm@vger.kernel.org
Cc: Cam Macdonell <cam@cs.ualberta.ca>
Subject: [PATCH v2] Shared memory device with interrupt support
Date: Thu,  7 May 2009 10:16:32 -0600	[thread overview]
Message-ID: <1241712992-17004-1-git-send-email-cam@cs.ualberta.ca> (raw)

    Support an inter-vm shared memory device that maps a shared-memory object as a PCI device in the guest.  This patch also supports interrupts between guest by communicating over a unix domain socket.  This patch applies to the qemu-kvm repository. 

This device now creates a qemu character device and sends 1-bytes messages to trigger interrupts.  Writes are trigger by writing to the "Doorbell" register on the shared memory PCI device.  The lower 8-bits of the value written to this register are sent as the 1-byte message so different meanings of interrupts can be supported.

Interrupts are only supported between 2 VMs currently.  One VM must act as the server by adding "server" to the command-line argument.  Shared memory devices are created with the following command-line:

-ivhshmem <shm object>,<size in MB>,[unix:<path>][,server] 

Interrupts can also be used between host and guest as well by implementing a listener on the host.

Cam

---
 Makefile.target |    3 +
 hw/ivshmem.c    |  421 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pc.c         |    6 +
 hw/pc.h         |    3 +
 qemu-options.hx |   14 ++
 sysemu.h        |    8 +
 vl.c            |   14 ++
 7 files changed, 469 insertions(+), 0 deletions(-)
 create mode 100644 hw/ivshmem.c

diff --git a/Makefile.target b/Makefile.target
index b68a689..3190bba 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -643,6 +643,9 @@ OBJS += pcnet.o
 OBJS += rtl8139.o
 OBJS += e1000.o
 
+# Inter-VM PCI shared memory
+OBJS += ivshmem.o
+
 # Generic watchdog support and some watchdog devices
 OBJS += watchdog.o
 OBJS += wdt_ib700.o wdt_i6300esb.o
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
new file mode 100644
index 0000000..95e2268
--- /dev/null
+++ b/hw/ivshmem.c
@@ -0,0 +1,421 @@
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based On: cirrus_vga.c and rtl8139.c
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#include "hw.h"
+#include "console.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+
+#include "qemu-common.h"
+#include <sys/mman.h>
+
+#define PCI_COMMAND_IOACCESS                0x0001
+#define PCI_COMMAND_MEMACCESS               0x0002
+#define PCI_COMMAND_BUSMASTER               0x0004
+
+//#define DEBUG_IVSHMEM
+
+#ifdef DEBUG_IVSHMEM
+#define IVSHMEM_DPRINTF(fmt, args...)        \
+    do {printf("IVSHMEM: " fmt, ##args); } while (0)
+#else
+#define IVSHMEM_DPRINTF(fmt, args...)
+#endif
+
+typedef struct IVShmemState {
+    uint16_t intrmask;
+    uint16_t intrstatus;
+    uint16_t doorbell;
+    uint8_t *ivshmem_ptr;
+    unsigned long ivshmem_offset;
+    unsigned int ivshmem_size;
+    unsigned long bios_offset;
+    unsigned int bios_size;
+    target_phys_addr_t base_ctrl;
+    int it_shift;
+    PCIDevice *pci_dev;
+    CharDriverState * chr;
+    unsigned long map_addr;
+    unsigned long map_end;
+    int ivshmem_mmio_io_addr;
+} IVShmemState;
+
+typedef struct PCI_IVShmemState {
+    PCIDevice dev;
+    IVShmemState ivshmem_state;
+} PCI_IVShmemState;
+
+typedef struct IVShmemDesc {
+    char name[1024];
+    char * chrdev;
+    int size;
+} IVShmemDesc;
+
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {
+    IntrMask = 0,
+    IntrStatus = 16,
+    Doorbell = 32
+};
+
+static int num_ivshmem_devices = 0;
+static IVShmemDesc ivshmem_desc;
+
+static void ivshmem_map(PCIDevice *pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
+    IVShmemState *s = &d->ivshmem_state;
+
+    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
+    cpu_register_physical_memory(addr, s->ivshmem_size, s->ivshmem_offset);
+
+}
+
+void ivshmem_init(const char * optarg) {
+
+    char * temp;
+    char * ivshmem_sz;
+    int size;
+
+    num_ivshmem_devices++;
+
+    /* currently we only support 1 device */
+    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
+        return;
+    }
+
+    temp = strdup(optarg);
+    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
+    ivshmem_sz=strsep(&temp,",");
+    if (ivshmem_sz != NULL){
+        size = atol(ivshmem_sz);
+    } else {
+        size = -1;
+    }
+
+    ivshmem_desc.chrdev = strsep(&temp,"\0");
+
+    if ( size == -1) {
+        ivshmem_desc.size = TARGET_PAGE_SIZE;
+    } else {
+        ivshmem_desc.size = size*1024*1024;
+    }
+    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev is %s\n",
+                                        optarg, ivshmem_desc.name,
+                                        ivshmem_desc.size, ivshmem_desc.chrdev);
+}
+
+int ivshmem_get_size(void) {
+    return ivshmem_desc.size;
+}
+
+/* accessing registers - based on rtl8139 */
+static void ivshmem_update_irq(IVShmemState *s)
+{
+    int isr;
+    isr = (s->intrstatus & s->intrmask) & 0xffff;
+
+    /* don't print ISR resets */
+    if (isr) {
+        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
+           isr ? 1 : 0, s->intrstatus, s->intrmask);
+    }
+
+    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
+}
+
+static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
+                       uint32_t addr, uint32_t size, int type)
+{
+    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
+    IVShmemState *s = &d->ivshmem_state;
+
+    cpu_register_physical_memory(addr + 0, 0x100, s->ivshmem_mmio_io_addr);
+}
+
+static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
+
+    s->intrmask = val;
+
+    ivshmem_update_irq(s);
+}
+
+static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
+{
+    uint32_t ret = s->intrmask;
+
+    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
+
+    return ret;
+}
+
+static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
+
+    s->intrstatus = val;
+
+    ivshmem_update_irq(s);
+    return;
+}
+
+static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
+{
+    uint32_t ret = s->intrstatus;
+
+    /* reading ISR clears all interrupts */
+    s->intrstatus = 0;
+
+    ivshmem_update_irq(s);
+
+    return ret;
+}
+
+static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+
+    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned long) opaque);
+
+    addr &= 0xfe;
+
+    switch (addr)
+    {
+        case IntrMask:
+            ivshmem_IntrMask_write(s, val);
+            break;
+
+        case IntrStatus:
+            ivshmem_IntrStatus_write(s, val);
+            break;
+
+        default:
+            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
+    }
+}
+
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
+}
+
+static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
+
+    switch (addr)
+    {   // in future, we will probably want to support more types of doorbells
+        case Doorbell:
+            // wake up the other side
+            qemu_chr_write(s->chr, &writebyte, 1);
+            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n", writebyte);
+            break;
+        default:
+            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
+    }
+}
+
+static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
+{
+
+    IVShmemState *s = opaque;
+    uint32_t ret;
+
+    switch (addr)
+    {
+        case IntrMask:
+            ret = ivshmem_IntrMask_read(s);
+            break;
+        case IntrStatus:
+            ret = ivshmem_IntrStatus_read(s);
+            break;
+        default:
+            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
+            ret = 0;
+    }
+
+    return ret;
+}
+
+static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
+{
+    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
+    return 0;
+}
+
+static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
+{
+    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
+
+    return 0;
+}
+
+static void ivshmem_mmio_writeb(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{
+    ivshmem_io_writeb(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writew(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{
+    ivshmem_io_writew(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writel(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{
+    ivshmem_io_writel(opaque, addr & 0xFF, val);
+}
+
+static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    return ivshmem_io_readb(opaque, addr & 0xFF);
+}
+
+static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
+    return val;
+}
+
+static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
+    return val;
+}
+
+static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
+    ivshmem_mmio_readb,
+    ivshmem_mmio_readw,
+    ivshmem_mmio_readl,
+};
+
+static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
+    ivshmem_mmio_writeb,
+    ivshmem_mmio_writew,
+    ivshmem_mmio_writel,
+};
+
+static int ivshmem_can_receive(void * opaque)
+{
+    return 1;
+}
+
+static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
+{
+    IVShmemState *s = opaque;
+
+    ivshmem_IntrStatus_write(s, *buf);
+
+    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
+}
+
+static void ivshmem_event(void *opaque, int event)
+{
+    IVShmemState *s = opaque;
+    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
+}
+
+int pci_ivshmem_init(PCIBus *bus)
+{
+    PCI_IVShmemState *d;
+    IVShmemState *s;
+    uint8_t *pci_conf;
+    int ivshmem_fd;
+
+    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
+    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
+                                           sizeof(PCI_IVShmemState),
+                                           -1, NULL, NULL);
+    if (!d) {
+        return -1;
+    }
+
+    s = &d->ivshmem_state;
+
+    /* allocate shared memory RAM */
+    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
+    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
+    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
+
+    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
+
+    s->pci_dev = &d->dev;
+    s->ivshmem_size = ivshmem_desc.size;
+
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
+    pci_conf[0x01] = 0x1a;
+    pci_conf[0x02] = 0x10;
+    pci_conf[0x03] = 0x11;
+    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
+    pci_conf[0x0a] = 0x00; // RAM controller
+    pci_conf[0x0b] = 0x05;
+    pci_conf[0x0e] = 0x00; // header_type
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support interrupts
+
+    /* XXX: ivshmem_desc.size must be a power of two */
+
+    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0, ivshmem_mmio_read,
+                                    ivshmem_mmio_write, s);
+
+    /* region for registers*/
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
+
+    /* region for shared memory */
+    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
+
+    /* open shared memory file  */
+    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR, S_IRWXU)) < 0)
+    {
+        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
+        exit(-1);
+    }
+
+    ftruncate(ivshmem_fd, ivshmem_desc.size);
+
+    /* mmap onto PCI device's memory */
+    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
+                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) == MAP_FAILED)
+    {
+        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
+        exit(-1);
+    }
+
+    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s->ivshmem_ptr);
+
+    /* setup character device channel */
+
+    if (ivshmem_desc.chrdev != NULL) {
+        char label[32];
+        snprintf(label, 32, "ivshmem_chardev");
+        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
+        if (s->chr == NULL) {
+            fprintf(stderr, "No server listening on %s\n", ivshmem_desc.chrdev);
+            exit(-1);
+        }
+        qemu_chr_add_handlers(s->chr, ivshmem_can_receive, ivshmem_receive,
+                          ivshmem_event, s);
+    }
+
+    return 0;
+}
+
diff --git a/hw/pc.c b/hw/pc.c
index 34a4d25..7d0cff2 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -67,6 +67,8 @@ static PITState *pit;
 static IOAPICState *ioapic;
 static PCIDevice *i440fx_state;
 
+extern int ivshmem_enabled;
+
 static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
 {
 }
@@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
         }
     }
 
+    if (pci_enabled && ivshmem_enabled) {
+        pci_ivshmem_init(pci_bus);
+    }
+
     rtc_state = rtc_init(0x70, i8259[8], 2000);
 
     qemu_register_boot_set(pc_boot_set, rtc_state);
diff --git a/hw/pc.h b/hw/pc.h
index 885c918..0ae0493 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);
 
 void extboot_init(BlockDriverState *bs, int cmd);
 
+/* ivshmem.c */
+int pci_ivshmem_init(PCIBus *bus);
+
 #endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 173f458..9ab3e2d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical mode and @code{stdio} in
 non graphical mode.
 ETEXI
 
+DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
+    "-ivshmem name,size[,unix:path][,server]  creates or opens a shared file 'name' of size \
+    'size' (in MB) and exposes it as a PCI device in the guest\n")
+STEXI
+@item -ivshmem @var{file},@var{size}
+Creates a POSIX shared file named @var{file} of size @var{size} and creates a
+PCI device of the same size that maps the shared file into the device for guests
+to access.  The created file on the host is located in /dev/shm/
+
+@item unix:@var{path}[,server]
+A unix domain socket is used to send and receive interrupts between VMs.  The unix domain socket
+@var{path} is used for connections.
+ETEXI
+
 DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
     "-pidfile file   write PID to 'file'\n")
 STEXI
diff --git a/sysemu.h b/sysemu.h
index 1f45fd6..862b79e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -217,6 +217,14 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 
 extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
 
+/* inter-VM shared memory devices */
+
+#define MAX_IVSHMEM_DEVICES 1
+
+extern CharDriverState * ivshmem_chardev;
+void ivshmem_init(const char * optarg);
+int ivshmem_get_size(void);
+
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
 
 #ifdef NEED_CPU_H
diff --git a/vl.c b/vl.c
index 0420634..7260fa1 100644
--- a/vl.c
+++ b/vl.c
@@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no change */
 int cirrus_vga_enabled = 1;
 int std_vga_enabled = 0;
 int vmsvga_enabled = 0;
+int ivshmem_enabled = 0;
 int xenfb_enabled = 0;
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -239,6 +240,8 @@ int no_quit = 0;
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *ivshmem_chardev;
+const char * ivshmem_device;
 #ifdef TARGET_I386
 int win2k_install_hack = 0;
 int rtc_td_hack = 0;
@@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
     cyls = heads = secs = 0;
     translation = BIOS_ATA_TRANSLATION_AUTO;
     monitor_device = "vc:80Cx24C";
+    ivshmem_device = NULL;
+    ivshmem_chardev = NULL;
 
     serial_devices[0] = "vc:80Cx24C";
     for(i = 1; i < MAX_SERIAL_PORTS; i++)
@@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
                 parallel_devices[parallel_device_index] = optarg;
                 parallel_device_index++;
                 break;
+            case QEMU_OPTION_ivshmem:
+                ivshmem_device = optarg;
+                ivshmem_enabled = 1;
+                break;
 	    case QEMU_OPTION_loadvm:
 		loadvm = optarg;
 		break;
@@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
 	    }
     }
 
+    if (ivshmem_enabled) {
+        ivshmem_init(ivshmem_device);
+        ram_size += ivshmem_get_size();
+    }
+
 #ifdef CONFIG_KQEMU
     /* FIXME: This is a nasty hack because kqemu can't cope with dynamic
        guest ram allocation.  It needs to go away.  */
-- 
1.6.0.6


             reply	other threads:[~2009-05-07 16:16 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-07 16:16 Cam Macdonell [this message]
2009-05-16  2:45 ` Kumar, Venkat
2009-05-16  3:27   ` Cam Macdonell
2009-05-17 21:39     ` Avi Kivity
2009-05-18 18:54 ` Anthony Liguori
2009-05-19  4:31   ` Avi Kivity
2009-05-19 18:31     ` Anthony Liguori
2009-05-20  9:01       ` Avi Kivity
2009-05-20 13:45         ` Anthony Liguori
2009-05-20 14:26           ` Avi Kivity
     [not found] <3D9CB4061D1EB3408D4A0B910433453C030BCA8892@inbmail01.lsi.com>
2009-05-16  3:30 ` Cam Macdonell
2009-05-17 21:51   ` Avi Kivity
2009-05-18 11:12     ` Gregory Haskins
2009-05-18 11:38       ` Avi Kivity
2009-05-18 16:50     ` Cam Macdonell
2009-05-18 17:19       ` Avi Kivity
2009-05-18 12:11   ` Kumar, Venkat
2009-05-18 16:20     ` Cam Macdonell
2009-05-19  3:52       ` Kumar, Venkat
2009-05-19 11:20         ` Jayaraman, Bhaskar
2009-05-19 11:35           ` Gregory Haskins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1241712992-17004-1-git-send-email-cam@cs.ualberta.ca \
    --to=cam@cs.ualberta.ca \
    --cc=kvm@vger.kernel.org \
    --subject='Re: [PATCH v2] Shared memory device with interrupt support' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.