All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] Shared memory device with interrupt support
@ 2009-05-07 16:16 Cam Macdonell
  2009-05-16  2:45 ` Kumar, Venkat
  2009-05-18 18:54 ` Anthony Liguori
  0 siblings, 2 replies; 21+ messages in thread
From: Cam Macdonell @ 2009-05-07 16:16 UTC (permalink / raw)
  To: kvm; +Cc: Cam Macdonell

    Support an inter-VM shared memory device that maps a shared-memory object as a PCI device in the guest.  This patch also supports interrupts between guests by communicating over a unix domain socket.  This patch applies to the qemu-kvm repository.

This device now creates a qemu character device and sends 1-byte messages to trigger interrupts.  Writes are triggered by writing to the "Doorbell" register on the shared memory PCI device.  The lower 8 bits of the value written to this register are sent as the 1-byte message so different meanings of interrupts can be supported.

Interrupts are only supported between 2 VMs currently.  One VM must act as the server by adding "server" to the command-line argument.  Shared memory devices are created with the following command-line:

-ivshmem <shm object>,<size in MB>[,unix:<path>][,server]

Interrupts can also be used between host and guest by implementing a listener on the host.

Cam

---
 Makefile.target |    3 +
 hw/ivshmem.c    |  421 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pc.c         |    6 +
 hw/pc.h         |    3 +
 qemu-options.hx |   14 ++
 sysemu.h        |    8 +
 vl.c            |   14 ++
 7 files changed, 469 insertions(+), 0 deletions(-)
 create mode 100644 hw/ivshmem.c

diff --git a/Makefile.target b/Makefile.target
index b68a689..3190bba 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -643,6 +643,9 @@ OBJS += pcnet.o
 OBJS += rtl8139.o
 OBJS += e1000.o
 
+# Inter-VM PCI shared memory
+OBJS += ivshmem.o
+
 # Generic watchdog support and some watchdog devices
 OBJS += watchdog.o
 OBJS += wdt_ib700.o wdt_i6300esb.o
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
new file mode 100644
index 0000000..95e2268
--- /dev/null
+++ b/hw/ivshmem.c
@@ -0,0 +1,421 @@
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based On: cirrus_vga.c and rtl8139.c
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#include "hw.h"
+#include "console.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+
+#include "qemu-common.h"
+#include <sys/mman.h>
+
+#define PCI_COMMAND_IOACCESS                0x0001 /* set in config byte 0x04 by pci_ivshmem_init() */
+#define PCI_COMMAND_MEMACCESS               0x0002 /* set in config byte 0x04 by pci_ivshmem_init() */
+#define PCI_COMMAND_BUSMASTER               0x0004 /* defined but not used in this file */
+
+//#define DEBUG_IVSHMEM  /* uncomment to compile in the IVSHMEM_DPRINTF output */
+
+#ifdef DEBUG_IVSHMEM
+#define IVSHMEM_DPRINTF(fmt, args...)        \
+    do {printf("IVSHMEM: " fmt, ##args); } while (0)
+#else
+#define IVSHMEM_DPRINTF(fmt, args...)   /* expands to nothing: arguments are not evaluated */
+#endif
+
+typedef struct IVShmemState {
+    uint16_t intrmask;              /* AND-ed with intrstatus to decide the IRQ level */
+    uint16_t intrstatus;            /* set by register writes and received bytes; cleared on read */
+    uint16_t doorbell;              /* not referenced by the code in this file */
+    uint8_t *ivshmem_ptr;           /* host pointer for ivshmem_offset; mmap() target */
+    unsigned long ivshmem_offset;   /* value returned by qemu_ram_alloc() */
+    unsigned int ivshmem_size;      /* copy of ivshmem_desc.size */
+    unsigned long bios_offset;      /* not referenced by the code in this file */
+    unsigned int bios_size;         /* not referenced by the code in this file */
+    target_phys_addr_t base_ctrl;   /* not referenced by the code in this file */
+    int it_shift;                   /* not referenced by the code in this file */
+    PCIDevice *pci_dev;             /* enclosing PCI device; its irq[0] is driven */
+    CharDriverState * chr;          /* doorbell channel; opened only if a chardev was given */
+    unsigned long map_addr;         /* not referenced by the code in this file */
+    unsigned long map_end;          /* not referenced by the code in this file */
+    int ivshmem_mmio_io_addr;       /* handle returned by cpu_register_io_memory() */
+} IVShmemState;
+
+typedef struct PCI_IVShmemState {
+    PCIDevice dev;              /* keep first: PCIDevice * is cast to PCI_IVShmemState * */
+    IVShmemState ivshmem_state; /* device-specific state */
+} PCI_IVShmemState;
+
+typedef struct IVShmemDesc {
+    char name[1024]; /* shm object name; ivshmem_init() prepends '/' */
+    char * chrdev;   /* option text after the size field, or NULL */
+    int size;        /* region size in bytes */
+} IVShmemDesc;
+
+
+/* register offsets (decimal, in bytes) inside the 0x100-byte register region */
+enum ivshmem_registers {
+    IntrMask = 0,       /* word register: interrupt enable mask */
+    IntrStatus = 16,    /* word register: pending bits, cleared when read */
+    Doorbell = 32       /* byte register: value is sent to the peer */
+};
+
+static int num_ivshmem_devices = 0; /* number of ivshmem_init() calls so far */
+static IVShmemDesc ivshmem_desc;    /* options parsed by ivshmem_init() */
+
+static void ivshmem_map(PCIDevice *pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    /* Region 1 callback: back the guest range at 'addr' with the shared RAM. */
+    IVShmemState *state = &((PCI_IVShmemState *)pci_dev)->ivshmem_state;
+
+    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
+    cpu_register_physical_memory(addr, state->ivshmem_size,
+                                 state->ivshmem_offset);
+}
+
+void ivshmem_init(const char * optarg) {
+    /* Parse "<name>[,<size in MB>[,<chardev spec>]]" into ivshmem_desc. */
+    char * temp;
+    char * ivshmem_sz;
+    long size_mb;
+    /* currently we only support 1 device; further options are ignored */
+    if (++num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
+        return;
+    }
+
+    /* chrdev ends up pointing into this copy, so it is never freed */
+    temp = strdup(optarg);
+    if (temp == NULL) {
+        fprintf(stderr, "kvm_ivshmem: out of memory\n");
+        exit(-1);
+    }
+    snprintf(ivshmem_desc.name, sizeof(ivshmem_desc.name), "/%s",
+             strsep(&temp, ","));
+    ivshmem_sz = strsep(&temp, ",");
+    /* no size given: default to a single target page */
+    ivshmem_desc.size = TARGET_PAGE_SIZE;
+    if (ivshmem_sz != NULL) {
+        size_mb = strtol(ivshmem_sz, NULL, 10);
+        if (size_mb < 1 || size_mb > 2047) { /* byte count must fit in 'int' */
+            fprintf(stderr, "kvm_ivshmem: invalid size '%s'\n", ivshmem_sz);
+            exit(-1);
+        }
+        ivshmem_desc.size = (int)size_mb * 1024 * 1024;
+    }
+    ivshmem_desc.chrdev = strsep(&temp,"\0"); /* rest of the string, or NULL */
+    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev is %s\n",
+                    optarg, ivshmem_desc.name, ivshmem_desc.size, ivshmem_desc.chrdev);
+}
+
+int ivshmem_get_size(void) { /* size in bytes recorded by ivshmem_init() */
+    return ivshmem_desc.size;
+}
+
+/* accessing registers - based on rtl8139 */
+static void ivshmem_update_irq(IVShmemState *s)
+{
+    /* Raise irq[0] while any status bit enabled in the mask is set. */
+    const int pending = (s->intrstatus & s->intrmask) & 0xffff;
+    const int level = (pending != 0);
+
+    if (level) { /* only log assertions, not ISR resets */
+        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
+           level, s->intrstatus, s->intrmask);
+    }
+
+    qemu_set_irq(s->pci_dev->irq[0], level);
+}
+
+static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
+                       uint32_t addr, uint32_t size, int type)
+{
+    /* Region 0 callback: route the 0x100-byte register window to the MMIO handlers. */
+    IVShmemState *state = &((PCI_IVShmemState *)pci_dev)->ivshmem_state;
+
+    cpu_register_physical_memory(addr, 0x100, state->ivshmem_mmio_io_addr);
+}
+
+static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
+    /* val is truncated to the 16-bit mask field */
+    s->intrmask = val;
+    /* the new mask may change the interrupt level */
+    ivshmem_update_irq(s);
+}
+
+static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
+{
+    uint32_t ret = s->intrmask;
+    /* unlike the status register, reading the mask changes nothing */
+    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
+
+    return ret;
+}
+
+static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
+{
+    /* Overwrite (not OR) the status word, then re-evaluate the IRQ line. */
+    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
+
+    s->intrstatus = val;
+
+    ivshmem_update_irq(s);
+}
+
+static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
+{
+    /* Read-to-clear: hand back the latched bits and lower the IRQ. */
+    const uint32_t latched = s->intrstatus;
+
+    /* reading ISR clears all interrupts */
+    s->intrstatus = 0;
+    ivshmem_update_irq(s);
+
+    return latched;
+}
+
+static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t val)
+{
+    /* 16-bit register write: only IntrMask and IntrStatus accept words. */
+    IVShmemState *state = opaque;
+
+    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned long) opaque);
+
+    /* bit 0 of the offset is ignored */
+    addr &= 0xfe;
+
+    if (addr == IntrMask) {
+        ivshmem_IntrMask_write(state, val);
+        return;
+    }
+
+    if (addr == IntrStatus) {
+        ivshmem_IntrStatus_write(state, val);
+        return;
+    }
+
+    IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
+}
+
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{   /* 32-bit register writes are not supported: the value is dropped */
+    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
+}
+
+static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t val)
+{   /* Byte write: only Doorbell is handled. Assumes an unopened s->chr is NULL -- TODO confirm. */
+    IVShmemState *s = opaque;
+    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
+    switch (addr) { // in future, we will probably want to support more types of doorbells
+        case Doorbell:
+            // wake up the other side; s->chr is only opened when a chardev was given
+            if (s->chr != NULL) {
+                qemu_chr_write(s->chr, &writebyte, 1);
+                IVSHMEM_DPRINTF("Writing to the other side 0x%x\n", writebyte);
+            }
+            break;
+        default:
+            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
+    }
+}
+
+static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
+{
+    /*
+     * 16-bit register read.  NOTE(review): unlike ivshmem_io_writew() the
+     * offset is not masked with 0xfe here -- confirm that is intended.
+     */
+    IVShmemState *state = opaque;
+
+    if (addr == IntrMask) {
+        return ivshmem_IntrMask_read(state);
+    }
+
+    if (addr == IntrStatus) {
+        /* has a side effect: the status word is cleared by the read */
+        return ivshmem_IntrStatus_read(state);
+    }
+
+    /* every other offset reads back as zero */
+    IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
+    return 0;
+}
+
+static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
+{   /* 32-bit register reads are not supported and always yield 0 */
+    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
+    return 0;
+}
+
+static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
+{
+    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
+    /* byte reads are not supported and always yield 0 */
+    return 0;
+}
+
+static void ivshmem_mmio_writeb(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{   /* MMIO byte write: pass the low 8 address bits as the register offset */
+    ivshmem_io_writeb(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writew(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{   /* MMIO word write: pass the low 8 address bits as the register offset */
+    ivshmem_io_writew(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writel(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{   /* MMIO long write: forwarded to a handler that ignores the value */
+    ivshmem_io_writel(opaque, addr & 0xFF, val);
+}
+
+static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t addr)
+{   /* MMIO byte read: forwarded to a handler that always returns 0 */
+    return ivshmem_io_readb(opaque, addr & 0xFF);
+}
+
+static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t addr)
+{   /* MMIO word read: pass the low 8 address bits as the register offset */
+    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
+    return val;
+}
+
+static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t addr)
+{   /* MMIO long read: forwarded to a handler that always returns 0 */
+    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
+    return val;
+}
+
+static CPUReadMemoryFunc *ivshmem_mmio_read[3] = { /* passed to cpu_register_io_memory() */
+    ivshmem_mmio_readb, /* byte */
+    ivshmem_mmio_readw, /* word */
+    ivshmem_mmio_readl, /* long */
+};
+
+static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = { /* passed to cpu_register_io_memory() */
+    ivshmem_mmio_writeb, /* byte */
+    ivshmem_mmio_writew, /* word */
+    ivshmem_mmio_writel, /* long */
+};
+
+static int ivshmem_can_receive(void * opaque)
+{   /* always 1; presumably the byte count the chardev layer may deliver -- TODO confirm */
+    return 1;
+}
+
+static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
+{   /* Chardev read handler: the first received byte becomes the status word. */
+    IVShmemState *s = opaque;
+    if (buf != NULL && size > 0) { /* never read from an empty delivery */
+        ivshmem_IntrStatus_write(s, buf[0]);
+        IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", buf[0]);
+    }
+}
+
+static void ivshmem_event(void *opaque, int event)
+{
+    /* Character-device events are only logged; 'opaque' is not needed. */
+    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
+}
+
+int pci_ivshmem_init(PCIBus *bus)
+{
+    /*
+     * Register the ivshmem PCI device (region 0: registers, region 1: shared memory).
+     * Returns 0, or -1 if registration fails; other setup errors exit QEMU.
+     */
+    PCI_IVShmemState *d;
+    IVShmemState *s;
+    uint8_t *pci_conf;
+    int ivshmem_fd;
+
+    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
+    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
+                                           sizeof(PCI_IVShmemState),
+                                           -1, NULL, NULL);
+    if (!d) {
+        return -1;
+    }
+    s = &d->ivshmem_state;
+    s->chr = NULL; /* stays NULL unless a chardev is opened below */
+
+    /* allocate shared memory RAM */
+    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
+    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
+    IVSHMEM_DPRINTF("ivshmem ram offset = %lu\n", s->ivshmem_offset);
+    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
+    s->pci_dev = &d->dev;
+    s->ivshmem_size = ivshmem_desc.size;
+
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x1af4 (low byte first)
+    pci_conf[0x01] = 0x1a;
+    pci_conf[0x02] = 0x10; // device ID 0x1110
+    pci_conf[0x03] = 0x11;
+    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
+    pci_conf[0x0a] = 0x00; // RAM controller
+    pci_conf[0x0b] = 0x05;
+    pci_conf[0x0e] = 0x00; // header_type
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support interrupts
+    /* XXX: ivshmem_desc.size must be a power of two */
+
+    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0, ivshmem_mmio_read,
+                                    ivshmem_mmio_write, s);
+
+    /* region for registers*/
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
+
+    /* region for shared memory */
+    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
+
+    /* open shared memory file  */
+    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR, S_IRWXU)) < 0) {
+        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
+        exit(-1);
+    }
+    if (ftruncate(ivshmem_fd, ivshmem_desc.size) != 0) {
+        fprintf(stderr, "kvm_ivshmem: could not resize shared file\n");
+        exit(-1);
+    }
+
+    /* mmap onto PCI device's memory */
+    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
+                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) == MAP_FAILED) {
+        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
+        exit(-1);
+    }
+    close(ivshmem_fd); /* the mapping stays valid without the descriptor */
+    IVSHMEM_DPRINTF("shared object mapped to %p\n", (void *)s->ivshmem_ptr);
+
+    /* setup character device channel */
+    if (ivshmem_desc.chrdev != NULL) {
+        char label[32];
+        snprintf(label, 32, "ivshmem_chardev");
+        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
+        if (s->chr == NULL) {
+            fprintf(stderr, "No server listening on %s\n", ivshmem_desc.chrdev);
+            exit(-1);
+        }
+        qemu_chr_add_handlers(s->chr, ivshmem_can_receive, ivshmem_receive,
+                          ivshmem_event, s);
+    }
+
+    return 0;
+}
+
diff --git a/hw/pc.c b/hw/pc.c
index 34a4d25..7d0cff2 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -67,6 +67,8 @@ static PITState *pit;
 static IOAPICState *ioapic;
 static PCIDevice *i440fx_state;
 
+extern int ivshmem_enabled;
+
 static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
 {
 }
@@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
         }
     }
 
+    if (pci_enabled && ivshmem_enabled) {
+        pci_ivshmem_init(pci_bus);
+    }
+
     rtc_state = rtc_init(0x70, i8259[8], 2000);
 
     qemu_register_boot_set(pc_boot_set, rtc_state);
diff --git a/hw/pc.h b/hw/pc.h
index 885c918..0ae0493 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);
 
 void extboot_init(BlockDriverState *bs, int cmd);
 
+/* ivshmem.c */
+int pci_ivshmem_init(PCIBus *bus);
+
 #endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 173f458..9ab3e2d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical mode and @code{stdio} in
 non graphical mode.
 ETEXI
 
+DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
+    "-ivshmem name,size[,unix:path][,server]  creates or opens a shared file 'name' of size \
+    'size' (in MB) and exposes it as a PCI device in the guest\n")
+STEXI
+@item -ivshmem @var{file},@var{size}
+Creates a POSIX shared file named @var{file} of size @var{size} and creates a
+PCI device of the same size that maps the shared file into the device for guests
+to access.  The created file on the host is located in /dev/shm/
+
+@item unix:@var{path}[,server]
+A unix domain socket is used to send and receive interrupts between VMs.  The unix domain socket
+@var{path} is used for connections.
+ETEXI
+
 DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
     "-pidfile file   write PID to 'file'\n")
 STEXI
diff --git a/sysemu.h b/sysemu.h
index 1f45fd6..862b79e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -217,6 +217,14 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 
 extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
 
+/* inter-VM shared memory devices */
+
+#define MAX_IVSHMEM_DEVICES 1
+
+extern CharDriverState * ivshmem_chardev;
+void ivshmem_init(const char * optarg);
+int ivshmem_get_size(void);
+
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
 
 #ifdef NEED_CPU_H
diff --git a/vl.c b/vl.c
index 0420634..7260fa1 100644
--- a/vl.c
+++ b/vl.c
@@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no change */
 int cirrus_vga_enabled = 1;
 int std_vga_enabled = 0;
 int vmsvga_enabled = 0;
+int ivshmem_enabled = 0;
 int xenfb_enabled = 0;
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -239,6 +240,8 @@ int no_quit = 0;
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *ivshmem_chardev;
+const char * ivshmem_device;
 #ifdef TARGET_I386
 int win2k_install_hack = 0;
 int rtc_td_hack = 0;
@@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
     cyls = heads = secs = 0;
     translation = BIOS_ATA_TRANSLATION_AUTO;
     monitor_device = "vc:80Cx24C";
+    ivshmem_device = NULL;
+    ivshmem_chardev = NULL;
 
     serial_devices[0] = "vc:80Cx24C";
     for(i = 1; i < MAX_SERIAL_PORTS; i++)
@@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
                 parallel_devices[parallel_device_index] = optarg;
                 parallel_device_index++;
                 break;
+            case QEMU_OPTION_ivshmem:
+                ivshmem_device = optarg;
+                ivshmem_enabled = 1;
+                break;
 	    case QEMU_OPTION_loadvm:
 		loadvm = optarg;
 		break;
@@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
 	    }
     }
 
+    if (ivshmem_enabled) {
+        ivshmem_init(ivshmem_device);
+        ram_size += ivshmem_get_size();
+    }
+
 #ifdef CONFIG_KQEMU
     /* FIXME: This is a nasty hack because kqemu can't cope with dynamic
        guest ram allocation.  It needs to go away.  */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* RE: [PATCH v2] Shared memory device with interrupt support
  2009-05-07 16:16 [PATCH v2] Shared memory device with interrupt support Cam Macdonell
@ 2009-05-16  2:45 ` Kumar, Venkat
  2009-05-16  3:27   ` Cam Macdonell
  2009-05-18 18:54 ` Anthony Liguori
  1 sibling, 1 reply; 21+ messages in thread
From: Kumar, Venkat @ 2009-05-16  2:45 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: kvm

Hi Cam, I have gone through your latest shared memory patch.
I have a few questions and comments.

Comment:-
+    if (ivshmem_enabled) {
+        ivshmem_init(ivshmem_device);
+        ram_size += ivshmem_get_size();
+    }
+

In your initial patch this part of the patch is

+    if (ivshmem_enabled) {
+        ivshmem_init(ivshmem_device);
+        phys_ram_size += ivshmem_get_size();
+    }

I think the phys_ram_size += ivshmem_get_size(); is correct.

Question:-
You are passing "s->ivshmem_ptr", which is "phys_ram_base + s->ivshmem_offset", as the desired virtual address when mmapping the shared memory object. This desired virtual address is nothing but the base virtual address of the memory that you are allocating after incrementing phys_ram_size. So s->ivshmem_ptr would now point to a new mapping, the shared memory region, instead of the memory allocated through qemu_alloc_physram. This means that if pages were already allocated for the "s->ivshmem_ptr" virtual address range, those pages can never be addressed again. Correct me if my understanding is wrong.

Thx,

Venkat


-----Original Message-----
From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On Behalf Of Cam Macdonell
Sent: Thursday, May 07, 2009 9:47 PM
To: kvm@vger.kernel.org
Cc: Cam Macdonell
Subject: [PATCH v2] Shared memory device with interrupt support

    Support an inter-VM shared memory device that maps a shared-memory object as a PCI device in the guest.  This patch also supports interrupts between guests by communicating over a unix domain socket.  This patch applies to the qemu-kvm repository.

This device now creates a qemu character device and sends 1-byte messages to trigger interrupts.  Writes are triggered by writing to the "Doorbell" register on the shared memory PCI device.  The lower 8 bits of the value written to this register are sent as the 1-byte message so different meanings of interrupts can be supported.

Interrupts are only supported between 2 VMs currently.  One VM must act as the server by adding "server" to the command-line argument.  Shared memory devices are created with the following command-line:

-ivshmem <shm object>,<size in MB>[,unix:<path>][,server]

Interrupts can also be used between host and guest by implementing a listener on the host.

Cam

---
 Makefile.target |    3 +
 hw/ivshmem.c    |  421 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pc.c         |    6 +
 hw/pc.h         |    3 +
 qemu-options.hx |   14 ++
 sysemu.h        |    8 +
 vl.c            |   14 ++
 7 files changed, 469 insertions(+), 0 deletions(-)
 create mode 100644 hw/ivshmem.c

diff --git a/Makefile.target b/Makefile.target
index b68a689..3190bba 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -643,6 +643,9 @@ OBJS += pcnet.o
 OBJS += rtl8139.o
 OBJS += e1000.o

+# Inter-VM PCI shared memory
+OBJS += ivshmem.o
+
 # Generic watchdog support and some watchdog devices
 OBJS += watchdog.o
 OBJS += wdt_ib700.o wdt_i6300esb.o
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
new file mode 100644
index 0000000..95e2268
--- /dev/null
+++ b/hw/ivshmem.c
@@ -0,0 +1,421 @@
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based On: cirrus_vga.c and rtl8139.c
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#include "hw.h"
+#include "console.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+
+#include "qemu-common.h"
+#include <sys/mman.h>
+
+#define PCI_COMMAND_IOACCESS                0x0001 /* set in config byte 0x04 by pci_ivshmem_init() */
+#define PCI_COMMAND_MEMACCESS               0x0002 /* set in config byte 0x04 by pci_ivshmem_init() */
+#define PCI_COMMAND_BUSMASTER               0x0004 /* defined but not used in this file */
+
+//#define DEBUG_IVSHMEM  /* uncomment to compile in the IVSHMEM_DPRINTF output */
+
+#ifdef DEBUG_IVSHMEM
+#define IVSHMEM_DPRINTF(fmt, args...)        \
+    do {printf("IVSHMEM: " fmt, ##args); } while (0)
+#else
+#define IVSHMEM_DPRINTF(fmt, args...)   /* expands to nothing: arguments are not evaluated */
+#endif
+
+typedef struct IVShmemState {
+    uint16_t intrmask;              /* AND-ed with intrstatus to decide the IRQ level */
+    uint16_t intrstatus;            /* set by register writes and received bytes; cleared on read */
+    uint16_t doorbell;              /* not referenced by the code in this file */
+    uint8_t *ivshmem_ptr;           /* host pointer for ivshmem_offset; mmap() target */
+    unsigned long ivshmem_offset;   /* value returned by qemu_ram_alloc() */
+    unsigned int ivshmem_size;      /* copy of ivshmem_desc.size */
+    unsigned long bios_offset;      /* not referenced by the code in this file */
+    unsigned int bios_size;         /* not referenced by the code in this file */
+    target_phys_addr_t base_ctrl;   /* not referenced by the code in this file */
+    int it_shift;                   /* not referenced by the code in this file */
+    PCIDevice *pci_dev;             /* enclosing PCI device; its irq[0] is driven */
+    CharDriverState * chr;          /* doorbell channel; opened only if a chardev was given */
+    unsigned long map_addr;         /* not referenced by the code in this file */
+    unsigned long map_end;          /* not referenced by the code in this file */
+    int ivshmem_mmio_io_addr;       /* handle returned by cpu_register_io_memory() */
+} IVShmemState;
+
+typedef struct PCI_IVShmemState {
+    PCIDevice dev;              /* keep first: PCIDevice * is cast to PCI_IVShmemState * */
+    IVShmemState ivshmem_state; /* device-specific state */
+} PCI_IVShmemState;
+
+typedef struct IVShmemDesc {
+    char name[1024]; /* shm object name; ivshmem_init() prepends '/' */
+    char * chrdev;   /* option text after the size field, or NULL */
+    int size;        /* region size in bytes */
+} IVShmemDesc;
+
+
+/* register offsets (decimal, in bytes) inside the 0x100-byte register region */
+enum ivshmem_registers {
+    IntrMask = 0,       /* word register: interrupt enable mask */
+    IntrStatus = 16,    /* word register: pending bits, cleared when read */
+    Doorbell = 32       /* byte register: value is sent to the peer */
+};
+
+static int num_ivshmem_devices = 0; /* number of ivshmem_init() calls so far */
+static IVShmemDesc ivshmem_desc;    /* options parsed by ivshmem_init() */
+
+static void ivshmem_map(PCIDevice *pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    /* Region 1 callback: back the guest range at 'addr' with the shared RAM. */
+    IVShmemState *state = &((PCI_IVShmemState *)pci_dev)->ivshmem_state;
+
+    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
+    cpu_register_physical_memory(addr, state->ivshmem_size,
+                                 state->ivshmem_offset);
+}
+
+void ivshmem_init(const char * optarg) {
+    /* Parse "<name>[,<size in MB>[,<chardev spec>]]" into ivshmem_desc. */
+    char * temp;
+    char * ivshmem_sz;
+    long size_mb;
+    /* currently we only support 1 device; further options are ignored */
+    if (++num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
+        return;
+    }
+
+    /* chrdev ends up pointing into this copy, so it is never freed */
+    temp = strdup(optarg);
+    if (temp == NULL) {
+        fprintf(stderr, "kvm_ivshmem: out of memory\n");
+        exit(-1);
+    }
+    snprintf(ivshmem_desc.name, sizeof(ivshmem_desc.name), "/%s",
+             strsep(&temp, ","));
+    ivshmem_sz = strsep(&temp, ",");
+    /* no size given: default to a single target page */
+    ivshmem_desc.size = TARGET_PAGE_SIZE;
+    if (ivshmem_sz != NULL) {
+        size_mb = strtol(ivshmem_sz, NULL, 10);
+        if (size_mb < 1 || size_mb > 2047) { /* byte count must fit in 'int' */
+            fprintf(stderr, "kvm_ivshmem: invalid size '%s'\n", ivshmem_sz);
+            exit(-1);
+        }
+        ivshmem_desc.size = (int)size_mb * 1024 * 1024;
+    }
+    ivshmem_desc.chrdev = strsep(&temp,"\0"); /* rest of the string, or NULL */
+    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev is %s\n",
+                    optarg, ivshmem_desc.name, ivshmem_desc.size, ivshmem_desc.chrdev);
+}
+
+int ivshmem_get_size(void) { /* size in bytes recorded by ivshmem_init() */
+    return ivshmem_desc.size;
+}
+
+/* accessing registers - based on rtl8139 */
+static void ivshmem_update_irq(IVShmemState *s)
+{
+    /* Raise irq[0] while any status bit enabled in the mask is set. */
+    const int pending = (s->intrstatus & s->intrmask) & 0xffff;
+    const int level = (pending != 0);
+
+    if (level) { /* only log assertions, not ISR resets */
+        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
+           level, s->intrstatus, s->intrmask);
+    }
+
+    qemu_set_irq(s->pci_dev->irq[0], level);
+}
+
+static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
+                       uint32_t addr, uint32_t size, int type)
+{
+    /* Region 0 callback: route the 0x100-byte register window to the MMIO handlers. */
+    IVShmemState *state = &((PCI_IVShmemState *)pci_dev)->ivshmem_state;
+
+    cpu_register_physical_memory(addr, 0x100, state->ivshmem_mmio_io_addr);
+}
+
+static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
+    /* val is truncated to the 16-bit mask field */
+    s->intrmask = val;
+    /* the new mask may change the interrupt level */
+    ivshmem_update_irq(s);
+}
+
+static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
+{
+    uint32_t ret = s->intrmask;
+    /* unlike the status register, reading the mask changes nothing */
+    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
+
+    return ret;
+}
+
+static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
+{
+    /* Overwrite (not OR) the status word, then re-evaluate the IRQ line. */
+    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
+
+    s->intrstatus = val;
+
+    ivshmem_update_irq(s);
+}
+
+static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
+{
+    /* Read-to-clear: hand back the latched bits and lower the IRQ. */
+    const uint32_t latched = s->intrstatus;
+
+    /* reading ISR clears all interrupts */
+    s->intrstatus = 0;
+    ivshmem_update_irq(s);
+
+    return latched;
+}
+
+static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t val)
+{
+    /* 16-bit register write: only IntrMask and IntrStatus accept words. */
+    IVShmemState *state = opaque;
+
+    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned long) opaque);
+
+    /* bit 0 of the offset is ignored */
+    addr &= 0xfe;
+
+    if (addr == IntrMask) {
+        ivshmem_IntrMask_write(state, val);
+        return;
+    }
+
+    if (addr == IntrStatus) {
+        ivshmem_IntrStatus_write(state, val);
+        return;
+    }
+
+    IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
+}
+
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{   /* 32-bit register writes are not supported: the value is dropped */
+    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
+}
+
+static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t val)
+{   /* Byte write: only Doorbell is handled. Assumes an unopened s->chr is NULL -- TODO confirm. */
+    IVShmemState *s = opaque;
+    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
+    switch (addr) { // in future, we will probably want to support more types of doorbells
+        case Doorbell:
+            // wake up the other side; s->chr is only opened when a chardev was given
+            if (s->chr != NULL) {
+                qemu_chr_write(s->chr, &writebyte, 1);
+                IVSHMEM_DPRINTF("Writing to the other side 0x%x\n", writebyte);
+            }
+            break;
+        default:
+            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
+    }
+}
+
+static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
+{
+    /*
+     * 16-bit register read.  NOTE(review): unlike ivshmem_io_writew() the
+     * offset is not masked with 0xfe here -- confirm that is intended.
+     */
+    IVShmemState *state = opaque;
+
+    if (addr == IntrMask) {
+        return ivshmem_IntrMask_read(state);
+    }
+
+    if (addr == IntrStatus) {
+        /* has a side effect: the status word is cleared by the read */
+        return ivshmem_IntrStatus_read(state);
+    }
+
+    /* every other offset reads back as zero */
+    IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
+    return 0;
+}
+
+static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
+{   /* 32-bit register reads are not supported and always yield 0 */
+    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
+    return 0;
+}
+
+static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
+{
+    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
+    /* byte reads are not supported and always yield 0 */
+    return 0;
+}
+
+static void ivshmem_mmio_writeb(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{   /* MMIO byte write: pass the low 8 address bits as the register offset */
+    ivshmem_io_writeb(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writew(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{   /* MMIO word write: pass the low 8 address bits as the register offset */
+    ivshmem_io_writew(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writel(void *opaque,
+                                target_phys_addr_t addr, uint32_t val)
+{   /* MMIO long write: forwarded to a handler that ignores the value */
+    ivshmem_io_writel(opaque, addr & 0xFF, val);
+}
+
+static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t addr)
+{   /* MMIO byte read: forwarded to a handler that always returns 0 */
+    return ivshmem_io_readb(opaque, addr & 0xFF);
+}
+
+static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t addr)
+{   /* MMIO word read: pass the low 8 address bits as the register offset */
+    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
+    return val;
+}
+
+static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t addr)
+{   /* MMIO long read: forwarded to a handler that always returns 0 */
+    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
+    return val;
+}
+
+static CPUReadMemoryFunc *ivshmem_mmio_read[3] = { /* passed to cpu_register_io_memory() */
+    ivshmem_mmio_readb, /* byte */
+    ivshmem_mmio_readw, /* word */
+    ivshmem_mmio_readl, /* long */
+};
+
+static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = { /* passed to cpu_register_io_memory() */
+    ivshmem_mmio_writeb, /* byte */
+    ivshmem_mmio_writew, /* word */
+    ivshmem_mmio_writel, /* long */
+};
+
+static int ivshmem_can_receive(void * opaque)
+{   /* always 1; presumably the byte count the chardev layer may deliver -- TODO confirm */
+    return 1;
+}
+
+static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
+{   /* Chardev read handler: the first received byte becomes the status word. */
+    IVShmemState *s = opaque;
+    if (buf != NULL && size > 0) { /* never read from an empty delivery */
+        ivshmem_IntrStatus_write(s, buf[0]);
+        IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", buf[0]);
+    }
+}
+
+static void ivshmem_event(void *opaque, int event)
+{
+    /* Character-device events are only logged; 'opaque' is not needed. */
+    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
+}
+
+int pci_ivshmem_init(PCIBus *bus)
+{
+    /*
+     * Register the ivshmem PCI device (region 0: registers, region 1: shared memory).
+     * Returns 0, or -1 if registration fails; other setup errors exit QEMU.
+     */
+    PCI_IVShmemState *d;
+    IVShmemState *s;
+    uint8_t *pci_conf;
+    int ivshmem_fd;
+
+    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
+    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
+                                           sizeof(PCI_IVShmemState),
+                                           -1, NULL, NULL);
+    if (!d) {
+        return -1;
+    }
+    s = &d->ivshmem_state;
+    s->chr = NULL; /* stays NULL unless a chardev is opened below */
+
+    /* allocate shared memory RAM */
+    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
+    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
+    IVSHMEM_DPRINTF("ivshmem ram offset = %lu\n", s->ivshmem_offset);
+    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
+    s->pci_dev = &d->dev;
+    s->ivshmem_size = ivshmem_desc.size;
+
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x1af4 (low byte first)
+    pci_conf[0x01] = 0x1a;
+    pci_conf[0x02] = 0x10; // device ID 0x1110
+    pci_conf[0x03] = 0x11;
+    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
+    pci_conf[0x0a] = 0x00; // RAM controller
+    pci_conf[0x0b] = 0x05;
+    pci_conf[0x0e] = 0x00; // header_type
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support interrupts
+    /* XXX: ivshmem_desc.size must be a power of two */
+
+    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0, ivshmem_mmio_read,
+                                    ivshmem_mmio_write, s);
+
+    /* region for registers*/
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
+
+    /* region for shared memory */
+    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
+
+    /* open shared memory file  */
+    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR, S_IRWXU)) < 0) {
+        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
+        exit(-1);
+    }
+    if (ftruncate(ivshmem_fd, ivshmem_desc.size) != 0) {
+        fprintf(stderr, "kvm_ivshmem: could not resize shared file\n");
+        exit(-1);
+    }
+
+    /* mmap onto PCI device's memory */
+    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
+                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) == MAP_FAILED) {
+        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
+        exit(-1);
+    }
+    close(ivshmem_fd); /* the mapping stays valid without the descriptor */
+    IVSHMEM_DPRINTF("shared object mapped to %p\n", (void *)s->ivshmem_ptr);
+
+    /* setup character device channel */
+    if (ivshmem_desc.chrdev != NULL) {
+        char label[32];
+        snprintf(label, 32, "ivshmem_chardev");
+        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
+        if (s->chr == NULL) {
+            fprintf(stderr, "No server listening on %s\n", ivshmem_desc.chrdev);
+            exit(-1);
+        }
+        qemu_chr_add_handlers(s->chr, ivshmem_can_receive, ivshmem_receive,
+                          ivshmem_event, s);
+    }
+
+    return 0;
+}
+
diff --git a/hw/pc.c b/hw/pc.c
index 34a4d25..7d0cff2 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -67,6 +67,8 @@ static PITState *pit;
 static IOAPICState *ioapic;
 static PCIDevice *i440fx_state;

+extern int ivshmem_enabled;
+
 static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
 {
 }
@@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
         }
     }

+    if (pci_enabled && ivshmem_enabled) {
+        pci_ivshmem_init(pci_bus);
+    }
+
     rtc_state = rtc_init(0x70, i8259[8], 2000);

     qemu_register_boot_set(pc_boot_set, rtc_state);
diff --git a/hw/pc.h b/hw/pc.h
index 885c918..0ae0493 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);

 void extboot_init(BlockDriverState *bs, int cmd);

+/* ivshmem.c */
+int pci_ivshmem_init(PCIBus *bus);
+
 #endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 173f458..9ab3e2d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical mode and @code{stdio} in
 non graphical mode.
 ETEXI

+DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
+    "-ivshmem name,size[,unix:path][,server]  creates or opens a shared file 'name' of size \
+    'size' (in MB) and exposes it as a PCI device in the guest\n")
+STEXI
+@item -ivshmem @var{file},@var{size}
+Creates a POSIX shared file named @var{file} of size @var{size} and creates a
+PCI device of the same size that maps the shared file into the device for guests
+to access.  The created file on the host is located in /dev/shm/
+
+@item unix:@var{path}[,server]
+A unix domain socket is used to send and receive interrupts between VMs.  The unix domain socket
+@var{path} is used for connections.
+ETEXI
+
 DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
     "-pidfile file   write PID to 'file'\n")
 STEXI
diff --git a/sysemu.h b/sysemu.h
index 1f45fd6..862b79e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -217,6 +217,14 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];

 extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];

+/* inter-VM shared memory devices */
+
+#define MAX_IVSHMEM_DEVICES 1
+
+extern CharDriverState * ivshmem_chardev;
+void ivshmem_init(const char * optarg);
+int ivshmem_get_size(void);
+
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)

 #ifdef NEED_CPU_H
diff --git a/vl.c b/vl.c
index 0420634..7260fa1 100644
--- a/vl.c
+++ b/vl.c
@@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no change */
 int cirrus_vga_enabled = 1;
 int std_vga_enabled = 0;
 int vmsvga_enabled = 0;
+int ivshmem_enabled = 0;
 int xenfb_enabled = 0;
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -239,6 +240,8 @@ int no_quit = 0;
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *ivshmem_chardev;
+const char * ivshmem_device;
 #ifdef TARGET_I386
 int win2k_install_hack = 0;
 int rtc_td_hack = 0;
@@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
     cyls = heads = secs = 0;
     translation = BIOS_ATA_TRANSLATION_AUTO;
     monitor_device = "vc:80Cx24C";
+    ivshmem_device = NULL;
+    ivshmem_chardev = NULL;

     serial_devices[0] = "vc:80Cx24C";
     for(i = 1; i < MAX_SERIAL_PORTS; i++)
@@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
                 parallel_devices[parallel_device_index] = optarg;
                 parallel_device_index++;
                 break;
+            case QEMU_OPTION_ivshmem:
+                ivshmem_device = optarg;
+                ivshmem_enabled = 1;
+                break;
            case QEMU_OPTION_loadvm:
                loadvm = optarg;
                break;
@@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
            }
     }

+    if (ivshmem_enabled) {
+        ivshmem_init(ivshmem_device);
+        ram_size += ivshmem_get_size();
+    }
+
 #ifdef CONFIG_KQEMU
     /* FIXME: This is a nasty hack because kqemu can't cope with dynamic
        guest ram allocation.  It needs to go away.  */
--
1.6.0.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-16  2:45 ` Kumar, Venkat
@ 2009-05-16  3:27   ` Cam Macdonell
  2009-05-17 21:39     ` Avi Kivity
  0 siblings, 1 reply; 21+ messages in thread
From: Cam Macdonell @ 2009-05-16  3:27 UTC (permalink / raw)
  To: Kumar, Venkat; +Cc: kvm


On 15-May-09, at 8:45 PM, Kumar, Venkat wrote:

> Hi Cam, I have gone through you latest shared memory patch.
> I have a few questions and comments.
>
> Comment:-
> +    if (ivshmem_enabled) {
> +        ivshmem_init(ivshmem_device);
> +        ram_size += ivshmem_get_size();
> +    }
> +
>
> In your initial patch this part of the patch is
>
> +    if (ivshmem_enabled) {
> +        ivshmem_init(ivshmem_device);
> +        phys_ram_size += ivshmem_get_size();
> +    }
>
> I think the phys_ram_size += ivshmem_get_size(); is correct.

Hi Venkat,

Not with the newer qemu that qemu-kvm uses.   The newer patch is for  
qemu-kvm, not kvm-userspace.  There is no longer a variable named  
phys_ram_size in pc.c in qemu-kvm.

>
> Question:-
> You are giving the desired virtual address for mmaping the shared  
> memory object as "s->ivshmem_ptr" which is "phys_ram_base + s- 
> >ivshmem_offset". This desired virtual address is nothing but the  
> base virtual address of the memory that you are allocating after  
> incrementing phys_ram_size. So now s->ivshmem_ptr would point to a  
> new set of memory, which is the shared memory region instead of  
> memory allocated through qemu_alloc_physram, which means if pages  
> are allocated for "sh->ivshmem_ptr" virtual address range then those  
> pages can never be addressed again. Correct me if my understanding  
> is wrong.

I don't think so.  With the mmap call, I specify MAP_FIXED which  
requires that the memory in the shared memory object be mapped to the  
address given in the first parameter (s->ivshmem_ptr).  If MAP_FIXED  
is not specified then mmap would allocate the memory and map on to it,  
but with MAP_FIXED it maps onto the already reserved space that  
ivshmem_ptr points to and was allocated with qemu_ram_alloc().

I hope that answers your question,

Cam

>
> -----Original Message-----
> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org]  
> On Behalf Of Cam Macdonell
> Sent: Thursday, May 07, 2009 9:47 PM
> To: kvm@vger.kernel.org
> Cc: Cam Macdonell
> Subject: [PATCH v2] Shared memory device with interrupt support
>
>    Support an inter-vm shared memory device that maps a shared- 
> memory object as a PCI device in the guest.  This patch also  
> supports interrupts between guest by communicating over a unix  
> domain socket.  This patch applies to the qemu-kvm repository.
>
> This device now creates a qemu character device and sends 1-bytes  
> messages to trigger interrupts.  Writes are trigger by writing to  
> the "Doorbell" register on the shared memory PCI device.  The lower  
> 8-bits of the value written to this register are sent as the 1-byte  
> message so different meanings of interrupts can be supported.
>
> Interrupts are only supported between 2 VMs currently.  One VM must  
> act as the server by adding "server" to the command-line argument.   
> Shared memory devices are created with the following command-line:
>
> -ivhshmem <shm object>,<size in MB>,[unix:<path>][,server]
>
> Interrupts can also be used between host and guest as well by  
> implementing a listener on the host.
>
> Cam
>
> ---
> Makefile.target |    3 +
> hw/ivshmem.c    |  421 ++++++++++++++++++++++++++++++++++++++++++++++ 
> +++++++++
> hw/pc.c         |    6 +
> hw/pc.h         |    3 +
> qemu-options.hx |   14 ++
> sysemu.h        |    8 +
> vl.c            |   14 ++
> 7 files changed, 469 insertions(+), 0 deletions(-)
> create mode 100644 hw/ivshmem.c
>
> diff --git a/Makefile.target b/Makefile.target
> index b68a689..3190bba 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -643,6 +643,9 @@ OBJS += pcnet.o
> OBJS += rtl8139.o
> OBJS += e1000.o
>
> +# Inter-VM PCI shared memory
> +OBJS += ivshmem.o
> +
> # Generic watchdog support and some watchdog devices
> OBJS += watchdog.o
> OBJS += wdt_ib700.o wdt_i6300esb.o
> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
> new file mode 100644
> index 0000000..95e2268
> --- /dev/null
> +++ b/hw/ivshmem.c
> @@ -0,0 +1,421 @@
> +/*
> + * Inter-VM Shared Memory PCI device.
> + *
> + * Author:
> + *      Cam Macdonell <cam@cs.ualberta.ca>
> + *
> + * Based On: cirrus_vga.c and rtl8139.c
> + *
> + * This code is licensed under the GNU GPL v2.
> + */
> +
> +#include "hw.h"
> +#include "console.h"
> +#include "pc.h"
> +#include "pci.h"
> +#include "sysemu.h"
> +
> +#include "qemu-common.h"
> +#include <sys/mman.h>
> +
> +#define PCI_COMMAND_IOACCESS                0x0001
> +#define PCI_COMMAND_MEMACCESS               0x0002
> +#define PCI_COMMAND_BUSMASTER               0x0004
> +
> +//#define DEBUG_IVSHMEM
> +
> +#ifdef DEBUG_IVSHMEM
> +#define IVSHMEM_DPRINTF(fmt, args...)        \
> +    do {printf("IVSHMEM: " fmt, ##args); } while (0)
> +#else
> +#define IVSHMEM_DPRINTF(fmt, args...)
> +#endif
> +
> +typedef struct IVShmemState {
> +    uint16_t intrmask;
> +    uint16_t intrstatus;
> +    uint16_t doorbell;
> +    uint8_t *ivshmem_ptr;
> +    unsigned long ivshmem_offset;
> +    unsigned int ivshmem_size;
> +    unsigned long bios_offset;
> +    unsigned int bios_size;
> +    target_phys_addr_t base_ctrl;
> +    int it_shift;
> +    PCIDevice *pci_dev;
> +    CharDriverState * chr;
> +    unsigned long map_addr;
> +    unsigned long map_end;
> +    int ivshmem_mmio_io_addr;
> +} IVShmemState;
> +
> +typedef struct PCI_IVShmemState {
> +    PCIDevice dev;
> +    IVShmemState ivshmem_state;
> +} PCI_IVShmemState;
> +
> +typedef struct IVShmemDesc {
> +    char name[1024];
> +    char * chrdev;
> +    int size;
> +} IVShmemDesc;
> +
> +
> +/* registers for the Inter-VM shared memory device */
> +enum ivshmem_registers {
> +    IntrMask = 0,
> +    IntrStatus = 16,
> +    Doorbell = 32
> +};
> +
> +static int num_ivshmem_devices = 0;
> +static IVShmemDesc ivshmem_desc;
> +
> +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
> +                    uint32_t addr, uint32_t size, int type)
> +{
> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
> +    IVShmemState *s = &d->ivshmem_state;
> +
> +    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
> +    cpu_register_physical_memory(addr, s->ivshmem_size, s- 
> >ivshmem_offset);
> +
> +}
> +
> +void ivshmem_init(const char * optarg) {
> +
> +    char * temp;
> +    char * ivshmem_sz;
> +    int size;
> +
> +    num_ivshmem_devices++;
> +
> +    /* currently we only support 1 device */
> +    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
> +        return;
> +    }
> +
> +    temp = strdup(optarg);
> +    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
> +    ivshmem_sz=strsep(&temp,",");
> +    if (ivshmem_sz != NULL){
> +        size = atol(ivshmem_sz);
> +    } else {
> +        size = -1;
> +    }
> +
> +    ivshmem_desc.chrdev = strsep(&temp,"\0");
> +
> +    if ( size == -1) {
> +        ivshmem_desc.size = TARGET_PAGE_SIZE;
> +    } else {
> +        ivshmem_desc.size = size*1024*1024;
> +    }
> +    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev  
> is %s\n",
> +                                        optarg, ivshmem_desc.name,
> +                                        ivshmem_desc.size,  
> ivshmem_desc.chrdev);
> +}
> +
> +int ivshmem_get_size(void) {
> +    return ivshmem_desc.size;
> +}
> +
> +/* accessing registers - based on rtl8139 */
> +static void ivshmem_update_irq(IVShmemState *s)
> +{
> +    int isr;
> +    isr = (s->intrstatus & s->intrmask) & 0xffff;
> +
> +    /* don't print ISR resets */
> +    if (isr) {
> +        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
> +           isr ? 1 : 0, s->intrstatus, s->intrmask);
> +    }
> +
> +    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
> +}
> +
> +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
> +                       uint32_t addr, uint32_t size, int type)
> +{
> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
> +    IVShmemState *s = &d->ivshmem_state;
> +
> +    cpu_register_physical_memory(addr + 0, 0x100, s- 
> >ivshmem_mmio_io_addr);
> +}
> +
> +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
> +{
> +    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
> +
> +    s->intrmask = val;
> +
> +    ivshmem_update_irq(s);
> +}
> +
> +static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
> +{
> +    uint32_t ret = s->intrmask;
> +
> +    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
> +
> +    return ret;
> +}
> +
> +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
> +{
> +    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
> +
> +    s->intrstatus = val;
> +
> +    ivshmem_update_irq(s);
> +    return;
> +}
> +
> +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
> +{
> +    uint32_t ret = s->intrstatus;
> +
> +    /* reading ISR clears all interrupts */
> +    s->intrstatus = 0;
> +
> +    ivshmem_update_irq(s);
> +
> +    return ret;
> +}
> +
> +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t  
> val)
> +{
> +    IVShmemState *s = opaque;
> +
> +    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned  
> long) opaque);
> +
> +    addr &= 0xfe;
> +
> +    switch (addr)
> +    {
> +        case IntrMask:
> +            ivshmem_IntrMask_write(s, val);
> +            break;
> +
> +        case IntrStatus:
> +            ivshmem_IntrStatus_write(s, val);
> +            break;
> +
> +        default:
> +            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
> +    }
> +}
> +
> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t  
> val)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
> +}
> +
> +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t  
> val)
> +{
> +    IVShmemState *s = opaque;
> +    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
> +
> +    switch (addr)
> +    {   // in future, we will probably want to support more types  
> of doorbells
> +        case Doorbell:
> +            // wake up the other side
> +            qemu_chr_write(s->chr, &writebyte, 1);
> +            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n",  
> writebyte);
> +            break;
> +        default:
> +            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
> +    }
> +}
> +
> +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
> +{
> +
> +    IVShmemState *s = opaque;
> +    uint32_t ret;
> +
> +    switch (addr)
> +    {
> +        case IntrMask:
> +            ret = ivshmem_IntrMask_read(s);
> +            break;
> +        case IntrStatus:
> +            ret = ivshmem_IntrStatus_read(s);
> +            break;
> +        default:
> +            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
> +            ret = 0;
> +    }
> +
> +    return ret;
> +}
> +
> +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
> +    return 0;
> +}
> +
> +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
> +
> +    return 0;
> +}
> +
> +static void ivshmem_mmio_writeb(void *opaque,
> +                                target_phys_addr_t addr, uint32_t  
> val)
> +{
> +    ivshmem_io_writeb(opaque, addr & 0xFF, val);
> +}
> +
> +static void ivshmem_mmio_writew(void *opaque,
> +                                target_phys_addr_t addr, uint32_t  
> val)
> +{
> +    ivshmem_io_writew(opaque, addr & 0xFF, val);
> +}
> +
> +static void ivshmem_mmio_writel(void *opaque,
> +                                target_phys_addr_t addr, uint32_t  
> val)
> +{
> +    ivshmem_io_writel(opaque, addr & 0xFF, val);
> +}
> +
> +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t  
> addr)
> +{
> +    return ivshmem_io_readb(opaque, addr & 0xFF);
> +}
> +
> +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t  
> addr)
> +{
> +    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
> +    return val;
> +}
> +
> +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t  
> addr)
> +{
> +    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
> +    return val;
> +}
> +
> +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
> +    ivshmem_mmio_readb,
> +    ivshmem_mmio_readw,
> +    ivshmem_mmio_readl,
> +};
> +
> +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
> +    ivshmem_mmio_writeb,
> +    ivshmem_mmio_writew,
> +    ivshmem_mmio_writel,
> +};
> +
> +static int ivshmem_can_receive(void * opaque)
> +{
> +    return 1;
> +}
> +
> +static void ivshmem_receive(void *opaque, const uint8_t *buf, int  
> size)
> +{
> +    IVShmemState *s = opaque;
> +
> +    ivshmem_IntrStatus_write(s, *buf);
> +
> +    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
> +}
> +
> +static void ivshmem_event(void *opaque, int event)
> +{
> +    IVShmemState *s = opaque;
> +    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
> +}
> +
> +int pci_ivshmem_init(PCIBus *bus)
> +{
> +    PCI_IVShmemState *d;
> +    IVShmemState *s;
> +    uint8_t *pci_conf;
> +    int ivshmem_fd;
> +
> +    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
> +    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
> +                                           sizeof(PCI_IVShmemState),
> +                                           -1, NULL, NULL);
> +    if (!d) {
> +        return -1;
> +    }
> +
> +    s = &d->ivshmem_state;
> +
> +    /* allocate shared memory RAM */
> +    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
> +    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
> +    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
> +
> +    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
> +
> +    s->pci_dev = &d->dev;
> +    s->ivshmem_size = ivshmem_desc.size;
> +
> +    pci_conf = d->dev.config;
> +    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
> +    pci_conf[0x01] = 0x1a;
> +    pci_conf[0x02] = 0x10;
> +    pci_conf[0x03] = 0x11;
> +    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
> +    pci_conf[0x0a] = 0x00; // RAM controller
> +    pci_conf[0x0b] = 0x05;
> +    pci_conf[0x0e] = 0x00; // header_type
> +
> +    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support  
> interrupts
> +
> +    /* XXX: ivshmem_desc.size must be a power of two */
> +
> +    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0,  
> ivshmem_mmio_read,
> +                                    ivshmem_mmio_write, s);
> +
> +    /* region for registers*/
> +    pci_register_io_region(&d->dev, 0, 0x100,
> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
> +
> +    /* region for shared memory */
> +    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
> +
> +    /* open shared memory file  */
> +    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR,  
> S_IRWXU)) < 0)
> +    {
> +        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
> +        exit(-1);
> +    }
> +
> +    ftruncate(ivshmem_fd, ivshmem_desc.size);
> +
> +    /* mmap onto PCI device's memory */
> +    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
> +                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) ==  
> MAP_FAILED)
> +    {
> +        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
> +        exit(-1);
> +    }
> +
> +    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s- 
> >ivshmem_ptr);
> +
> +    /* setup character device channel */
> +
> +    if (ivshmem_desc.chrdev != NULL) {
> +        char label[32];
> +        snprintf(label, 32, "ivshmem_chardev");
> +        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
> +        if (s->chr == NULL) {
> +            fprintf(stderr, "No server listening on %s\n",  
> ivshmem_desc.chrdev);
> +            exit(-1);
> +        }
> +        qemu_chr_add_handlers(s->chr, ivshmem_can_receive,  
> ivshmem_receive,
> +                          ivshmem_event, s);
> +    }
> +
> +    return 0;
> +}
> +
> diff --git a/hw/pc.c b/hw/pc.c
> index 34a4d25..7d0cff2 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -67,6 +67,8 @@ static PITState *pit;
> static IOAPICState *ioapic;
> static PCIDevice *i440fx_state;
>
> +extern int ivshmem_enabled;
> +
> static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
> {
> }
> @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int  
> vga_ram_size,
>         }
>     }
>
> +    if (pci_enabled && ivshmem_enabled) {
> +        pci_ivshmem_init(pci_bus);
> +    }
> +
>     rtc_state = rtc_init(0x70, i8259[8], 2000);
>
>     qemu_register_boot_set(pc_boot_set, rtc_state);
> diff --git a/hw/pc.h b/hw/pc.h
> index 885c918..0ae0493 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,  
> NICInfo *nd);
>
> void extboot_init(BlockDriverState *bs, int cmd);
>
> +/* ivshmem.c */
> +int pci_ivshmem_init(PCIBus *bus);
> +
> #endif
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 173f458..9ab3e2d 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical  
> mode and @code{stdio} in
> non graphical mode.
> ETEXI
>
> +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
> +    "-ivshmem name,size[,unix:path][,server]  creates or opens a  
> shared file 'name' of size \
> +    'size' (in MB) and exposes it as a PCI device in the guest\n")
> +STEXI
> +@item -ivshmem @var{file},@var{size}
> +Creates a POSIX shared file named @var{file} of size @var{size} and  
> creates a
> +PCI device of the same size that maps the shared file into the  
> device for guests
> +to access.  The created file on the host is located in /dev/shm/
> +
> +@item unix:@var{path}[,server]
> +A unix domain socket is used to send and receive interrupts between  
> VMs.  The unix domain socket
> +@var{path} is used for connections.
> +ETEXI
> +
> DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
>     "-pidfile file   write PID to 'file'\n")
> STEXI
> diff --git a/sysemu.h b/sysemu.h
> index 1f45fd6..862b79e 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -217,6 +217,14 @@ extern CharDriverState  
> *parallel_hds[MAX_PARALLEL_PORTS];
>
> extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>
> +/* inter-VM shared memory devices */
> +
> +#define MAX_IVSHMEM_DEVICES 1
> +
> +extern CharDriverState * ivshmem_chardev;
> +void ivshmem_init(const char * optarg);
> +int ivshmem_get_size(void);
> +
> #define TFR(expr) do { if ((expr) != -1) break; } while (errno ==  
> EINTR)
>
> #ifdef NEED_CPU_H
> diff --git a/vl.c b/vl.c
> index 0420634..7260fa1 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no  
> change */
> int cirrus_vga_enabled = 1;
> int std_vga_enabled = 0;
> int vmsvga_enabled = 0;
> +int ivshmem_enabled = 0;
> int xenfb_enabled = 0;
> #ifdef TARGET_SPARC
> int graphic_width = 1024;
> @@ -239,6 +240,8 @@ int no_quit = 0;
> CharDriverState *serial_hds[MAX_SERIAL_PORTS];
> CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
> CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
> +CharDriverState *ivshmem_chardev;
> +const char * ivshmem_device;
> #ifdef TARGET_I386
> int win2k_install_hack = 0;
> int rtc_td_hack = 0;
> @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
>     cyls = heads = secs = 0;
>     translation = BIOS_ATA_TRANSLATION_AUTO;
>     monitor_device = "vc:80Cx24C";
> +    ivshmem_device = NULL;
> +    ivshmem_chardev = NULL;
>
>     serial_devices[0] = "vc:80Cx24C";
>     for(i = 1; i < MAX_SERIAL_PORTS; i++)
> @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
>                 parallel_devices[parallel_device_index] = optarg;
>                 parallel_device_index++;
>                 break;
> +            case QEMU_OPTION_ivshmem:
> +                ivshmem_device = optarg;
> +                ivshmem_enabled = 1;
> +                break;
>            case QEMU_OPTION_loadvm:
>                loadvm = optarg;
>                break;
> @@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
>            }
>     }
>
> +    if (ivshmem_enabled) {
> +        ivshmem_init(ivshmem_device);
> +        ram_size += ivshmem_get_size();
> +    }
> +
> #ifdef CONFIG_KQEMU
>     /* FIXME: This is a nasty hack because kqemu can't cope with  
> dynamic
>        guest ram allocation.  It needs to go away.  */
> --
> 1.6.0.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-----------------------------------------------
A. Cameron Macdonell
Ph.D. Student
Department of Computing Science
University of Alberta
cam@cs.ualberta.ca




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-16  3:27   ` Cam Macdonell
@ 2009-05-17 21:39     ` Avi Kivity
  0 siblings, 0 replies; 21+ messages in thread
From: Avi Kivity @ 2009-05-17 21:39 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: Kumar, Venkat, kvm

Cam Macdonell wrote:
>
> I don't think so.  With the mmap call, I specify MAP_FIXED which 
> requires that the memory in the shared memory object be mapped to the 
> address given in the first parameter (s->ivshmem_ptr).  If MAP_FIXED 
> is not specified then mmap would allocate the memory and map on to it, 
> but with MAP_FIXED it maps onto the already reserved space that 
> ivshmem_ptr points to and was allocated with qemu_ram_alloc().

It might be nice to have a variant of qemu_ram_alloc() that takes a 
pointer to existing memory, so we don't have to play these MAP_FIXED games.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-07 16:16 [PATCH v2] Shared memory device with interrupt support Cam Macdonell
  2009-05-16  2:45 ` Kumar, Venkat
@ 2009-05-18 18:54 ` Anthony Liguori
  2009-05-19  4:31   ` Avi Kivity
  1 sibling, 1 reply; 21+ messages in thread
From: Anthony Liguori @ 2009-05-18 18:54 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: kvm, Avi Kivity

Hi Cam,

Cam Macdonell wrote:
>     Support an inter-vm shared memory device that maps a shared-memory object as a PCI device in the guest.  This patch also supports interrupts between guest by communicating over a unix domain socket.  This patch applies to the qemu-kvm repository. 
>
> This device now creates a qemu character device and sends 1-bytes messages to trigger interrupts.  Writes are trigger by writing to the "Doorbell" register on the shared memory PCI device.  The lower 8-bits of the value written to this register are sent as the 1-byte message so different meanings of interrupts can be supported.
>
> Interrupts are only supported between 2 VMs currently.  One VM must act as the server by adding "server" to the command-line argument.  Shared memory devices are created with the following command-line:
>
> -ivhshmem <shm object>,<size in MB>,[unix:<path>][,server] 
>
> Interrupts can also be used between host and guest as well by implementing a listener on the host.
>
> Cam
>   

I'd strongly recommend working these patches on qemu-devel and lkml.  I 
suspect Avi may disagree with me, but in order for this to be eventually 
merged in either place, you're going to have additional requirements put 
on you.

If it goes in via qemu-kvm.git, there's a possibility that you'll be 
forced into an ABI break down the road (consider the old hypercall and 
balloon drivers).

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-18 18:54 ` Anthony Liguori
@ 2009-05-19  4:31   ` Avi Kivity
  2009-05-19 18:31     ` Anthony Liguori
  0 siblings, 1 reply; 21+ messages in thread
From: Avi Kivity @ 2009-05-19  4:31 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Cam Macdonell, kvm

Anthony Liguori wrote:
> I'd strongly recommend working these patches on qemu-devel and lkml.  
> I suspect Avi may disagree with me, but in order for this to be 
> eventually merged in either place, you're going to have additional 
> requirements put on you.

I don't disagree with the fact that there will be additional 
requirements, but I might disagree with some of those additional 
requirements themselves.  In particular I think your proposal was 
unimplementable; I would like to see how you can address my concerns.

I don't think bulk memory sharing and the current transactional virtio 
mechanisms are a good fit for each other; but if we were to add a 
BAR-like capability to virtio that would address the compatibility 
requirement (though it might be difficult to implement on s390 with its 
requirement on contiguous host virtual address space).

A model which does fit the current virtio capabilities is that of a DMA 
engine - guest A specifies an sglist to copy; guest B specifies an 
sglist to receive the copy; the host does the copy, using a real DMA 
engine if available.  Note A == B is a possibility, and is a way to 
expose a DMA engine to a single guest for its own use in moving memory 
around.

I think both models could prove useful.

> If it goes in via qemu-kvm.git, there's a possibility that you'll be 
> forced into an ABI break down the road (consider the old hypercall and 
> balloon drivers).

I agree this is best merged upstream first.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-19  4:31   ` Avi Kivity
@ 2009-05-19 18:31     ` Anthony Liguori
  2009-05-20  9:01       ` Avi Kivity
  0 siblings, 1 reply; 21+ messages in thread
From: Anthony Liguori @ 2009-05-19 18:31 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Cam Macdonell, kvm

Avi Kivity wrote:
> Anthony Liguori wrote:
>> I'd strongly recommend working these patches on qemu-devel and lkml.  
>> I suspect Avi may disagree with me, but in order for this to be 
>> eventually merged in either place, you're going to have additional 
>> requirements put on you.
>
> I don't disagree with the fact that there will be additional 
> requirements, but I might disagree with some of those additional 
> requirements themselves.

It actually works out better than I think you expect it to...

We can't use mmap() directly.  With the new RAM allocation scheme, I 
think it's pretty reasonable to now allow portions of ram to come from 
files that get mmap() (sort of like -mem-path).

This RAM area could be set up as a BAR.

>   In particular I think your proposal was unimplementable; I would 
> like to see how how you can address my concerns.

I don't remember what my proposal was to be perfectly honest :-)  I 
think I suggested registering a guest allocated portion of memory as a 
sharable region via virtio?  Why is that unimplementable?

> I don't think bulk memory sharing and the current transactional virtio 
> mechanisms are a good fit for each other; but if we were to add a 
> BAR-like capability to virtio that would address the compatibility 
> requirement (though it might be difficult to implement on s390 with 
> its requirement on contiguous host virtual address space).

It doesn't necessarily have to be virtio if that's not what makes sense.

The QEMU bits and the device model bits are actually relatively simple.  
The part that I think needs more deep thought is the guest-visible 
interface.

A char device is probably not the best interface.  I think you want 
something like tmpfs/hugetlbfs.  Another question is whether you want a 
guest to be able to share a portion of its memory with another guest or 
have everything setup by the host.

If everything is set up by the host, hot plug is important.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-19 18:31     ` Anthony Liguori
@ 2009-05-20  9:01       ` Avi Kivity
  2009-05-20 13:45         ` Anthony Liguori
  0 siblings, 1 reply; 21+ messages in thread
From: Avi Kivity @ 2009-05-20  9:01 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Cam Macdonell, kvm

Anthony Liguori wrote:
> Avi Kivity wrote:
>> Anthony Liguori wrote:
>>> I'd strongly recommend working these patches on qemu-devel and 
>>> lkml.  I suspect Avi may disagree with me, but in order for this to 
>>> be eventually merged in either place, you're going to have 
>>> additional requirements put on you.
>>
>> I don't disagree with the fact that there will be additional 
>> requirements, but I might disagree with some of those additional 
>> requirements themselves.
>
> It actually works out better than I think you expect it to...

Can you explain why?  You haven't addressed my concerns the last time 
around.

>
> We can't use mmap() directly.  With the new RAM allocation scheme, I 
> think it's pretty reasonable to now allow portions of ram to come from 
> files that get mmap() (sort of like -mem-path).
>
> This RAM area could be setup as a BAR.

That's what Cam's patch does, and what you objected to.

>
>>   In particular I think your proposal was unimplementable; I would 
>> like to see how you can address my concerns.
>
> I don't remember what my proposal was to be perfectly honest :-)  I 
> think I suggested registering a guest allocated portion of memory as a 
> sharable region via virtio?  

Yes.

> Why is that unimplementable?

Bad choice of words - it's implementable, just not very usable.  You 
can't share 1GB in a 256MB guest, will fragment host vmas, no guarantee 
the guest can actually allocate all that memory, doesn't work with large 
pages, what happens on freeing, etc.

>> I don't think bulk memory sharing and the current transactional 
>> virtio mechanisms are a good fit for each other; but if we were to 
>> add a BAR-like capability to virtio that would address the 
>> compatibility requirement (though it might be difficult to implement 
>> on s390 with its requirement on contiguous host virtual address space).
>
> It doesn't necessarily have to be virtio if that's not what makes sense.

The problem is not virtio, it's the transient scatter gather dma model 
that virtio supports.  If virtio were to support BARs like Christian's 
patch proposes, then it could be easily done with virtio.

Maybe we should call it something else though to avoid confusion.

>
> The QEMU bits and the device model bits are actually relatively 
> simple.  The part that I think needs more deep thought is the 
> guest-visible interface.
>
> A char device is probably not the best interface.  I think you want 
> something like tmpfs/hugetlbfs.  

Yes those are so wonderful to work with.

> Another question is whether you want a guest to be able to share a 
> portion of its memory with another guest or have everything set up by 
> the host.
>

I think we want host setup.  That way you have symmetry among the guests.

> If everything is setup by the host, hot plug is important.

It is.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-20  9:01       ` Avi Kivity
@ 2009-05-20 13:45         ` Anthony Liguori
  2009-05-20 14:26           ` Avi Kivity
  0 siblings, 1 reply; 21+ messages in thread
From: Anthony Liguori @ 2009-05-20 13:45 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Cam Macdonell, kvm

Avi Kivity wrote:
> Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> Anthony Liguori wrote:
>>>> I'd strongly recommend working these patches on qemu-devel and 
>>>> lkml.  I suspect Avi may disagree with me, but in order for this to 
>>>> be eventually merged in either place, you're going to have 
>>>> additional requirements put on you.
>>>
>>> I don't disagree with the fact that there will be additional 
>>> requirements, but I might disagree with some of those additional 
>>> requirements themselves.
>>
>> It actually works out better than I think you expect it to...
>
> Can you explain why?  You haven't addressed my concerns the last time 
> around.

Because of the qemu_ram_alloc() patches.  We no longer have a contiguous 
phys_ram_base so we don't have to deal with mmap(MAP_FIXED).  We can 
also more practically do memory hot-add which is more or less a 
requirement of this work.

It also means we could do shared memory through more traditional means 
too like sys v ipc or whatever is the native mechanism on the underlying 
platform.  That means we could even support Win32 (although I wouldn't 
make that an initial requirement).

>>
>> We can't use mmap() directly.  With the new RAM allocation scheme, I 
>> think it's pretty reasonable to now allow portions of ram to come 
>> from files that get mmap() (sort of like -mem-path).
>>
>> This RAM area could be setup as a BAR.
>
> That's what Cam's patch does, and what you objected to.

I'm flexible.  BARs are pretty unattractive because of the size 
requirements.

The actual transport implementation is the least important part though 
IMHO.  The guest interface and how it's implemented within QEMU is much 
more important to get right the first time.

>> Why is that unimplementable?
>
> Bad choice of words - it's implementable, just not very usable.  You 
> can't share 1GB in a 256MB guest, will fragment host vmas, no 
> guarantee the guest can actually allocate all that memory, doesn't 
> work with large pages, what happens on freeing, etc.

You can share 1GB with a PCI BAR today.  You're limited to 32-bit 
addresses which admittedly we could fix.

Any reason to bother with BARs instead of just picking unused physical 
addresses?  Does Windows do anything special with BAR addresses?

>>
>> The QEMU bits and the device model bits are actually relatively 
>> simple.  The part that I think needs more deep thought is the 
>> guest-visible interface.
>>
>> A char device is probably not the best interface.  I think you want 
>> something like tmpfs/hugetlbfs.  
>
> Yes those are so wonderful to work with.

qemu -ivshmem 
file=/dev/shm/ring.shared,name=shared-ring,size=1G,notify=/path/to/socket

/path/to/socket is used to pass an eventfd

Within the guest, you'd have:

/dev/ivshmemfs/shared-ring

An app would mmap() that file, and then could do something like an 
ioctl() to get an eventfd.

Alternatively, you could have something like:

/dev/ivshmemfs/mem/shared-ring
/dev/ivshmemfs/notify/shared-ring

Where notify/shared-ring behaves like an eventfd().

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-20 13:45         ` Anthony Liguori
@ 2009-05-20 14:26           ` Avi Kivity
  0 siblings, 0 replies; 21+ messages in thread
From: Avi Kivity @ 2009-05-20 14:26 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Cam Macdonell, kvm

Anthony Liguori wrote:
>>>>> I'd strongly recommend working these patches on qemu-devel and 
>>>>> lkml.  I suspect Avi may disagree with me, but in order for this 
>>>>> to be eventually merged in either place, you're going to have 
>>>>> additional requirements put on you.
>>>>
>>>> I don't disagree with the fact that there will be additional 
>>>> requirements, but I might disagree with some of those additional 
>>>> requirements themselves.
>>>
>>> It actually works out better than I think you expect it to...
>>
>> Can you explain why?  You haven't addressed my concerns the last time 
>> around.
>
> Because of the qemu_ram_alloc() patches.  We no longer have a 
> contiguous phys_ram_base so we don't have to deal with 
> mmap(MAP_FIXED).  We can also more practically do memory hot-add which 
> is more or less a requirement of this work.

I think you're arguing my side.  If the guest specifies the memory to be 
shared via an add_buf() sglist allocated from its free memory, you have 
to use MAP_FIXED (since the gpa->hva mapping is already fixed for guest 
memory).  If it's provided as a BAR or equivalent, we can use a variant 
of qemu_ram_alloc() which binds to the shared segment instead of allocating.

> It also means we could do shared memory through more traditional means 
> too like sys v ipc or whatever is the native mechanism on the 
> underlying platform.  That means we could even support Win32 (although 
> I wouldn't make that an initial requirement).

Not with add_buf() memory...

>>> We can't use mmap() directly.  With the new RAM allocation scheme, I 
>>> think it's pretty reasonable to now allow portions of ram to come 
>>> from files that get mmap() (sort of like -mem-path).
>>>
>>> This RAM area could be setup as a BAR.
>>
>> That's what Cam's patch does, and what you objected to.
>
> I'm flexible.  BARs are pretty unattractive because of the size 
> requirements.

What size requirements?  The PCI memory hole?  Those requirements are 
easily lifted.

> The actual transport implementation is the least important part though 
> IMHO.  The guest interface and how it's implemented within QEMU is 
> much more important to get right the first time.

I agree, with much more emphasis on the guest/host interface.

>>> Why is that unimplementable?
>>
>> Bad choice of words - it's implementable, just not very usable.  You 
>> can't share 1GB in a 256MB guest, will fragment host vmas, no 
>> guarantee the guest can actually allocate all that memory, doesn't 
>> work with large pages, what happens on freeing, etc.
>
> You can share 1GB with a PCI BAR today.  You're limited to 32-bit 
> addresses which admittedly we could fix.
>
> Any reason to bother with BARs instead of just picking unused physical 
> addresses?  Does Windows do anything special with BAR addresses?

If you use a BAR you let the host kernel know what you're doing.  No 
doubt you could do the same thing yourself (the PCI support functions 
call the raw support functions), but if you use a BAR, everything from 
the BIOS onwards is plumbed down.

Sure we could do something independent a la vbus, but my preference has 
always been to behave like real hardware.

Oh, and if it's a BAR you can use device assignment.  You can't assign a 
device that exposes memory the host doesn't know about.

>>>
>>> The QEMU bits and the device model bits are actually relatively 
>>> simple.  The part that I think needs more deep thought is the 
>>> guest-visible interface.
>>>
>>> A char device is probably not the best interface.  I think you want 
>>> something like tmpfs/hugetlbfs.  
>>
>> Yes those are so wonderful to work with.
>
> qemu -ivshmem 
> file=/dev/shm/ring.shared,name=shared-ring,size=1G,notify=/path/to/socket
>
> /path/to/socket is used to pass an eventfd
>
> Within the guest, you'd have:
>
> /dev/ivshmemfs/shared-ring
>
> An app would mmap() that file, and then could do something like an 
> ioctl() to get an eventfd.
>
> Alternatively, you could have something like:
>
> /dev/ivshmemfs/mem/shared-ring
> /dev/ivshmemfs/notify/shared-ring
>
> Where notify/shared-ring behaves like an eventfd().

Being the traditionalist that I am, I'd much prefer it to be a char 
device and use udev rules to get a meaningful name if needed.  That's 
how every other real device works.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-19 11:20         ` Jayaraman, Bhaskar
@ 2009-05-19 11:35           ` Gregory Haskins
  0 siblings, 0 replies; 21+ messages in thread
From: Gregory Haskins @ 2009-05-19 11:35 UTC (permalink / raw)
  To: Jayaraman, Bhaskar; +Cc: Kumar, Venkat, Cam Macdonell, kvm@vger.kernel.org list

[-- Attachment #1: Type: text/plain, Size: 616 bytes --]

Jayaraman, Bhaskar wrote:
> Cam, is it somehow possible to generate a local APIC interrupt from one VM to another? I guess it shouldn't be as the LAPIC interrupts generated in one VM will go to the VCPUs of the same VM...
> Regards,
> Bhaskar.
>   

The closest thing to this is the irqfd+iosignalfd thing I mentioned the
other day.  With this model, a PIO/MMIO write in the src guest will
directly inject an interrupt into the dst guest's LAPIC.  However, as
Avi points out, this is just an optimization.  You can also do it by
first taking a hop through each guest's userspace as well.

HTH
-Greg



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH v2] Shared memory device with interrupt support
  2009-05-19  3:52       ` Kumar, Venkat
@ 2009-05-19 11:20         ` Jayaraman, Bhaskar
  2009-05-19 11:35           ` Gregory Haskins
  0 siblings, 1 reply; 21+ messages in thread
From: Jayaraman, Bhaskar @ 2009-05-19 11:20 UTC (permalink / raw)
  To: Kumar, Venkat, Cam Macdonell; +Cc: kvm@vger.kernel.org list

Cam, is it somehow possible to generate a local APIC interrupt from one VM to another? I guess it shouldn't be as the LAPIC interrupts generated in one VM will go to the VCPUs of the same VM...
Regards,
Bhaskar.

-----Original Message-----
From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On Behalf Of Kumar, Venkat
Sent: Tuesday, May 19, 2009 9:22 AM
To: Cam Macdonell
Cc: kvm@vger.kernel.org list
Subject: RE: [PATCH v2] Shared memory device with interrupt support

I had tried all syntaxes other than this :).
Interrupts work now.

Thx,

Venkat

-----Original Message-----
From: Cam Macdonell [mailto:cam@cs.ualberta.ca]
Sent: Monday, May 18, 2009 9:51 PM
To: Kumar, Venkat
Cc: kvm@vger.kernel.org list
Subject: Re: [PATCH v2] Shared memory device with interrupt support

Kumar, Venkat wrote:
> Cam - I got your patch to work but without notifications. I could share memory using the patch but notifications aren't working.
>
> I bring up two VM's with option "-ivshmem shrmem,1024,/dev/shm/shrmem,server" and "-ivshmem shrmem,1024,/dev/shm/shrmem" respectively.

Ok, I guess I need to do more error checking of arguments :)  You need
to specify "unix:" on the path.  So your options should look like this

"-ivshmem shrmem,1024,unix:/dev/shm/shrmem,server"

"-ivshmem shrmem,1024,unix:/dev/shm/shrmem"

That should help.

Cam

>
> When I make an "ioctl" from one of the VM's to inject an interrupt to the other VM, I get an error in "qemu_chr_write" and return value is "-1". "write" call in "send_all" is failing with return value "-1".
>
> Am I missing something here?
>
> Thx,
>
> Venkat
>
>
> -----Original Message-----
> From: Cam Macdonell [mailto:cam@cs.ualberta.ca]
> Sent: Saturday, May 16, 2009 9:01 AM
> To: Kumar, Venkat
> Cc: kvm@vger.kernel.org list
> Subject: Re: [PATCH v2] Shared memory device with interrupt support
>
>
> On 15-May-09, at 8:54 PM, Kumar, Venkat wrote:
>
>> Cam,
>>
>> A question on interrupts as well.
>> What is "unix:path" that needs to be passed in the argument list?
>> Can it be any string?
>
> It has to be a valid path on the host.  It will create a unix domain
> socket on that path.
>
>> If my understanding is correct, both VMs that want to
>> communicate would give this path on the command line, with one of
>> them specifying "server".
>
> Exactly, the one with the "server" in the parameter list will wait for
> a connection before booting.
>
> Cam
>
>> Thx,
>> Venkat
>>
>>
>>
>>
>>
>>
>>    Support an inter-vm shared memory device that maps a shared-
>> memory object
>> as a PCI device in the guest.  This patch also supports interrupts
>> between
>> guest by communicating over a unix domain socket.  This patch
>> applies to the
>> qemu-kvm repository.
>>
>> This device now creates a qemu character device and sends 1-byte
>> messages to
>> trigger interrupts.  Interrupts are triggered by writing to the "Doorbell"
>> register
>> on the shared memory PCI device.  The lower 8-bits of the value
>> written to this
>> register are sent as the 1-byte message so different meanings of
>> interrupts can
>> be supported.
>>
>> Interrupts are only supported between 2 VMs currently.  One VM must
>> act as the
>> server by adding "server" to the command-line argument.  Shared
>> memory devices
>> are created with the following command-line:
>>
>> -ivshmem <shm object>,<size in MB>,[unix:<path>][,server]
>>
>> Interrupts can also be used between host and guest by
>> implementing a
>> listener on the host.
>>
>> Cam
>>
>> ---
>> Makefile.target |    3 +
>> hw/ivshmem.c    |  421 ++++++++++++++++++++++++++++++++++++++++++++++
>> +++++++++
>> hw/pc.c         |    6 +
>> hw/pc.h         |    3 +
>> qemu-options.hx |   14 ++
>> sysemu.h        |    8 +
>> vl.c            |   14 ++
>> 7 files changed, 469 insertions(+), 0 deletions(-)
>> create mode 100644 hw/ivshmem.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index b68a689..3190bba 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -643,6 +643,9 @@ OBJS += pcnet.o
>> OBJS += rtl8139.o
>> OBJS += e1000.o
>>
>> +# Inter-VM PCI shared memory
>> +OBJS += ivshmem.o
>> +
>> # Generic watchdog support and some watchdog devices
>> OBJS += watchdog.o
>> OBJS += wdt_ib700.o wdt_i6300esb.o
>> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
>> new file mode 100644
>> index 0000000..95e2268
>> --- /dev/null
>> +++ b/hw/ivshmem.c
>> @@ -0,0 +1,421 @@
>> +/*
>> + * Inter-VM Shared Memory PCI device.
>> + *
>> + * Author:
>> + *      Cam Macdonell <c...@cs.ualberta.ca>
>> + *
>> + * Based On: cirrus_vga.c and rtl8139.c
>> + *
>> + * This code is licensed under the GNU GPL v2.
>> + */
>> +
>> +#include "hw.h"
>> +#include "console.h"
>> +#include "pc.h"
>> +#include "pci.h"
>> +#include "sysemu.h"
>> +
>> +#include "qemu-common.h"
>> +#include <sys/mman.h>
>> +
>> +#define PCI_COMMAND_IOACCESS                0x0001
>> +#define PCI_COMMAND_MEMACCESS               0x0002
>> +#define PCI_COMMAND_BUSMASTER               0x0004
>> +
>> +//#define DEBUG_IVSHMEM
>> +
>> +#ifdef DEBUG_IVSHMEM
>> +#define IVSHMEM_DPRINTF(fmt, args...)        \
>> +    do {printf("IVSHMEM: " fmt, ##args); } while (0)
>> +#else
>> +#define IVSHMEM_DPRINTF(fmt, args...)
>> +#endif
>> +
>> +typedef struct IVShmemState {
>> +    uint16_t intrmask;
>> +    uint16_t intrstatus;
>> +    uint16_t doorbell;
>> +    uint8_t *ivshmem_ptr;
>> +    unsigned long ivshmem_offset;
>> +    unsigned int ivshmem_size;
>> +    unsigned long bios_offset;
>> +    unsigned int bios_size;
>> +    target_phys_addr_t base_ctrl;
>> +    int it_shift;
>> +    PCIDevice *pci_dev;
>> +    CharDriverState * chr;
>> +    unsigned long map_addr;
>> +    unsigned long map_end;
>> +    int ivshmem_mmio_io_addr;
>> +} IVShmemState;
>> +
>> +typedef struct PCI_IVShmemState {
>> +    PCIDevice dev;
>> +    IVShmemState ivshmem_state;
>> +} PCI_IVShmemState;
>> +
>> +typedef struct IVShmemDesc {
>> +    char name[1024];
>> +    char * chrdev;
>> +    int size;
>> +} IVShmemDesc;
>> +
>> +
>> +/* registers for the Inter-VM shared memory device */
>> +enum ivshmem_registers {
>> +    IntrMask = 0,
>> +    IntrStatus = 16,
>> +    Doorbell = 32
>> +};
>> +
>> +static int num_ivshmem_devices = 0;
>> +static IVShmemDesc ivshmem_desc;
>> +
>> +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
>> +                    uint32_t addr, uint32_t size, int type)
>> +{
>> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
>> +    IVShmemState *s = &d->ivshmem_state;
>> +
>> +    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
>> +    cpu_register_physical_memory(addr, s->ivshmem_size, s-
>>> ivshmem_offset);
>> +
>> +}
>> +
>> +void ivshmem_init(const char * optarg) {
>> +
>> +    char * temp;
>> +    char * ivshmem_sz;
>> +    int size;
>> +
>> +    num_ivshmem_devices++;
>> +
>> +    /* currently we only support 1 device */
>> +    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
>> +        return;
>> +    }
>> +
>> +    temp = strdup(optarg);
>> +    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
>> +    ivshmem_sz=strsep(&temp,",");
>> +    if (ivshmem_sz != NULL){
>> +        size = atol(ivshmem_sz);
>> +    } else {
>> +        size = -1;
>> +    }
>> +
>> +    ivshmem_desc.chrdev = strsep(&temp,"\0");
>> +
>> +    if ( size == -1) {
>> +        ivshmem_desc.size = TARGET_PAGE_SIZE;
>> +    } else {
>> +        ivshmem_desc.size = size*1024*1024;
>> +    }
>> +    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev
>> is %s\n",
>> +                                        optarg, ivshmem_desc.name,
>> +                                        ivshmem_desc.size,
>> ivshmem_desc.chrdev);
>> +}
>> +
>> +int ivshmem_get_size(void) {
>> +    return ivshmem_desc.size;
>> +}
>> +
>> +/* accessing registers - based on rtl8139 */
>> +static void ivshmem_update_irq(IVShmemState *s)
>> +{
>> +    int isr;
>> +    isr = (s->intrstatus & s->intrmask) & 0xffff;
>> +
>> +    /* don't print ISR resets */
>> +    if (isr) {
>> +        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
>> +           isr ? 1 : 0, s->intrstatus, s->intrmask);
>> +    }
>> +
>> +    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
>> +}
>> +
>> +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
>> +                       uint32_t addr, uint32_t size, int type)
>> +{
>> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
>> +    IVShmemState *s = &d->ivshmem_state;
>> +
>> +    cpu_register_physical_memory(addr + 0, 0x100, s-
>>> ivshmem_mmio_io_addr);
>> +}
>> +
>> +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
>> +{
>> +    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
>> +
>> +    s->intrmask = val;
>> +
>> +    ivshmem_update_irq(s);
>> +}
>> +
>> +static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
>> +{
>> +    uint32_t ret = s->intrmask;
>> +
>> +    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
>> +
>> +    return ret;
>> +}
>> +
>> +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
>> +{
>> +    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
>> +
>> +    s->intrstatus = val;
>> +
>> +    ivshmem_update_irq(s);
>> +    return;
>> +}
>> +
>> +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
>> +{
>> +    uint32_t ret = s->intrstatus;
>> +
>> +    /* reading ISR clears all interrupts */
>> +    s->intrstatus = 0;
>> +
>> +    ivshmem_update_irq(s);
>> +
>> +    return ret;
>> +}
>> +
>> +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned
>> long) opaque);
>> +
>> +    addr &= 0xfe;
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ivshmem_IntrMask_write(s, val);
>> +            break;
>> +
>> +        case IntrStatus:
>> +            ivshmem_IntrStatus_write(s, val);
>> +            break;
>> +
>> +        default:
>> +            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
>> +    }
>> +}
>> +
>> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
>> +}
>> +
>> +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVShmemState *s = opaque;
>> +    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
>> +
>> +    switch (addr)
>> +    {   // in future, we will probably want to support more types
>> of doorbells
>> +        case Doorbell:
>> +            // wake up the other side
>> +            qemu_chr_write(s->chr, &writebyte, 1);
>> +            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n",
>> writebyte);
>> +            break;
>> +        default:
>> +            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
>> +    }
>> +}
>> +
>> +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
>> +{
>> +
>> +    IVShmemState *s = opaque;
>> +    uint32_t ret;
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ret = ivshmem_IntrMask_read(s);
>> +            break;
>> +        case IntrStatus:
>> +            ret = ivshmem_IntrStatus_read(s);
>> +            break;
>> +        default:
>> +            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
>> +            ret = 0;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
>> +    return 0;
>> +}
>> +
>> +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
>> +
>> +    return 0;
>> +}
>> +
>> +static void ivshmem_mmio_writeb(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writeb(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static void ivshmem_mmio_writew(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writew(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static void ivshmem_mmio_writel(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writel(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    return ivshmem_io_readb(opaque, addr & 0xFF);
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
>> +    return val;
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
>> +    return val;
>> +}
>> +
>> +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
>> +    ivshmem_mmio_readb,
>> +    ivshmem_mmio_readw,
>> +    ivshmem_mmio_readl,
>> +};
>> +
>> +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
>> +    ivshmem_mmio_writeb,
>> +    ivshmem_mmio_writew,
>> +    ivshmem_mmio_writel,
>> +};
>> +
>> +static int ivshmem_can_receive(void * opaque)
>> +{
>> +    return 1;
>> +}
>> +
>> +static void ivshmem_receive(void *opaque, const uint8_t *buf, int
>> size)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    ivshmem_IntrStatus_write(s, *buf);
>> +
>> +    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
>> +}
>> +
>> +static void ivshmem_event(void *opaque, int event)
>> +{
>> +    IVShmemState *s = opaque;
>> +    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
>> +}
>> +
>> +int pci_ivshmem_init(PCIBus *bus)
>> +{
>> +    PCI_IVShmemState *d;
>> +    IVShmemState *s;
>> +    uint8_t *pci_conf;
>> +    int ivshmem_fd;
>> +
>> +    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
>> +    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
>> +                                           sizeof(PCI_IVShmemState),
>> +                                           -1, NULL, NULL);
>> +    if (!d) {
>> +        return -1;
>> +    }
>> +
>> +    s = &d->ivshmem_state;
>> +
>> +    /* allocate shared memory RAM */
>> +    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
>> +    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
>> +    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
>> +
>> +    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
>> +
>> +    s->pci_dev = &d->dev;
>> +    s->ivshmem_size = ivshmem_desc.size;
>> +
>> +    pci_conf = d->dev.config;
>> +    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
>> +    pci_conf[0x01] = 0x1a;
>> +    pci_conf[0x02] = 0x10;
>> +    pci_conf[0x03] = 0x11;
>> +    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
>> +    pci_conf[0x0a] = 0x00; // RAM controller
>> +    pci_conf[0x0b] = 0x05;
>> +    pci_conf[0x0e] = 0x00; // header_type
>> +
>> +    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support
>> interrupts
>> +
>> +    /* XXX: ivshmem_desc.size must be a power of two */
>> +
>> +    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0,
>> ivshmem_mmio_read,
>> +                                    ivshmem_mmio_write, s);
>> +
>> +    /* region for registers*/
>> +    pci_register_io_region(&d->dev, 0, 0x100,
>> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
>> +
>> +    /* region for shared memory */
>> +    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
>> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
>> +
>> +    /* open shared memory file  */
>> +    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR,
>> S_IRWXU)) <
>> 0)
>> +    {
>> +        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
>> +        exit(-1);
>> +    }
>> +
>> +    ftruncate(ivshmem_fd, ivshmem_desc.size);
>> +
>> +    /* mmap onto PCI device's memory */
>> +    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
>> +                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) ==
>> MAP_FAILED)
>> +    {
>> +        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
>> +        exit(-1);
>> +    }
>> +
>> +    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s-
>>> ivshmem_ptr);
>> +
>> +    /* setup character device channel */
>> +
>> +    if (ivshmem_desc.chrdev != NULL) {
>> +        char label[32];
>> +        snprintf(label, 32, "ivshmem_chardev");
>> +        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
>> +        if (s->chr == NULL) {
>> +            fprintf(stderr, "No server listening on %s\n",
>> ivshmem_desc.chrdev);
>> +            exit(-1);
>> +        }
>> +        qemu_chr_add_handlers(s->chr, ivshmem_can_receive,
>> ivshmem_receive,
>> +                          ivshmem_event, s);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 34a4d25..7d0cff2 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -67,6 +67,8 @@ static PITState *pit;
>> static IOAPICState *ioapic;
>> static PCIDevice *i440fx_state;
>>
>> +extern int ivshmem_enabled;
>> +
>> static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
>> {
>> }
>> @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
>> vga_ram_size,
>>         }
>>     }
>>
>> +    if (pci_enabled && ivshmem_enabled) {
>> +        pci_ivshmem_init(pci_bus);
>> +    }
>> +
>>     rtc_state = rtc_init(0x70, i8259[8], 2000);
>>
>>     qemu_register_boot_set(pc_boot_set, rtc_state);
>> diff --git a/hw/pc.h b/hw/pc.h
>> index 885c918..0ae0493 100644
>> --- a/hw/pc.h
>> +++ b/hw/pc.h
>> @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,
>> NICInfo *nd);
>>
>> void extboot_init(BlockDriverState *bs, int cmd);
>>
>> +/* ivshmem.c */
>> +int pci_ivshmem_init(PCIBus *bus);
>> +
>> #endif
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 173f458..9ab3e2d 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical
>> mode and
>> @code{stdio} in
>> non graphical mode.
>> ETEXI
>>
>> +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
>> +    "-ivshmem name,size[,unix:path][,server]  creates or opens a
>> shared file
>> 'name' of size \
>> +    'size' (in MB) and exposes it as a PCI device in the guest\n")
>> +STEXI
>> +@item -ivshmem @var{file},@var{size}
>> +Creates a POSIX shared file named @var{file} of size @var{size} and
>> creates a
>> +PCI device of the same size that maps the shared file into the
>> device for
>> guests
>> +to access.  The created file on the host is located in /dev/shm/
>> +
>> +@item unix:@var{path}[,server]
>> +A unix domain socket is used to send and receive interrupts between
>> VMs.  The
>> unix domain socket
>> +@var{path} is used for connections.
>> +ETEXI
>> +
>> DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
>>     "-pidfile file   write PID to 'file'\n")
>> STEXI
>> diff --git a/sysemu.h b/sysemu.h
>> index 1f45fd6..862b79e 100644
>> --- a/sysemu.h
>> +++ b/sysemu.h
>> @@ -217,6 +217,14 @@ extern CharDriverState
>> *parallel_hds[MAX_PARALLEL_PORTS];
>>
>> extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>>
>> +/* inter-VM shared memory devices */
>> +
>> +#define MAX_IVSHMEM_DEVICES 1
>> +
>> +extern CharDriverState * ivshmem_chardev;
>> +void ivshmem_init(const char * optarg);
>> +int ivshmem_get_size(void);
>> +
>> #define TFR(expr) do { if ((expr) != -1) break; } while (errno ==
>> EINTR)
>>
>> #ifdef NEED_CPU_H
>> diff --git a/vl.c b/vl.c
>> index 0420634..7260fa1 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no
>> change */
>> int cirrus_vga_enabled = 1;
>> int std_vga_enabled = 0;
>> int vmsvga_enabled = 0;
>> +int ivshmem_enabled = 0;
>> int xenfb_enabled = 0;
>> #ifdef TARGET_SPARC
>> int graphic_width = 1024;
>> @@ -239,6 +240,8 @@ int no_quit = 0;
>> CharDriverState *serial_hds[MAX_SERIAL_PORTS];
>> CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>> CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>> +CharDriverState *ivshmem_chardev;
>> +const char * ivshmem_device;
>> #ifdef TARGET_I386
>> int win2k_install_hack = 0;
>> int rtc_td_hack = 0;
>> @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
>>     cyls = heads = secs = 0;
>>     translation = BIOS_ATA_TRANSLATION_AUTO;
>>     monitor_device = "vc:80Cx24C";
>> +    ivshmem_device = NULL;
>> +    ivshmem_chardev = NULL;
>>
>>     serial_devices[0] = "vc:80Cx24C";
>>     for(i = 1; i < MAX_SERIAL_PORTS; i++)
>> @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
>>                 parallel_devices[parallel_device_index] = optarg;
>>                 parallel_device_index++;
>>                 break;
>> +            case QEMU_OPTION_ivshmem:
>> +                ivshmem_device = optarg;
>> +                ivshmem_enabled = 1;
>> +                break;
>>            case QEMU_OPTION_loadvm:
>>                loadvm = optarg;
>>                break;
>> @@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
>>            }
>>     }
>>
>> +    if (ivshmem_enabled) {
>> +        ivshmem_init(ivshmem_device);
>> +        ram_size += ivshmem_get_size();
>> +    }
>> +
>> #ifdef CONFIG_KQEMU
>>     /* FIXME: This is a nasty hack because kqemu can't cope with
>> dynamic
>>        guest ram allocation.  It needs to go away.  */
>> Thx,
>>
>> Venkat
>
>
>
> -----------------------------------------------
> A. Cameron Macdonell
> Ph.D. Student
> Department of Computing Science
> University of Alberta
> cam@cs.ualberta.ca

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH v2] Shared memory device with interrupt support
  2009-05-18 16:20     ` Cam Macdonell
@ 2009-05-19  3:52       ` Kumar, Venkat
  2009-05-19 11:20         ` Jayaraman, Bhaskar
  0 siblings, 1 reply; 21+ messages in thread
From: Kumar, Venkat @ 2009-05-19  3:52 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: kvm@vger.kernel.org list

I had tried all syntaxes other than this :).
Interrupts work now.

Thx,

Venkat

-----Original Message-----
From: Cam Macdonell [mailto:cam@cs.ualberta.ca]
Sent: Monday, May 18, 2009 9:51 PM
To: Kumar, Venkat
Cc: kvm@vger.kernel.org list
Subject: Re: [PATCH v2] Shared memory device with interrupt support

Kumar, Venkat wrote:
> Cam - I got your patch to work but without notifications. I could share memory using the patch but notifications aren't working.
>
> I bring up two VM's with option "-ivshmem shrmem,1024,/dev/shm/shrmem,server" and "-ivshmem shrmem,1024,/dev/shm/shrmem" respectively.

Ok, I guess I need to do more error checking of arguments :)  You need
to specify "unix:" on the path.  So your options should look like this

"-ivshmem shrmem,1024,unix:/dev/shm/shrmem,server"

"-ivshmem shrmem,1024,unix:/dev/shm/shrmem"

That should help.

Cam

>
> When I make an "ioctl" from one of the VM's to inject an interrupt to the other VM, I get an error in "qemu_chr_write" and return value is "-1". "write" call in "send_all" is failing with return value "-1".
>
> Am I missing something here?
>
> Thx,
>
> Venkat
>
>
> -----Original Message-----
> From: Cam Macdonell [mailto:cam@cs.ualberta.ca]
> Sent: Saturday, May 16, 2009 9:01 AM
> To: Kumar, Venkat
> Cc: kvm@vger.kernel.org list
> Subject: Re: [PATCH v2] Shared memory device with interrupt support
>
>
> On 15-May-09, at 8:54 PM, Kumar, Venkat wrote:
>
>> Cam,
>>
>> A questions on interrupts as well.
>> What is "unix:path" that needs to be passed in the argument list?
>> Can it be any string?
>
> It has to be a valid path on the host.  It will create a unix domain
> socket on that path.
>
>> If my understanding is correct both the VM's who wants to
>> communicate would gives this path in the command line with one of
>> them specifying as "server".
>
> Exactly, the one with the "server" in the parameter list will wait for
> a connection before booting.
>
> Cam
>
>> Thx,
>> Venkat
>>
>>
>>
>>
>>
>>
>>    Support an inter-vm shared memory device that maps a shared-
>> memory object
>> as a PCI device in the guest.  This patch also supports interrupts
>> between
>> guest by communicating over a unix domain socket.  This patch
>> applies to the
>> qemu-kvm repository.
>>
>> This device now creates a qemu character device and sends 1-byte
>> messages to
>> trigger interrupts.  Writes are triggered by writing to the "Doorbell"
>> register
>> on the shared memory PCI device.  The lower 8-bits of the value
>> written to this
>> register are sent as the 1-byte message so different meanings of
>> interrupts can
>> be supported.
>>
>> Interrupts are only supported between 2 VMs currently.  One VM must
>> act as the
>> server by adding "server" to the command-line argument.  Shared
>> memory devices
>> are created with the following command-line:
>>
>> -ivshmem <shm object>,<size in MB>[,unix:<path>][,server]
>>
>> Interrupts can also be used between host and guest as well by
>> implementing a
>> listener on the host.
>>
>> Cam
>>
>> ---
>> Makefile.target |    3 +
>> hw/ivshmem.c    |  421 ++++++++++++++++++++++++++++++++++++++++++++++
>> +++++++++
>> hw/pc.c         |    6 +
>> hw/pc.h         |    3 +
>> qemu-options.hx |   14 ++
>> sysemu.h        |    8 +
>> vl.c            |   14 ++
>> 7 files changed, 469 insertions(+), 0 deletions(-)
>> create mode 100644 hw/ivshmem.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index b68a689..3190bba 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -643,6 +643,9 @@ OBJS += pcnet.o
>> OBJS += rtl8139.o
>> OBJS += e1000.o
>>
>> +# Inter-VM PCI shared memory
>> +OBJS += ivshmem.o
>> +
>> # Generic watchdog support and some watchdog devices
>> OBJS += watchdog.o
>> OBJS += wdt_ib700.o wdt_i6300esb.o
>> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
>> new file mode 100644
>> index 0000000..95e2268
>> --- /dev/null
>> +++ b/hw/ivshmem.c
>> @@ -0,0 +1,421 @@
>> +/*
>> + * Inter-VM Shared Memory PCI device.
>> + *
>> + * Author:
>> + *      Cam Macdonell <c...@cs.ualberta.ca>
>> + *
>> + * Based On: cirrus_vga.c and rtl8139.c
>> + *
>> + * This code is licensed under the GNU GPL v2.
>> + */
>> +
>> +#include "hw.h"
>> +#include "console.h"
>> +#include "pc.h"
>> +#include "pci.h"
>> +#include "sysemu.h"
>> +
>> +#include "qemu-common.h"
>> +#include <sys/mman.h>
>> +
>> +#define PCI_COMMAND_IOACCESS                0x0001
>> +#define PCI_COMMAND_MEMACCESS               0x0002
>> +#define PCI_COMMAND_BUSMASTER               0x0004
>> +
>> +//#define DEBUG_IVSHMEM
>> +
>> +#ifdef DEBUG_IVSHMEM
>> +#define IVSHMEM_DPRINTF(fmt, args...)        \
>> +    do {printf("IVSHMEM: " fmt, ##args); } while (0)
>> +#else
>> +#define IVSHMEM_DPRINTF(fmt, args...)
>> +#endif
>> +
>> +typedef struct IVShmemState {
>> +    uint16_t intrmask;
>> +    uint16_t intrstatus;
>> +    uint16_t doorbell;
>> +    uint8_t *ivshmem_ptr;
>> +    unsigned long ivshmem_offset;
>> +    unsigned int ivshmem_size;
>> +    unsigned long bios_offset;
>> +    unsigned int bios_size;
>> +    target_phys_addr_t base_ctrl;
>> +    int it_shift;
>> +    PCIDevice *pci_dev;
>> +    CharDriverState * chr;
>> +    unsigned long map_addr;
>> +    unsigned long map_end;
>> +    int ivshmem_mmio_io_addr;
>> +} IVShmemState;
>> +
>> +typedef struct PCI_IVShmemState {
>> +    PCIDevice dev;
>> +    IVShmemState ivshmem_state;
>> +} PCI_IVShmemState;
>> +
>> +typedef struct IVShmemDesc {
>> +    char name[1024];
>> +    char * chrdev;
>> +    int size;
>> +} IVShmemDesc;
>> +
>> +
>> +/* registers for the Inter-VM shared memory device */
>> +enum ivshmem_registers {
>> +    IntrMask = 0,
>> +    IntrStatus = 16,
>> +    Doorbell = 32
>> +};
>> +
>> +static int num_ivshmem_devices = 0;
>> +static IVShmemDesc ivshmem_desc;
>> +
>> +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
>> +                    uint32_t addr, uint32_t size, int type)
>> +{
>> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
>> +    IVShmemState *s = &d->ivshmem_state;
>> +
>> +    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
>> +    cpu_register_physical_memory(addr, s->ivshmem_size, s-
>>> ivshmem_offset);
>> +
>> +}
>> +
>> +void ivshmem_init(const char * optarg) {
>> +
>> +    char * temp;
>> +    char * ivshmem_sz;
>> +    int size;
>> +
>> +    num_ivshmem_devices++;
>> +
>> +    /* currently we only support 1 device */
>> +    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
>> +        return;
>> +    }
>> +
>> +    temp = strdup(optarg);
>> +    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
>> +    ivshmem_sz=strsep(&temp,",");
>> +    if (ivshmem_sz != NULL){
>> +        size = atol(ivshmem_sz);
>> +    } else {
>> +        size = -1;
>> +    }
>> +
>> +    ivshmem_desc.chrdev = strsep(&temp,"\0");
>> +
>> +    if ( size == -1) {
>> +        ivshmem_desc.size = TARGET_PAGE_SIZE;
>> +    } else {
>> +        ivshmem_desc.size = size*1024*1024;
>> +    }
>> +    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev
>> is %s\n",
>> +                                        optarg, ivshmem_desc.name,
>> +                                        ivshmem_desc.size,
>> ivshmem_desc.chrdev);
>> +}
>> +
>> +int ivshmem_get_size(void) {
>> +    return ivshmem_desc.size;
>> +}
>> +
>> +/* accessing registers - based on rtl8139 */
>> +static void ivshmem_update_irq(IVShmemState *s)
>> +{
>> +    int isr;
>> +    isr = (s->intrstatus & s->intrmask) & 0xffff;
>> +
>> +    /* don't print ISR resets */
>> +    if (isr) {
>> +        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
>> +           isr ? 1 : 0, s->intrstatus, s->intrmask);
>> +    }
>> +
>> +    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
>> +}
>> +
>> +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
>> +                       uint32_t addr, uint32_t size, int type)
>> +{
>> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
>> +    IVShmemState *s = &d->ivshmem_state;
>> +
>> +    cpu_register_physical_memory(addr + 0, 0x100, s-
>>> ivshmem_mmio_io_addr);
>> +}
>> +
>> +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
>> +{
>> +    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
>> +
>> +    s->intrmask = val;
>> +
>> +    ivshmem_update_irq(s);
>> +}
>> +
>> +static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
>> +{
>> +    uint32_t ret = s->intrmask;
>> +
>> +    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
>> +
>> +    return ret;
>> +}
>> +
>> +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
>> +{
>> +    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
>> +
>> +    s->intrstatus = val;
>> +
>> +    ivshmem_update_irq(s);
>> +    return;
>> +}
>> +
>> +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
>> +{
>> +    uint32_t ret = s->intrstatus;
>> +
>> +    /* reading ISR clears all interrupts */
>> +    s->intrstatus = 0;
>> +
>> +    ivshmem_update_irq(s);
>> +
>> +    return ret;
>> +}
>> +
>> +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned
>> long) opaque);
>> +
>> +    addr &= 0xfe;
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ivshmem_IntrMask_write(s, val);
>> +            break;
>> +
>> +        case IntrStatus:
>> +            ivshmem_IntrStatus_write(s, val);
>> +            break;
>> +
>> +        default:
>> +            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
>> +    }
>> +}
>> +
>> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
>> +}
>> +
>> +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVShmemState *s = opaque;
>> +    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
>> +
>> +    switch (addr)
>> +    {   // in future, we will probably want to support more types
>> of doorbells
>> +        case Doorbell:
>> +            // wake up the other side
>> +            qemu_chr_write(s->chr, &writebyte, 1);
>> +            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n",
>> writebyte);
>> +            break;
>> +        default:
>> +            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
>> +    }
>> +}
>> +
>> +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
>> +{
>> +
>> +    IVShmemState *s = opaque;
>> +    uint32_t ret;
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ret = ivshmem_IntrMask_read(s);
>> +            break;
>> +        case IntrStatus:
>> +            ret = ivshmem_IntrStatus_read(s);
>> +            break;
>> +        default:
>> +            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
>> +            ret = 0;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
>> +    return 0;
>> +}
>> +
>> +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
>> +
>> +    return 0;
>> +}
>> +
>> +static void ivshmem_mmio_writeb(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writeb(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static void ivshmem_mmio_writew(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writew(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static void ivshmem_mmio_writel(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writel(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    return ivshmem_io_readb(opaque, addr & 0xFF);
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
>> +    return val;
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
>> +    return val;
>> +}
>> +
>> +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
>> +    ivshmem_mmio_readb,
>> +    ivshmem_mmio_readw,
>> +    ivshmem_mmio_readl,
>> +};
>> +
>> +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
>> +    ivshmem_mmio_writeb,
>> +    ivshmem_mmio_writew,
>> +    ivshmem_mmio_writel,
>> +};
>> +
>> +static int ivshmem_can_receive(void * opaque)
>> +{
>> +    return 1;
>> +}
>> +
>> +static void ivshmem_receive(void *opaque, const uint8_t *buf, int
>> size)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    ivshmem_IntrStatus_write(s, *buf);
>> +
>> +    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
>> +}
>> +
>> +static void ivshmem_event(void *opaque, int event)
>> +{
>> +    IVShmemState *s = opaque;
>> +    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
>> +}
>> +
>> +int pci_ivshmem_init(PCIBus *bus)
>> +{
>> +    PCI_IVShmemState *d;
>> +    IVShmemState *s;
>> +    uint8_t *pci_conf;
>> +    int ivshmem_fd;
>> +
>> +    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
>> +    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
>> +                                           sizeof(PCI_IVShmemState),
>> +                                           -1, NULL, NULL);
>> +    if (!d) {
>> +        return -1;
>> +    }
>> +
>> +    s = &d->ivshmem_state;
>> +
>> +    /* allocate shared memory RAM */
>> +    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
>> +    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
>> +    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
>> +
>> +    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
>> +
>> +    s->pci_dev = &d->dev;
>> +    s->ivshmem_size = ivshmem_desc.size;
>> +
>> +    pci_conf = d->dev.config;
>> +    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
>> +    pci_conf[0x01] = 0x1a;
>> +    pci_conf[0x02] = 0x10;
>> +    pci_conf[0x03] = 0x11;
>> +    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
>> +    pci_conf[0x0a] = 0x00; // RAM controller
>> +    pci_conf[0x0b] = 0x05;
>> +    pci_conf[0x0e] = 0x00; // header_type
>> +
>> +    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support
>> interrupts
>> +
>> +    /* XXX: ivshmem_desc.size must be a power of two */
>> +
>> +    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0,
>> ivshmem_mmio_read,
>> +                                    ivshmem_mmio_write, s);
>> +
>> +    /* region for registers*/
>> +    pci_register_io_region(&d->dev, 0, 0x100,
>> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
>> +
>> +    /* region for shared memory */
>> +    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
>> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
>> +
>> +    /* open shared memory file  */
>> +    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR,
>> S_IRWXU)) <
>> 0)
>> +    {
>> +        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
>> +        exit(-1);
>> +    }
>> +
>> +    ftruncate(ivshmem_fd, ivshmem_desc.size);
>> +
>> +    /* mmap onto PCI device's memory */
>> +    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
>> +                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) ==
>> MAP_FAILED)
>> +    {
>> +        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
>> +        exit(-1);
>> +    }
>> +
>> +    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s-
>>> ivshmem_ptr);
>> +
>> +    /* setup character device channel */
>> +
>> +    if (ivshmem_desc.chrdev != NULL) {
>> +        char label[32];
>> +        snprintf(label, 32, "ivshmem_chardev");
>> +        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
>> +        if (s->chr == NULL) {
>> +            fprintf(stderr, "No server listening on %s\n",
>> ivshmem_desc.chrdev);
>> +            exit(-1);
>> +        }
>> +        qemu_chr_add_handlers(s->chr, ivshmem_can_receive,
>> ivshmem_receive,
>> +                          ivshmem_event, s);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 34a4d25..7d0cff2 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -67,6 +67,8 @@ static PITState *pit;
>> static IOAPICState *ioapic;
>> static PCIDevice *i440fx_state;
>>
>> +extern int ivshmem_enabled;
>> +
>> static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
>> {
>> }
>> @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
>> vga_ram_size,
>>         }
>>     }
>>
>> +    if (pci_enabled && ivshmem_enabled) {
>> +        pci_ivshmem_init(pci_bus);
>> +    }
>> +
>>     rtc_state = rtc_init(0x70, i8259[8], 2000);
>>
>>     qemu_register_boot_set(pc_boot_set, rtc_state);
>> diff --git a/hw/pc.h b/hw/pc.h
>> index 885c918..0ae0493 100644
>> --- a/hw/pc.h
>> +++ b/hw/pc.h
>> @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,
>> NICInfo *nd);
>>
>> void extboot_init(BlockDriverState *bs, int cmd);
>>
>> +/* ivshmem.c */
>> +int pci_ivshmem_init(PCIBus *bus);
>> +
>> #endif
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 173f458..9ab3e2d 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical
>> mode and
>> @code{stdio} in
>> non graphical mode.
>> ETEXI
>>
>> +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
>> +    "-ivshmem name,size[,unix:path][,server]  creates or opens a
>> shared file
>> 'name' of size \
>> +    'size' (in MB) and exposes it as a PCI device in the guest\n")
>> +STEXI
>> +@item -ivshmem @var{file},@var{size}
>> +Creates a POSIX shared file named @var{file} of size @var{size} and
>> creates a
>> +PCI device of the same size that maps the shared file into the
>> device for
>> guests
>> +to access.  The created file on the host is located in /dev/shm/
>> +
>> +@item unix:@var{path}[,server]
>> +A unix domain socket is used to send and receive interrupts between
>> VMs.  The
>> unix domain socket
>> +@var{path} is used for connections.
>> +ETEXI
>> +
>> DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
>>     "-pidfile file   write PID to 'file'\n")
>> STEXI
>> diff --git a/sysemu.h b/sysemu.h
>> index 1f45fd6..862b79e 100644
>> --- a/sysemu.h
>> +++ b/sysemu.h
>> @@ -217,6 +217,14 @@ extern CharDriverState
>> *parallel_hds[MAX_PARALLEL_PORTS];
>>
>> extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>>
>> +/* inter-VM shared memory devices */
>> +
>> +#define MAX_IVSHMEM_DEVICES 1
>> +
>> +extern CharDriverState * ivshmem_chardev;
>> +void ivshmem_init(const char * optarg);
>> +int ivshmem_get_size(void);
>> +
>> #define TFR(expr) do { if ((expr) != -1) break; } while (errno ==
>> EINTR)
>>
>> #ifdef NEED_CPU_H
>> diff --git a/vl.c b/vl.c
>> index 0420634..7260fa1 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no
>> change */
>> int cirrus_vga_enabled = 1;
>> int std_vga_enabled = 0;
>> int vmsvga_enabled = 0;
>> +int ivshmem_enabled = 0;
>> int xenfb_enabled = 0;
>> #ifdef TARGET_SPARC
>> int graphic_width = 1024;
>> @@ -239,6 +240,8 @@ int no_quit = 0;
>> CharDriverState *serial_hds[MAX_SERIAL_PORTS];
>> CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>> CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>> +CharDriverState *ivshmem_chardev;
>> +const char * ivshmem_device;
>> #ifdef TARGET_I386
>> int win2k_install_hack = 0;
>> int rtc_td_hack = 0;
>> @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
>>     cyls = heads = secs = 0;
>>     translation = BIOS_ATA_TRANSLATION_AUTO;
>>     monitor_device = "vc:80Cx24C";
>> +    ivshmem_device = NULL;
>> +    ivshmem_chardev = NULL;
>>
>>     serial_devices[0] = "vc:80Cx24C";
>>     for(i = 1; i < MAX_SERIAL_PORTS; i++)
>> @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
>>                 parallel_devices[parallel_device_index] = optarg;
>>                 parallel_device_index++;
>>                 break;
>> +            case QEMU_OPTION_ivshmem:
>> +                ivshmem_device = optarg;
>> +                ivshmem_enabled = 1;
>> +                break;
>>            case QEMU_OPTION_loadvm:
>>                loadvm = optarg;
>>                break;
>> @@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
>>            }
>>     }
>>
>> +    if (ivshmem_enabled) {
>> +        ivshmem_init(ivshmem_device);
>> +        ram_size += ivshmem_get_size();
>> +    }
>> +
>> #ifdef CONFIG_KQEMU
>>     /* FIXME: This is a nasty hack because kqemu can't cope with
>> dynamic
>>        guest ram allocation.  It needs to go away.  */
>> Thx,
>>
>> Venkat
>
>
>
> -----------------------------------------------
> A. Cameron Macdonell
> Ph.D. Student
> Department of Computing Science
> University of Alberta
> cam@cs.ualberta.ca


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-18 16:50     ` Cam Macdonell
@ 2009-05-18 17:19       ` Avi Kivity
  0 siblings, 0 replies; 21+ messages in thread
From: Avi Kivity @ 2009-05-18 17:19 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: kvm@vger.kernel.org list, Gregory Haskins

Cam Macdonell wrote:
>
> My usual noob questions:  Do I need to run Greg's tree on the host for 
> the necessary irqfd/eventfd support?  

Yes (though irqfd will be merged real soon, and iosignalfd somewhat 
afterwards).

> Are there any examples to work from aside from Greg's unit tests?

No.

You don't really need irqfd and iosignalfd to implement all of this, 
they're just optimizations.  You can still pass the eventfds around.  If 
you don't have irqfd, have qemu poll the eventfd, and when something 
happens, inject an interrupt.  Likewise, if you don't have iosignalfd, 
register a pio/mmio handler for the command register, and when they fire 
touch all of the relevant irqfds.  You'll need that anyway for backwards 
compatibility and for level-triggered pci interrupts, which irqfd 
doesn't support.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-17 21:51   ` Avi Kivity
  2009-05-18 11:12     ` Gregory Haskins
@ 2009-05-18 16:50     ` Cam Macdonell
  2009-05-18 17:19       ` Avi Kivity
  1 sibling, 1 reply; 21+ messages in thread
From: Cam Macdonell @ 2009-05-18 16:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm@vger.kernel.org list, Gregory Haskins

Avi Kivity wrote:
> Cam Macdonell wrote:
>>>
>>> If my understanding is correct both the VM's who wants to communicate 
>>> would gives this path in the command line with one of them specifying 
>>> as "server".
>>
>> Exactly, the one with the "server" in the parameter list will wait for 
>> a connection before booting.
> 
> hm, we may be able to eliminate the server from the fast path, at the 
> cost of some complexity.
> 
> When a guest connects to the server, the server creates an eventfd and 
> passes using SCM_RIGHTS to all other connected guests.  The server also 
> passes the eventfds of currently connected guests to the new guest.  
> From now on, the server does not participate in anything; when a guest 
> wants to send an interrupt to one or more other guests, its qemu just 
> writes to the eventfds() of the corresponding guests; their qemus will 
> inject the interrupt, without any server involvement.
> 
> Now, anyone who has been paying attention will have their alarms going 
> off at the word eventfd.  And yes, if the host supports irqfd, the 
> various qemus can associate those eventfds with an irq and pretty much 
> forget about them.  When a qemu triggers an irqfd, the interrupt will be 
> injected directly without the target qemu's involvement.
> 
> I like it.

That certainly sounds like the right direction for multi-VM setup.  I'm 
currently working on the shmem PCI card server discussed in the first 
patch's thread to support broadcast and multicast which will now be 
simpler if qemu handles the *casting.

My usual noob questions:  Do I need to run Greg's tree on the host for 
the necessary irqfd/eventfd support?  Are there any examples to work 
from aside from Greg's unit tests?

Thanks,
Cam


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-18 12:11   ` Kumar, Venkat
@ 2009-05-18 16:20     ` Cam Macdonell
  2009-05-19  3:52       ` Kumar, Venkat
  0 siblings, 1 reply; 21+ messages in thread
From: Cam Macdonell @ 2009-05-18 16:20 UTC (permalink / raw)
  To: Kumar, Venkat; +Cc: kvm@vger.kernel.org list

Kumar, Venkat wrote:
> Cam - I got your patch to work but without notifications. I could share memory using the patch but notifications aren't working.
> 
> I bring up two VM's with option "-ivshmem shrmem,1024,/dev/shm/shrmem,server" and "-ivshmem shrmem,1024,/dev/shm/shrmem" respectively.

Ok, I guess I need to do more error checking of arguments :)  You need 
to specify "unix:" on the path.  So your options should look like this

"-ivshmem shrmem,1024,unix:/dev/shm/shrmem,server"

"-ivshmem shrmem,1024,unix:/dev/shm/shrmem"

That should help.

Cam

> 
> When I make an "ioctl" from one of the VM's to inject an interrupt to the other VM, I get an error in "qemu_chr_write" and return value is "-1". "write" call in "send_all" is failing with return value "-1".
> 
> Am I missing something here?
> 
> Thx,
> 
> Venkat
> 
> 
> -----Original Message-----
> From: Cam Macdonell [mailto:cam@cs.ualberta.ca]
> Sent: Saturday, May 16, 2009 9:01 AM
> To: Kumar, Venkat
> Cc: kvm@vger.kernel.org list
> Subject: Re: [PATCH v2] Shared memory device with interrupt support
> 
> 
> On 15-May-09, at 8:54 PM, Kumar, Venkat wrote:
> 
>> Cam,
>>
>> A questions on interrupts as well.
>> What is "unix:path" that needs to be passed in the argument list?
>> Can it be any string?
> 
> It has to be a valid path on the host.  It will create a unix domain
> socket on that path.
> 
>> If my understanding is correct both the VM's who wants to
>> communicate would gives this path in the command line with one of
>> them specifying as "server".
> 
> Exactly, the one with the "server" in the parameter list will wait for
> a connection before booting.
> 
> Cam
> 
>> Thx,
>> Venkat
>>
>>
>>
>>
>>
>>
>>    Support an inter-vm shared memory device that maps a shared-
>> memory object
>> as a PCI device in the guest.  This patch also supports interrupts
>> between
>> guest by communicating over a unix domain socket.  This patch
>> applies to the
>> qemu-kvm repository.
>>
>> This device now creates a qemu character device and sends 1-byte
>> messages to
>> trigger interrupts.  Writes are triggered by writing to the "Doorbell"
>> register
>> on the shared memory PCI device.  The lower 8-bits of the value
>> written to this
>> register are sent as the 1-byte message so different meanings of
>> interrupts can
>> be supported.
>>
>> Interrupts are only supported between 2 VMs currently.  One VM must
>> act as the
>> server by adding "server" to the command-line argument.  Shared
>> memory devices
>> are created with the following command-line:
>>
>> -ivshmem <shm object>,<size in MB>[,unix:<path>][,server]
>>
>> Interrupts can also be used between host and guest as well by
>> implementing a
>> listener on the host.
>>
>> Cam
>>
>> ---
>> Makefile.target |    3 +
>> hw/ivshmem.c    |  421 ++++++++++++++++++++++++++++++++++++++++++++++
>> +++++++++
>> hw/pc.c         |    6 +
>> hw/pc.h         |    3 +
>> qemu-options.hx |   14 ++
>> sysemu.h        |    8 +
>> vl.c            |   14 ++
>> 7 files changed, 469 insertions(+), 0 deletions(-)
>> create mode 100644 hw/ivshmem.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index b68a689..3190bba 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -643,6 +643,9 @@ OBJS += pcnet.o
>> OBJS += rtl8139.o
>> OBJS += e1000.o
>>
>> +# Inter-VM PCI shared memory
>> +OBJS += ivshmem.o
>> +
>> # Generic watchdog support and some watchdog devices
>> OBJS += watchdog.o
>> OBJS += wdt_ib700.o wdt_i6300esb.o
>> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
>> new file mode 100644
>> index 0000000..95e2268
>> --- /dev/null
>> +++ b/hw/ivshmem.c
>> @@ -0,0 +1,421 @@
>> +/*
>> + * Inter-VM Shared Memory PCI device.
>> + *
>> + * Author:
>> + *      Cam Macdonell <c...@cs.ualberta.ca>
>> + *
>> + * Based On: cirrus_vga.c and rtl8139.c
>> + *
>> + * This code is licensed under the GNU GPL v2.
>> + */
>> +
>> +#include "hw.h"
>> +#include "console.h"
>> +#include "pc.h"
>> +#include "pci.h"
>> +#include "sysemu.h"
>> +
>> +#include "qemu-common.h"
>> +#include <sys/mman.h>
>> +
>> +#define PCI_COMMAND_IOACCESS                0x0001
>> +#define PCI_COMMAND_MEMACCESS               0x0002
>> +#define PCI_COMMAND_BUSMASTER               0x0004
>> +
>> +//#define DEBUG_IVSHMEM
>> +
>> +#ifdef DEBUG_IVSHMEM
>> +#define IVSHMEM_DPRINTF(fmt, args...)        \
>> +    do {printf("IVSHMEM: " fmt, ##args); } while (0)
>> +#else
>> +#define IVSHMEM_DPRINTF(fmt, args...)
>> +#endif
>> +
>> +typedef struct IVShmemState {
>> +    uint16_t intrmask;
>> +    uint16_t intrstatus;
>> +    uint16_t doorbell;
>> +    uint8_t *ivshmem_ptr;
>> +    unsigned long ivshmem_offset;
>> +    unsigned int ivshmem_size;
>> +    unsigned long bios_offset;
>> +    unsigned int bios_size;
>> +    target_phys_addr_t base_ctrl;
>> +    int it_shift;
>> +    PCIDevice *pci_dev;
>> +    CharDriverState * chr;
>> +    unsigned long map_addr;
>> +    unsigned long map_end;
>> +    int ivshmem_mmio_io_addr;
>> +} IVShmemState;
>> +
>> +typedef struct PCI_IVShmemState {
>> +    PCIDevice dev;
>> +    IVShmemState ivshmem_state;
>> +} PCI_IVShmemState;
>> +
>> +typedef struct IVShmemDesc {
>> +    char name[1024];
>> +    char * chrdev;
>> +    int size;
>> +} IVShmemDesc;
>> +
>> +
>> +/* registers for the Inter-VM shared memory device */
>> +enum ivshmem_registers {
>> +    IntrMask = 0,
>> +    IntrStatus = 16,
>> +    Doorbell = 32
>> +};
>> +
>> +static int num_ivshmem_devices = 0;
>> +static IVShmemDesc ivshmem_desc;
>> +
>> +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
>> +                    uint32_t addr, uint32_t size, int type)
>> +{
>> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
>> +    IVShmemState *s = &d->ivshmem_state;
>> +
>> +    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
>> +    cpu_register_physical_memory(addr, s->ivshmem_size, s-
>>> ivshmem_offset);
>> +
>> +}
>> +
>> +void ivshmem_init(const char * optarg) {
>> +
>> +    char * temp;
>> +    char * ivshmem_sz;
>> +    int size;
>> +
>> +    num_ivshmem_devices++;
>> +
>> +    /* currently we only support 1 device */
>> +    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
>> +        return;
>> +    }
>> +
>> +    temp = strdup(optarg);
>> +    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
>> +    ivshmem_sz=strsep(&temp,",");
>> +    if (ivshmem_sz != NULL){
>> +        size = atol(ivshmem_sz);
>> +    } else {
>> +        size = -1;
>> +    }
>> +
>> +    ivshmem_desc.chrdev = strsep(&temp,"\0");
>> +
>> +    if ( size == -1) {
>> +        ivshmem_desc.size = TARGET_PAGE_SIZE;
>> +    } else {
>> +        ivshmem_desc.size = size*1024*1024;
>> +    }
>> +    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev
>> is %s\n",
>> +                                        optarg, ivshmem_desc.name,
>> +                                        ivshmem_desc.size,
>> ivshmem_desc.chrdev);
>> +}
>> +
>> +int ivshmem_get_size(void) {
>> +    return ivshmem_desc.size;
>> +}
>> +
>> +/* accessing registers - based on rtl8139 */
>> +static void ivshmem_update_irq(IVShmemState *s)
>> +{
>> +    int isr;
>> +    isr = (s->intrstatus & s->intrmask) & 0xffff;
>> +
>> +    /* don't print ISR resets */
>> +    if (isr) {
>> +        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
>> +           isr ? 1 : 0, s->intrstatus, s->intrmask);
>> +    }
>> +
>> +    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
>> +}
>> +
>> +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
>> +                       uint32_t addr, uint32_t size, int type)
>> +{
>> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
>> +    IVShmemState *s = &d->ivshmem_state;
>> +
>> +    cpu_register_physical_memory(addr + 0, 0x100, s-
>>> ivshmem_mmio_io_addr);
>> +}
>> +
>> +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
>> +{
>> +    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
>> +
>> +    s->intrmask = val;
>> +
>> +    ivshmem_update_irq(s);
>> +}
>> +
>> +static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
>> +{
>> +    uint32_t ret = s->intrmask;
>> +
>> +    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
>> +
>> +    return ret;
>> +}
>> +
>> +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
>> +{
>> +    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
>> +
>> +    s->intrstatus = val;
>> +
>> +    ivshmem_update_irq(s);
>> +    return;
>> +}
>> +
>> +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
>> +{
>> +    uint32_t ret = s->intrstatus;
>> +
>> +    /* reading ISR clears all interrupts */
>> +    s->intrstatus = 0;
>> +
>> +    ivshmem_update_irq(s);
>> +
>> +    return ret;
>> +}
>> +
>> +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned
>> long) opaque);
>> +
>> +    addr &= 0xfe;
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ivshmem_IntrMask_write(s, val);
>> +            break;
>> +
>> +        case IntrStatus:
>> +            ivshmem_IntrStatus_write(s, val);
>> +            break;
>> +
>> +        default:
>> +            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
>> +    }
>> +}
>> +
>> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
>> +}
>> +
>> +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t
>> val)
>> +{
>> +    IVShmemState *s = opaque;
>> +    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
>> +
>> +    switch (addr)
>> +    {   // in future, we will probably want to support more types
>> of doorbells
>> +        case Doorbell:
>> +            // wake up the other side
>> +            qemu_chr_write(s->chr, &writebyte, 1);
>> +            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n",
>> writebyte);
>> +            break;
>> +        default:
>> +            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
>> +    }
>> +}
>> +
>> +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
>> +{
>> +
>> +    IVShmemState *s = opaque;
>> +    uint32_t ret;
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ret = ivshmem_IntrMask_read(s);
>> +            break;
>> +        case IntrStatus:
>> +            ret = ivshmem_IntrStatus_read(s);
>> +            break;
>> +        default:
>> +            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
>> +            ret = 0;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
>> +    return 0;
>> +}
>> +
>> +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
>> +{
>> +    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
>> +
>> +    return 0;
>> +}
>> +
>> +static void ivshmem_mmio_writeb(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writeb(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static void ivshmem_mmio_writew(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writew(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static void ivshmem_mmio_writel(void *opaque,
>> +                                target_phys_addr_t addr, uint32_t
>> val)
>> +{
>> +    ivshmem_io_writel(opaque, addr & 0xFF, val);
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    return ivshmem_io_readb(opaque, addr & 0xFF);
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
>> +    return val;
>> +}
>> +
>> +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t
>> addr)
>> +{
>> +    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
>> +    return val;
>> +}
>> +
>> +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
>> +    ivshmem_mmio_readb,
>> +    ivshmem_mmio_readw,
>> +    ivshmem_mmio_readl,
>> +};
>> +
>> +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
>> +    ivshmem_mmio_writeb,
>> +    ivshmem_mmio_writew,
>> +    ivshmem_mmio_writel,
>> +};
>> +
>> +static int ivshmem_can_receive(void * opaque)
>> +{
>> +    return 1;
>> +}
>> +
>> +static void ivshmem_receive(void *opaque, const uint8_t *buf, int
>> size)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    ivshmem_IntrStatus_write(s, *buf);
>> +
>> +    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
>> +}
>> +
>> +static void ivshmem_event(void *opaque, int event)
>> +{
>> +    IVShmemState *s = opaque;
>> +    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
>> +}
>> +
>> +int pci_ivshmem_init(PCIBus *bus)
>> +{
>> +    PCI_IVShmemState *d;
>> +    IVShmemState *s;
>> +    uint8_t *pci_conf;
>> +    int ivshmem_fd;
>> +
>> +    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
>> +    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
>> +                                           sizeof(PCI_IVShmemState),
>> +                                           -1, NULL, NULL);
>> +    if (!d) {
>> +        return -1;
>> +    }
>> +
>> +    s = &d->ivshmem_state;
>> +
>> +    /* allocate shared memory RAM */
>> +    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
>> +    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
>> +    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
>> +
>> +    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
>> +
>> +    s->pci_dev = &d->dev;
>> +    s->ivshmem_size = ivshmem_desc.size;
>> +
>> +    pci_conf = d->dev.config;
>> +    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
>> +    pci_conf[0x01] = 0x1a;
>> +    pci_conf[0x02] = 0x10;
>> +    pci_conf[0x03] = 0x11;
>> +    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
>> +    pci_conf[0x0a] = 0x00; // RAM controller
>> +    pci_conf[0x0b] = 0x05;
>> +    pci_conf[0x0e] = 0x00; // header_type
>> +
>> +    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support
>> interrupts
>> +
>> +    /* XXX: ivshmem_desc.size must be a power of two */
>> +
>> +    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0,
>> ivshmem_mmio_read,
>> +                                    ivshmem_mmio_write, s);
>> +
>> +    /* region for registers*/
>> +    pci_register_io_region(&d->dev, 0, 0x100,
>> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
>> +
>> +    /* region for shared memory */
>> +    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
>> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
>> +
>> +    /* open shared memory file  */
>> +    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR,
>> S_IRWXU)) <
>> 0)
>> +    {
>> +        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
>> +        exit(-1);
>> +    }
>> +
>> +    ftruncate(ivshmem_fd, ivshmem_desc.size);
>> +
>> +    /* mmap onto PCI device's memory */
>> +    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
>> +                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) ==
>> MAP_FAILED)
>> +    {
>> +        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
>> +        exit(-1);
>> +    }
>> +
>> +    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s-
>>> ivshmem_ptr);
>> +
>> +    /* setup character device channel */
>> +
>> +    if (ivshmem_desc.chrdev != NULL) {
>> +        char label[32];
>> +        snprintf(label, 32, "ivshmem_chardev");
>> +        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
>> +        if (s->chr == NULL) {
>> +            fprintf(stderr, "No server listening on %s\n",
>> ivshmem_desc.chrdev);
>> +            exit(-1);
>> +        }
>> +        qemu_chr_add_handlers(s->chr, ivshmem_can_receive,
>> ivshmem_receive,
>> +                          ivshmem_event, s);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 34a4d25..7d0cff2 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -67,6 +67,8 @@ static PITState *pit;
>> static IOAPICState *ioapic;
>> static PCIDevice *i440fx_state;
>>
>> +extern int ivshmem_enabled;
>> +
>> static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
>> {
>> }
>> @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
>> vga_ram_size,
>>         }
>>     }
>>
>> +    if (pci_enabled && ivshmem_enabled) {
>> +        pci_ivshmem_init(pci_bus);
>> +    }
>> +
>>     rtc_state = rtc_init(0x70, i8259[8], 2000);
>>
>>     qemu_register_boot_set(pc_boot_set, rtc_state);
>> diff --git a/hw/pc.h b/hw/pc.h
>> index 885c918..0ae0493 100644
>> --- a/hw/pc.h
>> +++ b/hw/pc.h
>> @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,
>> NICInfo *nd);
>>
>> void extboot_init(BlockDriverState *bs, int cmd);
>>
>> +/* ivshmem.c */
>> +int pci_ivshmem_init(PCIBus *bus);
>> +
>> #endif
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 173f458..9ab3e2d 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical
>> mode and
>> @code{stdio} in
>> non graphical mode.
>> ETEXI
>>
>> +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
>> +    "-ivshmem name,size[,unix:path][,server]  creates or opens a
>> shared file
>> 'name' of size \
>> +    'size' (in MB) and exposes it as a PCI device in the guest\n")
>> +STEXI
>> +@item -ivshmem @var{file},@var{size}
>> +Creates a POSIX shared file named @var{file} of size @var{size} and
>> creates a
>> +PCI device of the same size that maps the shared file into the
>> device for
>> guests
>> +to access.  The created file on the host is located in /dev/shm/
>> +
>> +@item unix:@var{path}[,server]
>> +A unix domain socket is used to send and receive interrupts between
>> VMs.  The
>> unix domain socket
>> +@var{path} is used for connections.
>> +ETEXI
>> +
>> DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
>>     "-pidfile file   write PID to 'file'\n")
>> STEXI
>> diff --git a/sysemu.h b/sysemu.h
>> index 1f45fd6..862b79e 100644
>> --- a/sysemu.h
>> +++ b/sysemu.h
>> @@ -217,6 +217,14 @@ extern CharDriverState
>> *parallel_hds[MAX_PARALLEL_PORTS];
>>
>> extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>>
>> +/* inter-VM shared memory devices */
>> +
>> +#define MAX_IVSHMEM_DEVICES 1
>> +
>> +extern CharDriverState * ivshmem_chardev;
>> +void ivshmem_init(const char * optarg);
>> +int ivshmem_get_size(void);
>> +
>> #define TFR(expr) do { if ((expr) != -1) break; } while (errno ==
>> EINTR)
>>
>> #ifdef NEED_CPU_H
>> diff --git a/vl.c b/vl.c
>> index 0420634..7260fa1 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no
>> change */
>> int cirrus_vga_enabled = 1;
>> int std_vga_enabled = 0;
>> int vmsvga_enabled = 0;
>> +int ivshmem_enabled = 0;
>> int xenfb_enabled = 0;
>> #ifdef TARGET_SPARC
>> int graphic_width = 1024;
>> @@ -239,6 +240,8 @@ int no_quit = 0;
>> CharDriverState *serial_hds[MAX_SERIAL_PORTS];
>> CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>> CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>> +CharDriverState *ivshmem_chardev;
>> +const char * ivshmem_device;
>> #ifdef TARGET_I386
>> int win2k_install_hack = 0;
>> int rtc_td_hack = 0;
>> @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
>>     cyls = heads = secs = 0;
>>     translation = BIOS_ATA_TRANSLATION_AUTO;
>>     monitor_device = "vc:80Cx24C";
>> +    ivshmem_device = NULL;
>> +    ivshmem_chardev = NULL;
>>
>>     serial_devices[0] = "vc:80Cx24C";
>>     for(i = 1; i < MAX_SERIAL_PORTS; i++)
>> @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
>>                 parallel_devices[parallel_device_index] = optarg;
>>                 parallel_device_index++;
>>                 break;
>> +            case QEMU_OPTION_ivshmem:
>> +                ivshmem_device = optarg;
>> +                ivshmem_enabled = 1;
>> +                break;
>>            case QEMU_OPTION_loadvm:
>>                loadvm = optarg;
>>                break;
>> @@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
>>            }
>>     }
>>
>> +    if (ivshmem_enabled) {
>> +        ivshmem_init(ivshmem_device);
>> +        ram_size += ivshmem_get_size();
>> +    }
>> +
>> #ifdef CONFIG_KQEMU
>>     /* FIXME: This is a nasty hack because kqemu can't cope with
>> dynamic
>>        guest ram allocation.  It needs to go away.  */
>> Thx,
>>
>> Venkat
> 
> 
> 
> -----------------------------------------------
> A. Cameron Macdonell
> Ph.D. Student
> Department of Computing Science
> University of Alberta
> cam@cs.ualberta.ca


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH v2] Shared memory device with interrupt support
  2009-05-16  3:30 ` Cam Macdonell
  2009-05-17 21:51   ` Avi Kivity
@ 2009-05-18 12:11   ` Kumar, Venkat
  2009-05-18 16:20     ` Cam Macdonell
  1 sibling, 1 reply; 21+ messages in thread
From: Kumar, Venkat @ 2009-05-18 12:11 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: kvm@vger.kernel.org list

Cam - I got your patch to work but without notifications. I could share memory using the patch but notifications aren't working.

I bring up two VMs with the options "-ivshmem shrmem,1024,/dev/shm/shrmem,server" and "-ivshmem shrmem,1024,/dev/shm/shrmem" respectively.

When I make an "ioctl" from one of the VMs to inject an interrupt into the other VM, I get an error in "qemu_chr_write" and the return value is "-1". The "write" call in "send_all" is failing with return value "-1".

Am I missing something here?

Thx,

Venkat


-----Original Message-----
From: Cam Macdonell [mailto:cam@cs.ualberta.ca]
Sent: Saturday, May 16, 2009 9:01 AM
To: Kumar, Venkat
Cc: kvm@vger.kernel.org list
Subject: Re: [PATCH v2] Shared memory device with interrupt support


On 15-May-09, at 8:54 PM, Kumar, Venkat wrote:

> Cam,
>
> A question on interrupts as well.
> What is "unix:path" that needs to be passed in the argument list?
> Can it be any string?

It has to be a valid path on the host.  It will create a unix domain
socket on that path.

>
> If my understanding is correct, both of the VMs that want to
> communicate would give this path on the command line, with one of
> them specified as "server".

Exactly, the one with the "server" in the parameter list will wait for
a connection before booting.

Cam

>
> Thx,
> Venkat
>
>
>
>
>
>
>    Support an inter-vm shared memory device that maps a shared-
> memory object
> as a PCI device in the guest.  This patch also supports interrupts
> between
> guest by communicating over a unix domain socket.  This patch
> applies to the
> qemu-kvm repository.
>
> This device now creates a qemu character device and sends 1-byte
> messages to
> trigger interrupts.  Writes are triggered by writing to the "Doorbell"
> register
> on the shared memory PCI device.  The lower 8-bits of the value
> written to this
> register are sent as the 1-byte message so different meanings of
> interrupts can
> be supported.
>
> Interrupts are only supported between 2 VMs currently.  One VM must
> act as the
> server by adding "server" to the command-line argument.  Shared
> memory devices
> are created with the following command-line:
>
> -ivshmem <shm object>,<size in MB>,[unix:<path>][,server]
>
> Interrupts can also be used between host and guest as well by
> implementing a
> listener on the host.
>
> Cam
>
> ---
> Makefile.target |    3 +
> hw/ivshmem.c    |  421 ++++++++++++++++++++++++++++++++++++++++++++++
> +++++++++
> hw/pc.c         |    6 +
> hw/pc.h         |    3 +
> qemu-options.hx |   14 ++
> sysemu.h        |    8 +
> vl.c            |   14 ++
> 7 files changed, 469 insertions(+), 0 deletions(-)
> create mode 100644 hw/ivshmem.c
>
> diff --git a/Makefile.target b/Makefile.target
> index b68a689..3190bba 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -643,6 +643,9 @@ OBJS += pcnet.o
> OBJS += rtl8139.o
> OBJS += e1000.o
>
> +# Inter-VM PCI shared memory
> +OBJS += ivshmem.o
> +
> # Generic watchdog support and some watchdog devices
> OBJS += watchdog.o
> OBJS += wdt_ib700.o wdt_i6300esb.o
> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
> new file mode 100644
> index 0000000..95e2268
> --- /dev/null
> +++ b/hw/ivshmem.c
> @@ -0,0 +1,421 @@
> +/*
> + * Inter-VM Shared Memory PCI device.
> + *
> + * Author:
> + *      Cam Macdonell <c...@cs.ualberta.ca>
> + *
> + * Based On: cirrus_vga.c and rtl8139.c
> + *
> + * This code is licensed under the GNU GPL v2.
> + */
> +
> +#include "hw.h"
> +#include "console.h"
> +#include "pc.h"
> +#include "pci.h"
> +#include "sysemu.h"
> +
> +#include "qemu-common.h"
> +#include <sys/mman.h>
> +
> +#define PCI_COMMAND_IOACCESS                0x0001
> +#define PCI_COMMAND_MEMACCESS               0x0002
> +#define PCI_COMMAND_BUSMASTER               0x0004
> +
> +//#define DEBUG_IVSHMEM
> +
> +#ifdef DEBUG_IVSHMEM
> +#define IVSHMEM_DPRINTF(fmt, args...)        \
> +    do {printf("IVSHMEM: " fmt, ##args); } while (0)
> +#else
> +#define IVSHMEM_DPRINTF(fmt, args...)
> +#endif
> +
> +typedef struct IVShmemState {
> +    uint16_t intrmask;
> +    uint16_t intrstatus;
> +    uint16_t doorbell;
> +    uint8_t *ivshmem_ptr;
> +    unsigned long ivshmem_offset;
> +    unsigned int ivshmem_size;
> +    unsigned long bios_offset;
> +    unsigned int bios_size;
> +    target_phys_addr_t base_ctrl;
> +    int it_shift;
> +    PCIDevice *pci_dev;
> +    CharDriverState * chr;
> +    unsigned long map_addr;
> +    unsigned long map_end;
> +    int ivshmem_mmio_io_addr;
> +} IVShmemState;
> +
> +typedef struct PCI_IVShmemState {
> +    PCIDevice dev;
> +    IVShmemState ivshmem_state;
> +} PCI_IVShmemState;
> +
> +typedef struct IVShmemDesc {
> +    char name[1024];
> +    char * chrdev;
> +    int size;
> +} IVShmemDesc;
> +
> +
> +/* registers for the Inter-VM shared memory device */
> +enum ivshmem_registers {
> +    IntrMask = 0,
> +    IntrStatus = 16,
> +    Doorbell = 32
> +};
> +
> +static int num_ivshmem_devices = 0;
> +static IVShmemDesc ivshmem_desc;
> +
> +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
> +                    uint32_t addr, uint32_t size, int type)
> +{
> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
> +    IVShmemState *s = &d->ivshmem_state;
> +
> +    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
> +    cpu_register_physical_memory(addr, s->ivshmem_size, s-
> >ivshmem_offset);
> +
> +}
> +
> +void ivshmem_init(const char * optarg) {
> +
> +    char * temp;
> +    char * ivshmem_sz;
> +    int size;
> +
> +    num_ivshmem_devices++;
> +
> +    /* currently we only support 1 device */
> +    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
> +        return;
> +    }
> +
> +    temp = strdup(optarg);
> +    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
> +    ivshmem_sz=strsep(&temp,",");
> +    if (ivshmem_sz != NULL){
> +        size = atol(ivshmem_sz);
> +    } else {
> +        size = -1;
> +    }
> +
> +    ivshmem_desc.chrdev = strsep(&temp,"\0");
> +
> +    if ( size == -1) {
> +        ivshmem_desc.size = TARGET_PAGE_SIZE;
> +    } else {
> +        ivshmem_desc.size = size*1024*1024;
> +    }
> +    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev
> is %s\n",
> +                                        optarg, ivshmem_desc.name,
> +                                        ivshmem_desc.size,
> ivshmem_desc.chrdev);
> +}
> +
> +int ivshmem_get_size(void) {
> +    return ivshmem_desc.size;
> +}
> +
> +/* accessing registers - based on rtl8139 */
> +static void ivshmem_update_irq(IVShmemState *s)
> +{
> +    int isr;
> +    isr = (s->intrstatus & s->intrmask) & 0xffff;
> +
> +    /* don't print ISR resets */
> +    if (isr) {
> +        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
> +           isr ? 1 : 0, s->intrstatus, s->intrmask);
> +    }
> +
> +    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
> +}
> +
> +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
> +                       uint32_t addr, uint32_t size, int type)
> +{
> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
> +    IVShmemState *s = &d->ivshmem_state;
> +
> +    cpu_register_physical_memory(addr + 0, 0x100, s-
> >ivshmem_mmio_io_addr);
> +}
> +
> +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
> +{
> +    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
> +
> +    s->intrmask = val;
> +
> +    ivshmem_update_irq(s);
> +}
> +
> +static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
> +{
> +    uint32_t ret = s->intrmask;
> +
> +    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
> +
> +    return ret;
> +}
> +
> +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
> +{
> +    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
> +
> +    s->intrstatus = val;
> +
> +    ivshmem_update_irq(s);
> +    return;
> +}
> +
> +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
> +{
> +    uint32_t ret = s->intrstatus;
> +
> +    /* reading ISR clears all interrupts */
> +    s->intrstatus = 0;
> +
> +    ivshmem_update_irq(s);
> +
> +    return ret;
> +}
> +
> +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t
> val)
> +{
> +    IVShmemState *s = opaque;
> +
> +    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned
> long) opaque);
> +
> +    addr &= 0xfe;
> +
> +    switch (addr)
> +    {
> +        case IntrMask:
> +            ivshmem_IntrMask_write(s, val);
> +            break;
> +
> +        case IntrStatus:
> +            ivshmem_IntrStatus_write(s, val);
> +            break;
> +
> +        default:
> +            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
> +    }
> +}
> +
> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t
> val)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
> +}
> +
> +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t
> val)
> +{
> +    IVShmemState *s = opaque;
> +    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
> +
> +    switch (addr)
> +    {   // in future, we will probably want to support more types
> of doorbells
> +        case Doorbell:
> +            // wake up the other side
> +            qemu_chr_write(s->chr, &writebyte, 1);
> +            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n",
> writebyte);
> +            break;
> +        default:
> +            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
> +    }
> +}
> +
> +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
> +{
> +
> +    IVShmemState *s = opaque;
> +    uint32_t ret;
> +
> +    switch (addr)
> +    {
> +        case IntrMask:
> +            ret = ivshmem_IntrMask_read(s);
> +            break;
> +        case IntrStatus:
> +            ret = ivshmem_IntrStatus_read(s);
> +            break;
> +        default:
> +            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
> +            ret = 0;
> +    }
> +
> +    return ret;
> +}
> +
> +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
> +    return 0;
> +}
> +
> +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
> +
> +    return 0;
> +}
> +
> +static void ivshmem_mmio_writeb(void *opaque,
> +                                target_phys_addr_t addr, uint32_t
> val)
> +{
> +    ivshmem_io_writeb(opaque, addr & 0xFF, val);
> +}
> +
> +static void ivshmem_mmio_writew(void *opaque,
> +                                target_phys_addr_t addr, uint32_t
> val)
> +{
> +    ivshmem_io_writew(opaque, addr & 0xFF, val);
> +}
> +
> +static void ivshmem_mmio_writel(void *opaque,
> +                                target_phys_addr_t addr, uint32_t
> val)
> +{
> +    ivshmem_io_writel(opaque, addr & 0xFF, val);
> +}
> +
> +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t
> addr)
> +{
> +    return ivshmem_io_readb(opaque, addr & 0xFF);
> +}
> +
> +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t
> addr)
> +{
> +    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
> +    return val;
> +}
> +
> +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t
> addr)
> +{
> +    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
> +    return val;
> +}
> +
> +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
> +    ivshmem_mmio_readb,
> +    ivshmem_mmio_readw,
> +    ivshmem_mmio_readl,
> +};
> +
> +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
> +    ivshmem_mmio_writeb,
> +    ivshmem_mmio_writew,
> +    ivshmem_mmio_writel,
> +};
> +
> +static int ivshmem_can_receive(void * opaque)
> +{
> +    return 1;
> +}
> +
> +static void ivshmem_receive(void *opaque, const uint8_t *buf, int
> size)
> +{
> +    IVShmemState *s = opaque;
> +
> +    ivshmem_IntrStatus_write(s, *buf);
> +
> +    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
> +}
> +
> +static void ivshmem_event(void *opaque, int event)
> +{
> +    IVShmemState *s = opaque;
> +    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
> +}
> +
> +int pci_ivshmem_init(PCIBus *bus)
> +{
> +    PCI_IVShmemState *d;
> +    IVShmemState *s;
> +    uint8_t *pci_conf;
> +    int ivshmem_fd;
> +
> +    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
> +    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
> +                                           sizeof(PCI_IVShmemState),
> +                                           -1, NULL, NULL);
> +    if (!d) {
> +        return -1;
> +    }
> +
> +    s = &d->ivshmem_state;
> +
> +    /* allocate shared memory RAM */
> +    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
> +    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
> +    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
> +
> +    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
> +
> +    s->pci_dev = &d->dev;
> +    s->ivshmem_size = ivshmem_desc.size;
> +
> +    pci_conf = d->dev.config;
> +    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
> +    pci_conf[0x01] = 0x1a;
> +    pci_conf[0x02] = 0x10;
> +    pci_conf[0x03] = 0x11;
> +    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
> +    pci_conf[0x0a] = 0x00; // RAM controller
> +    pci_conf[0x0b] = 0x05;
> +    pci_conf[0x0e] = 0x00; // header_type
> +
> +    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support
> interrupts
> +
> +    /* XXX: ivshmem_desc.size must be a power of two */
> +
> +    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0,
> ivshmem_mmio_read,
> +                                    ivshmem_mmio_write, s);
> +
> +    /* region for registers*/
> +    pci_register_io_region(&d->dev, 0, 0x100,
> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
> +
> +    /* region for shared memory */
> +    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
> +
> +    /* open shared memory file  */
> +    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR,
> S_IRWXU)) <
> 0)
> +    {
> +        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
> +        exit(-1);
> +    }
> +
> +    ftruncate(ivshmem_fd, ivshmem_desc.size);
> +
> +    /* mmap onto PCI device's memory */
> +    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
> +                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) ==
> MAP_FAILED)
> +    {
> +        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
> +        exit(-1);
> +    }
> +
> +    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s-
> >ivshmem_ptr);
> +
> +    /* setup character device channel */
> +
> +    if (ivshmem_desc.chrdev != NULL) {
> +        char label[32];
> +        snprintf(label, 32, "ivshmem_chardev");
> +        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
> +        if (s->chr == NULL) {
> +            fprintf(stderr, "No server listening on %s\n",
> ivshmem_desc.chrdev);
> +            exit(-1);
> +        }
> +        qemu_chr_add_handlers(s->chr, ivshmem_can_receive,
> ivshmem_receive,
> +                          ivshmem_event, s);
> +    }
> +
> +    return 0;
> +}
> +
> diff --git a/hw/pc.c b/hw/pc.c
> index 34a4d25..7d0cff2 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -67,6 +67,8 @@ static PITState *pit;
> static IOAPICState *ioapic;
> static PCIDevice *i440fx_state;
>
> +extern int ivshmem_enabled;
> +
> static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
> {
> }
> @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
> vga_ram_size,
>         }
>     }
>
> +    if (pci_enabled && ivshmem_enabled) {
> +        pci_ivshmem_init(pci_bus);
> +    }
> +
>     rtc_state = rtc_init(0x70, i8259[8], 2000);
>
>     qemu_register_boot_set(pc_boot_set, rtc_state);
> diff --git a/hw/pc.h b/hw/pc.h
> index 885c918..0ae0493 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,
> NICInfo *nd);
>
> void extboot_init(BlockDriverState *bs, int cmd);
>
> +/* ivshmem.c */
> +int pci_ivshmem_init(PCIBus *bus);
> +
> #endif
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 173f458..9ab3e2d 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical
> mode and
> @code{stdio} in
> non graphical mode.
> ETEXI
>
> +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
> +    "-ivshmem name,size[,unix:path][,server]  creates or opens a
> shared file
> 'name' of size \
> +    'size' (in MB) and exposes it as a PCI device in the guest\n")
> +STEXI
> +@item -ivshmem @var{file},@var{size}
> +Creates a POSIX shared file named @var{file} of size @var{size} and
> creates a
> +PCI device of the same size that maps the shared file into the
> device for
> guests
> +to access.  The created file on the host is located in /dev/shm/
> +
> +@item unix:@var{path}[,server]
> +A unix domain socket is used to send and receive interrupts between
> VMs.  The
> unix domain socket
> +@var{path} is used for connections.
> +ETEXI
> +
> DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
>     "-pidfile file   write PID to 'file'\n")
> STEXI
> diff --git a/sysemu.h b/sysemu.h
> index 1f45fd6..862b79e 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -217,6 +217,14 @@ extern CharDriverState
> *parallel_hds[MAX_PARALLEL_PORTS];
>
> extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>
> +/* inter-VM shared memory devices */
> +
> +#define MAX_IVSHMEM_DEVICES 1
> +
> +extern CharDriverState * ivshmem_chardev;
> +void ivshmem_init(const char * optarg);
> +int ivshmem_get_size(void);
> +
> #define TFR(expr) do { if ((expr) != -1) break; } while (errno ==
> EINTR)
>
> #ifdef NEED_CPU_H
> diff --git a/vl.c b/vl.c
> index 0420634..7260fa1 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no
> change */
> int cirrus_vga_enabled = 1;
> int std_vga_enabled = 0;
> int vmsvga_enabled = 0;
> +int ivshmem_enabled = 0;
> int xenfb_enabled = 0;
> #ifdef TARGET_SPARC
> int graphic_width = 1024;
> @@ -239,6 +240,8 @@ int no_quit = 0;
> CharDriverState *serial_hds[MAX_SERIAL_PORTS];
> CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
> CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
> +CharDriverState *ivshmem_chardev;
> +const char * ivshmem_device;
> #ifdef TARGET_I386
> int win2k_install_hack = 0;
> int rtc_td_hack = 0;
> @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
>     cyls = heads = secs = 0;
>     translation = BIOS_ATA_TRANSLATION_AUTO;
>     monitor_device = "vc:80Cx24C";
> +    ivshmem_device = NULL;
> +    ivshmem_chardev = NULL;
>
>     serial_devices[0] = "vc:80Cx24C";
>     for(i = 1; i < MAX_SERIAL_PORTS; i++)
> @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
>                 parallel_devices[parallel_device_index] = optarg;
>                 parallel_device_index++;
>                 break;
> +            case QEMU_OPTION_ivshmem:
> +                ivshmem_device = optarg;
> +                ivshmem_enabled = 1;
> +                break;
>            case QEMU_OPTION_loadvm:
>                loadvm = optarg;
>                break;
> @@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
>            }
>     }
>
> +    if (ivshmem_enabled) {
> +        ivshmem_init(ivshmem_device);
> +        ram_size += ivshmem_get_size();
> +    }
> +
> #ifdef CONFIG_KQEMU
>     /* FIXME: This is a nasty hack because kqemu can't cope with
> dynamic
>        guest ram allocation.  It needs to go away.  */
> Thx,
>
> Venkat



-----------------------------------------------
A. Cameron Macdonell
Ph.D. Student
Department of Computing Science
University of Alberta
cam@cs.ualberta.ca




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-18 11:12     ` Gregory Haskins
@ 2009-05-18 11:38       ` Avi Kivity
  0 siblings, 0 replies; 21+ messages in thread
From: Avi Kivity @ 2009-05-18 11:38 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: Cam Macdonell, Kumar, Venkat, kvm@vger.kernel.org list

Gregory Haskins wrote:
> I'll just add that you could tie the irqfd to an iosignalfd to eliminate
> the involvement of qemu on either side as well.  I'm not sure if that
> really works with the design of this particular device (e.g. perhaps
> qemu is needed for other reasons besides signaling), but it is a neat
> demonstration of the flexibility of the newly emerging kvm-eventfd
> interfaces.
>   

If we have an iosignalfd for point-to-point (say, a pio port with the 
guest ID) we can do direct guest-to-guest signalling.  For broadcast or 
multicast, we need to exit to qemu to handle the loop.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-17 21:51   ` Avi Kivity
@ 2009-05-18 11:12     ` Gregory Haskins
  2009-05-18 11:38       ` Avi Kivity
  2009-05-18 16:50     ` Cam Macdonell
  1 sibling, 1 reply; 21+ messages in thread
From: Gregory Haskins @ 2009-05-18 11:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Cam Macdonell, Kumar, Venkat, kvm@vger.kernel.org list

[-- Attachment #1: Type: text/plain, Size: 1673 bytes --]

Avi Kivity wrote:
> Cam Macdonell wrote:
>>>
>>> If my understanding is correct, both of the VMs that want to
>>> communicate would give this path on the command line, with one of
>>> them specified as "server".
>>
>> Exactly, the one with the "server" in the parameter list will wait
>> for a connection before booting.
>
> hm, we may be able to eliminate the server from the fast path, at the
> cost of some complexity.
>
> When a guest connects to the server, the server creates an eventfd and
> passes using SCM_RIGHTS to all other connected guests.  The server
> also passes the eventfds of currently connected guests to the new
> guest.  From now on, the server does not participate in anything; when
> a guest wants to send an interrupt to one or more other guests, its
> qemu just writes to the eventfds() of the corresponding guests; their
> qemus will inject the interrupt, without any server involvement.
>
> Now, anyone who has been paying attention will have their alarms going
> off at the word eventfd.  And yes, if the host supports irqfd, the
> various qemus can associate those eventfds with an irq and pretty much
> forget about them.  When a qemu triggers an irqfd, the interrupt will
> be injected directly without the target qemu's involvement.

I'll just add that you could tie the irqfd to an iosignalfd to eliminate
the involvement of qemu on either side as well.  I'm not sure if that
really works with the design of this particular device (e.g. perhaps
qemu is needed for other reasons besides signaling), but it is a neat
demonstration of the flexibility of the newly emerging kvm-eventfd
interfaces.

-Greg



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
  2009-05-16  3:30 ` Cam Macdonell
@ 2009-05-17 21:51   ` Avi Kivity
  2009-05-18 11:12     ` Gregory Haskins
  2009-05-18 16:50     ` Cam Macdonell
  2009-05-18 12:11   ` Kumar, Venkat
  1 sibling, 2 replies; 21+ messages in thread
From: Avi Kivity @ 2009-05-17 21:51 UTC (permalink / raw)
  To: Cam Macdonell; +Cc: Kumar, Venkat, kvm@vger.kernel.org list, Gregory Haskins

Cam Macdonell wrote:
>>
>> If my understanding is correct, both of the VMs that want to communicate 
>> would give this path on the command line, with one of them specified 
>> as "server".
>
> Exactly, the one with the "server" in the parameter list will wait for 
> a connection before booting.

hm, we may be able to eliminate the server from the fast path, at the 
cost of some complexity.

When a guest connects to the server, the server creates an eventfd and 
passes using SCM_RIGHTS to all other connected guests.  The server also 
passes the eventfds of currently connected guests to the new guest.  
 From now on, the server does not participate in anything; when a guest 
wants to send an interrupt to one or more other guests, its qemu just 
writes to the eventfds() of the corresponding guests; their qemus will 
inject the interrupt, without any server involvement.

Now, anyone who has been paying attention will have their alarms going 
off at the word eventfd.  And yes, if the host supports irqfd, the 
various qemus can associate those eventfds with an irq and pretty much 
forget about them.  When a qemu triggers an irqfd, the interrupt will be 
injected directly without the target qemu's involvement.

I like it.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] Shared memory device with interrupt support
       [not found] <3D9CB4061D1EB3408D4A0B910433453C030BCA8892@inbmail01.lsi.com>
@ 2009-05-16  3:30 ` Cam Macdonell
  2009-05-17 21:51   ` Avi Kivity
  2009-05-18 12:11   ` Kumar, Venkat
  0 siblings, 2 replies; 21+ messages in thread
From: Cam Macdonell @ 2009-05-16  3:30 UTC (permalink / raw)
  To: Kumar, Venkat; +Cc: kvm@vger.kernel.org list


On 15-May-09, at 8:54 PM, Kumar, Venkat wrote:

> Cam,
>
> A question on interrupts as well.
> What is "unix:path" that needs to be passed in the argument list?
> Can it be any string?

It has to be a valid path on the host.  It will create a unix domain  
socket on that path.

>
> If my understanding is correct, both of the VMs that want to  
> communicate would give this path on the command line, with one of  
> them specified as "server".

Exactly, the one with the "server" in the parameter list will wait for  
a connection before booting.

Cam

>
> Thx,
> Venkat
>
>
>
>
>
>
>    Support an inter-vm shared memory device that maps a shared- 
> memory object
> as a PCI device in the guest.  This patch also supports interrupts  
> between
> guest by communicating over a unix domain socket.  This patch  
> applies to the
> qemu-kvm repository.
>
> This device now creates a qemu character device and sends 1-byte  
> messages to
> trigger interrupts.  Writes are triggered by writing to the "Doorbell"  
> register
> on the shared memory PCI device.  The lower 8-bits of the value  
> written to this
> register are sent as the 1-byte message so different meanings of  
> interrupts can
> be supported.
>
> Interrupts are only supported between 2 VMs currently.  One VM must  
> act as the
> server by adding "server" to the command-line argument.  Shared  
> memory devices
> are created with the following command-line:
>
> -ivshmem <shm object>,<size in MB>[,unix:<path>][,server]
>
> Interrupts can also be used between host and guest as well by  
> implementing a
> listener on the host.
>
> Cam
>
> ---
> Makefile.target |    3 +
> hw/ivshmem.c    |  421 ++++++++++++++++++++++++++++++++++++++++++++++ 
> +++++++++
> hw/pc.c         |    6 +
> hw/pc.h         |    3 +
> qemu-options.hx |   14 ++
> sysemu.h        |    8 +
> vl.c            |   14 ++
> 7 files changed, 469 insertions(+), 0 deletions(-)
> create mode 100644 hw/ivshmem.c
>
> diff --git a/Makefile.target b/Makefile.target
> index b68a689..3190bba 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -643,6 +643,9 @@ OBJS += pcnet.o
> OBJS += rtl8139.o
> OBJS += e1000.o
>
> +# Inter-VM PCI shared memory
> +OBJS += ivshmem.o
> +
> # Generic watchdog support and some watchdog devices
> OBJS += watchdog.o
> OBJS += wdt_ib700.o wdt_i6300esb.o
> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
> new file mode 100644
> index 0000000..95e2268
> --- /dev/null
> +++ b/hw/ivshmem.c
> @@ -0,0 +1,421 @@
> +/*
> + * Inter-VM Shared Memory PCI device.
> + *
> + * Author:
> + *      Cam Macdonell <c...@cs.ualberta.ca>
> + *
> + * Based On: cirrus_vga.c and rtl8139.c
> + *
> + * This code is licensed under the GNU GPL v2.
> + */
> +
> +#include "hw.h"
> +#include "console.h"
> +#include "pc.h"
> +#include "pci.h"
> +#include "sysemu.h"
> +
> +#include "qemu-common.h"
> +#include <sys/mman.h>
> +
> +#define PCI_COMMAND_IOACCESS                0x0001
> +#define PCI_COMMAND_MEMACCESS               0x0002
> +#define PCI_COMMAND_BUSMASTER               0x0004
> +
> +//#define DEBUG_IVSHMEM
> +
> +#ifdef DEBUG_IVSHMEM
> +#define IVSHMEM_DPRINTF(fmt, args...)        \
> +    do {printf("IVSHMEM: " fmt, ##args); } while (0)
> +#else
> +#define IVSHMEM_DPRINTF(fmt, args...)
> +#endif
> +
> +typedef struct IVShmemState {
> +    uint16_t intrmask;
> +    uint16_t intrstatus;
> +    uint16_t doorbell;
> +    uint8_t *ivshmem_ptr;
> +    unsigned long ivshmem_offset;
> +    unsigned int ivshmem_size;
> +    unsigned long bios_offset;
> +    unsigned int bios_size;
> +    target_phys_addr_t base_ctrl;
> +    int it_shift;
> +    PCIDevice *pci_dev;
> +    CharDriverState * chr;
> +    unsigned long map_addr;
> +    unsigned long map_end;
> +    int ivshmem_mmio_io_addr;
> +} IVShmemState;
> +
> +typedef struct PCI_IVShmemState {
> +    PCIDevice dev;
> +    IVShmemState ivshmem_state;
> +} PCI_IVShmemState;
> +
> +typedef struct IVShmemDesc {
> +    char name[1024];
> +    char * chrdev;
> +    int size;
> +} IVShmemDesc;
> +
> +
> +/* registers for the Inter-VM shared memory device */
> +enum ivshmem_registers {
> +    IntrMask = 0,
> +    IntrStatus = 16,
> +    Doorbell = 32
> +};
> +
> +static int num_ivshmem_devices = 0;
> +static IVShmemDesc ivshmem_desc;
> +
> +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
> +                    uint32_t addr, uint32_t size, int type)
> +{
> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
> +    IVShmemState *s = &d->ivshmem_state;
> +
> +    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
> +    cpu_register_physical_memory(addr, s->ivshmem_size, s- 
> >ivshmem_offset);
> +
> +}
> +
> +void ivshmem_init(const char * optarg) {
> +
> +    char * temp;
> +    char * ivshmem_sz;
> +    int size;
> +
> +    num_ivshmem_devices++;
> +
> +    /* currently we only support 1 device */
> +    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
> +        return;
> +    }
> +
> +    temp = strdup(optarg);
> +    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
> +    ivshmem_sz=strsep(&temp,",");
> +    if (ivshmem_sz != NULL){
> +        size = atol(ivshmem_sz);
> +    } else {
> +        size = -1;
> +    }
> +
> +    ivshmem_desc.chrdev = strsep(&temp,"\0");
> +
> +    if ( size == -1) {
> +        ivshmem_desc.size = TARGET_PAGE_SIZE;
> +    } else {
> +        ivshmem_desc.size = size*1024*1024;
> +    }
> +    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev  
> is %s\n",
> +                                        optarg, ivshmem_desc.name,
> +                                        ivshmem_desc.size,
> ivshmem_desc.chrdev);
> +}
> +
> +int ivshmem_get_size(void) {
> +    return ivshmem_desc.size;
> +}
> +
> +/* accessing registers - based on rtl8139 */
> +static void ivshmem_update_irq(IVShmemState *s)
> +{
> +    int isr;
> +    isr = (s->intrstatus & s->intrmask) & 0xffff;
> +
> +    /* don't print ISR resets */
> +    if (isr) {
> +        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
> +           isr ? 1 : 0, s->intrstatus, s->intrmask);
> +    }
> +
> +    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
> +}
> +
> +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
> +                       uint32_t addr, uint32_t size, int type)
> +{
> +    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
> +    IVShmemState *s = &d->ivshmem_state;
> +
> +    cpu_register_physical_memory(addr + 0, 0x100, s- 
> >ivshmem_mmio_io_addr);
> +}
> +
> +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
> +{
> +    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
> +
> +    s->intrmask = val;
> +
> +    ivshmem_update_irq(s);
> +}
> +
> +static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
> +{
> +    uint32_t ret = s->intrmask;
> +
> +    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
> +
> +    return ret;
> +}
> +
> +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
> +{
> +    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
> +
> +    s->intrstatus = val;
> +
> +    ivshmem_update_irq(s);
> +    return;
> +}
> +
> +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
> +{
> +    uint32_t ret = s->intrstatus;
> +
> +    /* reading ISR clears all interrupts */
> +    s->intrstatus = 0;
> +
> +    ivshmem_update_irq(s);
> +
> +    return ret;
> +}
> +
> +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t  
> val)
> +{
> +    IVShmemState *s = opaque;
> +
> +    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned  
> long) opaque);
> +
> +    addr &= 0xfe;
> +
> +    switch (addr)
> +    {
> +        case IntrMask:
> +            ivshmem_IntrMask_write(s, val);
> +            break;
> +
> +        case IntrStatus:
> +            ivshmem_IntrStatus_write(s, val);
> +            break;
> +
> +        default:
> +            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
> +    }
> +}
> +
> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t  
> val)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
> +}
> +
> +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t  
> val)
> +{
> +    IVShmemState *s = opaque;
> +    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
> +
> +    switch (addr)
> +    {   // in future, we will probably want to support more types  
> of doorbells
> +        case Doorbell:
> +            // wake up the other side
> +            qemu_chr_write(s->chr, &writebyte, 1);
> +            IVSHMEM_DPRINTF("Writing to the other side 0x%x\n",  
> writebyte);
> +            break;
> +        default:
> +            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
> +    }
> +}
> +
> +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
> +{
> +
> +    IVShmemState *s = opaque;
> +    uint32_t ret;
> +
> +    switch (addr)
> +    {
> +        case IntrMask:
> +            ret = ivshmem_IntrMask_read(s);
> +            break;
> +        case IntrStatus:
> +            ret = ivshmem_IntrStatus_read(s);
> +            break;
> +        default:
> +            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
> +            ret = 0;
> +    }
> +
> +    return ret;
> +}
> +
> +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
> +    return 0;
> +}
> +
> +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
> +{
> +    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
> +
> +    return 0;
> +}
> +
> +static void ivshmem_mmio_writeb(void *opaque,
> +                                target_phys_addr_t addr, uint32_t  
> val)
> +{
> +    ivshmem_io_writeb(opaque, addr & 0xFF, val);
> +}
> +
> +static void ivshmem_mmio_writew(void *opaque,
> +                                target_phys_addr_t addr, uint32_t  
> val)
> +{
> +    ivshmem_io_writew(opaque, addr & 0xFF, val);
> +}
> +
> +static void ivshmem_mmio_writel(void *opaque,
> +                                target_phys_addr_t addr, uint32_t  
> val)
> +{
> +    ivshmem_io_writel(opaque, addr & 0xFF, val);
> +}
> +
> +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t  
> addr)
> +{
> +    return ivshmem_io_readb(opaque, addr & 0xFF);
> +}
> +
> +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t  
> addr)
> +{
> +    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
> +    return val;
> +}
> +
> +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t  
> addr)
> +{
> +    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
> +    return val;
> +}
> +
> +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
> +    ivshmem_mmio_readb,
> +    ivshmem_mmio_readw,
> +    ivshmem_mmio_readl,
> +};
> +
> +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
> +    ivshmem_mmio_writeb,
> +    ivshmem_mmio_writew,
> +    ivshmem_mmio_writel,
> +};
> +
> +static int ivshmem_can_receive(void * opaque)
> +{
> +    return 1;
> +}
> +
> +static void ivshmem_receive(void *opaque, const uint8_t *buf, int  
> size)
> +{
> +    IVShmemState *s = opaque;
> +
> +    ivshmem_IntrStatus_write(s, *buf);
> +
> +    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
> +}
> +
> +static void ivshmem_event(void *opaque, int event)
> +{
> +    IVShmemState *s = opaque;
> +    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
> +}
> +
> +int pci_ivshmem_init(PCIBus *bus)
> +{
> +    PCI_IVShmemState *d;
> +    IVShmemState *s;
> +    uint8_t *pci_conf;
> +    int ivshmem_fd;
> +
> +    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
> +    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
> +                                           sizeof(PCI_IVShmemState),
> +                                           -1, NULL, NULL);
> +    if (!d) {
> +        return -1;
> +    }
> +
> +    s = &d->ivshmem_state;
> +
> +    /* allocate shared memory RAM */
> +    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
> +    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
> +    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
> +
> +    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
> +
> +    s->pci_dev = &d->dev;
> +    s->ivshmem_size = ivshmem_desc.size;
> +
> +    pci_conf = d->dev.config;
> +    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x1af4 (low byte first)
> +    pci_conf[0x01] = 0x1a;
> +    pci_conf[0x02] = 0x10;
> +    pci_conf[0x03] = 0x11;
> +    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
> +    pci_conf[0x0a] = 0x00; // RAM controller
> +    pci_conf[0x0b] = 0x05;
> +    pci_conf[0x0e] = 0x00; // header_type
> +
> +    pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support  
> interrupts
> +
> +    /* XXX: ivshmem_desc.size must be a power of two */
> +
> +    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0,  
> ivshmem_mmio_read,
> +                                    ivshmem_mmio_write, s);
> +
> +    /* region for registers*/
> +    pci_register_io_region(&d->dev, 0, 0x100,
> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
> +
> +    /* region for shared memory */
> +    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
> +                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
> +
> +    /* open shared memory file  */
> +    if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR,  
> S_IRWXU)) <
> 0)
> +    {
> +        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
> +        exit(-1);
> +    }
> +
> +    ftruncate(ivshmem_fd, ivshmem_desc.size);
> +
> +    /* mmap onto PCI device's memory */
> +    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
> +                        MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) ==  
> MAP_FAILED)
> +    {
> +        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
> +        exit(-1);
> +    }
> +
> +    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s- 
> >ivshmem_ptr);
> +
> +    /* setup character device channel */
> +
> +    if (ivshmem_desc.chrdev != NULL) {
> +        char label[32];
> +        snprintf(label, 32, "ivshmem_chardev");
> +        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
> +        if (s->chr == NULL) {
> +            fprintf(stderr, "No server listening on %s\n",
> ivshmem_desc.chrdev);
> +            exit(-1);
> +        }
> +        qemu_chr_add_handlers(s->chr, ivshmem_can_receive,  
> ivshmem_receive,
> +                          ivshmem_event, s);
> +    }
> +
> +    return 0;
> +}
> +
> diff --git a/hw/pc.c b/hw/pc.c
> index 34a4d25..7d0cff2 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -67,6 +67,8 @@ static PITState *pit;
> static IOAPICState *ioapic;
> static PCIDevice *i440fx_state;
>
> +extern int ivshmem_enabled;
> +
> static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
> {
> }
> @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
> vga_ram_size,
>         }
>     }
>
> +    if (pci_enabled && ivshmem_enabled) {
> +        pci_ivshmem_init(pci_bus);
> +    }
> +
>     rtc_state = rtc_init(0x70, i8259[8], 2000);
>
>     qemu_register_boot_set(pc_boot_set, rtc_state);
> diff --git a/hw/pc.h b/hw/pc.h
> index 885c918..0ae0493 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,  
> NICInfo *nd);
>
> void extboot_init(BlockDriverState *bs, int cmd);
>
> +/* ivshmem.c */
> +int pci_ivshmem_init(PCIBus *bus);
> +
> #endif
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 173f458..9ab3e2d 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical  
> mode and
> @code{stdio} in
> non graphical mode.
> ETEXI
>
> +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
> +    "-ivshmem name,size[,unix:path][,server]  creates or opens a  
> shared file
> 'name' of size \
> +    'size' (in MB) and exposes it as a PCI device in the guest\n")
> +STEXI
> +...@item -ivshmem @var{file},@var{size}
> +Creates a POSIX shared file named @var{file} of size @var{size} and  
> creates a
> +PCI device of the same size that maps the shared file into the  
> device for
> guests
> +to access.  The created file on the host is located in /dev/shm/
> +
> +...@item unix:@var{path}[,server]
> +A unix domain socket is used to send and receive interrupts between  
> VMs.  The
> unix domain socket
> +...@var{path} is used for connections.
> +ETEXI
> +
> DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
>     "-pidfile file   write PID to 'file'\n")
> STEXI
> diff --git a/sysemu.h b/sysemu.h
> index 1f45fd6..862b79e 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -217,6 +217,14 @@ extern CharDriverState  
> *parallel_hds[MAX_PARALLEL_PORTS];
>
> extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
>
> +/* inter-VM shared memory devices */
> +
> +#define MAX_IVSHMEM_DEVICES 1
> +
> +extern CharDriverState * ivshmem_chardev;
> +void ivshmem_init(const char * optarg);
> +int ivshmem_get_size(void);
> +
> #define TFR(expr) do { if ((expr) != -1) break; } while (errno ==  
> EINTR)
>
> #ifdef NEED_CPU_H
> diff --git a/vl.c b/vl.c
> index 0420634..7260fa1 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no  
> change */
> int cirrus_vga_enabled = 1;
> int std_vga_enabled = 0;
> int vmsvga_enabled = 0;
> +int ivshmem_enabled = 0;
> int xenfb_enabled = 0;
> #ifdef TARGET_SPARC
> int graphic_width = 1024;
> @@ -239,6 +240,8 @@ int no_quit = 0;
> CharDriverState *serial_hds[MAX_SERIAL_PORTS];
> CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
> CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
> +CharDriverState *ivshmem_chardev;
> +const char * ivshmem_device;
> #ifdef TARGET_I386
> int win2k_install_hack = 0;
> int rtc_td_hack = 0;
> @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
>     cyls = heads = secs = 0;
>     translation = BIOS_ATA_TRANSLATION_AUTO;
>     monitor_device = "vc:80Cx24C";
> +    ivshmem_device = NULL;
> +    ivshmem_chardev = NULL;
>
>     serial_devices[0] = "vc:80Cx24C";
>     for(i = 1; i < MAX_SERIAL_PORTS; i++)
> @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
>                 parallel_devices[parallel_device_index] = optarg;
>                 parallel_device_index++;
>                 break;
> +            case QEMU_OPTION_ivshmem:
> +                ivshmem_device = optarg;
> +                ivshmem_enabled = 1;
> +                break;
>            case QEMU_OPTION_loadvm:
>                loadvm = optarg;
>                break;
> @@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
>            }
>     }
>
> +    if (ivshmem_enabled) {
> +        ivshmem_init(ivshmem_device);
> +        ram_size += ivshmem_get_size();
> +    }
> +
> #ifdef CONFIG_KQEMU
>     /* FIXME: This is a nasty hack because kqemu can't cope with  
> dynamic
>        guest ram allocation.  It needs to go away.  */
> Thx,
>
> Venkat



-----------------------------------------------
A. Cameron Macdonell
Ph.D. Student
Department of Computing Science
University of Alberta
cam@cs.ualberta.ca




^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2009-05-20 14:26 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-07 16:16 [PATCH v2] Shared memory device with interrupt support Cam Macdonell
2009-05-16  2:45 ` Kumar, Venkat
2009-05-16  3:27   ` Cam Macdonell
2009-05-17 21:39     ` Avi Kivity
2009-05-18 18:54 ` Anthony Liguori
2009-05-19  4:31   ` Avi Kivity
2009-05-19 18:31     ` Anthony Liguori
2009-05-20  9:01       ` Avi Kivity
2009-05-20 13:45         ` Anthony Liguori
2009-05-20 14:26           ` Avi Kivity
     [not found] <3D9CB4061D1EB3408D4A0B910433453C030BCA8892@inbmail01.lsi.com>
2009-05-16  3:30 ` Cam Macdonell
2009-05-17 21:51   ` Avi Kivity
2009-05-18 11:12     ` Gregory Haskins
2009-05-18 11:38       ` Avi Kivity
2009-05-18 16:50     ` Cam Macdonell
2009-05-18 17:19       ` Avi Kivity
2009-05-18 12:11   ` Kumar, Venkat
2009-05-18 16:20     ` Cam Macdonell
2009-05-19  3:52       ` Kumar, Venkat
2009-05-19 11:20         ` Jayaraman, Bhaskar
2009-05-19 11:35           ` Gregory Haskins

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.