All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Vmchannel PCI device.
@ 2008-12-14 11:50 ` Gleb Natapov
  0 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2008-12-14 11:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm

There is a need for communication channel between host and various
agents that are running inside a VM guest. The channel will be used
for statistic gathering, logging, cut & paste, host screen resolution
changes notification, guest configuration etc.

It is undesirable to use TCP/IP for this purpose since network
connectivity may not exist between host and guest and if it exists the
traffic can be not routable between host and guest for security reasons
or TCP/IP traffic can be firewalled (by mistake) by unsuspecting VM user.

The patch implements separate PCI device for this type of communication.
To create a channel "-vmchannel channel:dev" option should be specified
on qemu commmand line during VM launch.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---

 Makefile.target       |    2 
 hw/pc.c               |    8 +
 hw/virtio-vmchannel.c |  283 +++++++++++++++++++++++++++++++++++++++++++++++++
 hw/virtio-vmchannel.h |   19 +++
 sysemu.h              |    4 +
 vl.c                  |   35 ++++++
 6 files changed, 344 insertions(+), 7 deletions(-)
 create mode 100644 hw/virtio-vmchannel.c
 create mode 100644 hw/virtio-vmchannel.h

diff --git a/Makefile.target b/Makefile.target
index 8c649be..d9f5aad 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -637,7 +637,7 @@ OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
 # virtio support
-OBJS+= virtio.o virtio-blk.o virtio-balloon.o
+OBJS+= virtio.o virtio-blk.o virtio-balloon.o virtio-vmchannel.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
diff --git a/hw/pc.c b/hw/pc.c
index 73dd8bc..57e3b1d 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1095,7 +1095,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
         }
     }
 
-    /* Add virtio block devices */
+    /* Add virtio devices */
     if (pci_enabled) {
         int index;
         int unit_id = 0;
@@ -1104,11 +1104,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
             virtio_blk_init(pci_bus, drives_table[index].bdrv);
             unit_id++;
         }
-    }
-
-    /* Add virtio balloon device */
-    if (pci_enabled)
         virtio_balloon_init(pci_bus);
+	virtio_vmchannel_init(pci_bus);
+    }
 }
 
 static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
diff --git a/hw/virtio-vmchannel.c b/hw/virtio-vmchannel.c
new file mode 100644
index 0000000..1f5e274
--- /dev/null
+++ b/hw/virtio-vmchannel.c
@@ -0,0 +1,283 @@
+/*
+ * Virtio VMChannel Device
+ *
+ * Copyright RedHat, inc. 2008
+ *
+ * Authors:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "sysemu.h"
+#include "virtio.h"
+#include "pc.h"
+#include "qemu-char.h"
+#include "virtio-vmchannel.h"
+
+//#define DEBUG_VMCHANNEL
+
+#ifdef DEBUG_VMCHANNEL
+#define VMCHANNEL_DPRINTF(fmt, args...)                     \
+    do { printf("VMCHANNEL: " fmt , ##args); } while (0)
+#else
+#define VMCHANNEL_DPRINTF(fmt, args...)
+#endif
+
+typedef struct VirtIOVMChannel {
+    VirtIODevice vdev;
+    VirtQueue *sq;
+    VirtQueue *rq;
+} VirtIOVMChannel;
+
+typedef struct VMChannel {
+    CharDriverState *hd;
+    VirtQueueElement elem;
+    size_t len;
+    uint32_t id;
+    char name[VMCHANNEL_NAME_MAX];
+} VMChannel;
+
+typedef struct VMChannelDesc {
+    uint32_t id;
+    uint32_t len;
+} VMChannelDesc;
+
+typedef struct VMChannelCfg {
+    uint32_t count;
+    char ids[0];
+} VMChannelCfg;
+
+static uint32_t vmchannel_cfg_size;
+static VirtIOVMChannel *vmchannel;
+
+static VMChannel vmchannel_descs[MAX_VMCHANNEL_DEVICES];
+static int vmchannel_desc_idx;
+
+static int vmchannel_can_read(void *opaque)
+{
+    VMChannel *c = opaque;
+
+    /* device not yet configured */
+    if (!virtio_queue_ready(vmchannel->rq))
+        return 0;
+
+    if (!c->len) {
+        int i;
+
+        if (virtqueue_pop(vmchannel->rq, &c->elem) == 0)
+            return 0;
+
+        if (c->elem.in_num < 1 ||
+                c->elem.in_sg[0].iov_len < sizeof(VMChannelDesc)) {
+            fprintf(stderr, "vmchannel: wrong receive descriptor\n");
+            return 0;
+        }
+
+        for (i = 0; i < c->elem.in_num; i++)
+            c->len += c->elem.in_sg[i].iov_len;
+
+        c->len -= sizeof(VMChannelDesc);
+    }
+
+    return (int)c->len;
+}
+
+static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
+{
+    VMChannel *c = opaque;
+    VMChannelDesc *desc;
+    int i = 0, left = size;
+    size_t iov_len;
+    void *iov_base;
+
+    VMCHANNEL_DPRINTF("read %d bytes from channel %d\n", size, c->id);
+
+    if (!c->len) {
+        fprintf(stderr, "vmchannel: trying to receive into empty descriptor\n");
+        exit(1);
+    }
+
+    if (size <= 0 || size > c->len) {
+        fprintf(stderr, "vmchannel: read size is wrong\n");
+        exit(1);
+    }
+
+    desc = (VMChannelDesc*)c->elem.in_sg[0].iov_base;
+    desc->id = cpu_to_le32(c->id);
+    desc->len = cpu_to_le32(size);
+
+    iov_base = desc + 1;
+    iov_len = c->elem.in_sg[0].iov_len - sizeof(VMChannelDesc);
+
+    for (;;) {
+        size_t len = MIN(left, iov_len);
+        memcpy(iov_base, buf, len);
+        left -= len;
+        buf += len;
+        if (left == 0 || ++i == c->elem.in_num)
+            break;
+        iov_base = c->elem.in_sg[i].iov_base;
+        iov_len = c->elem.in_sg[i].iov_len;
+    }
+
+    if (left) {
+        fprintf(stderr, "vmchannel: dropping %d bytes of data\n", left);
+        exit(1);
+    }
+
+    virtqueue_push(vmchannel->rq, &c->elem, size + sizeof(VMChannelDesc));
+    c->len = 0;
+    virtio_notify(&vmchannel->vdev, vmchannel->rq);
+}
+
+static void virtio_vmchannel_handle_recv(VirtIODevice *vdev, VirtQueue *outputq)
+{
+}
+
+static VMChannel *vmchannel_find_by_id(uint32_t id)
+{
+    int i;
+
+    for (i = 0; i < vmchannel_desc_idx; i++) {
+        if (vmchannel_descs[i].id == id)
+            return &vmchannel_descs[i];
+    }
+    return NULL;
+}
+
+static VMChannel *vmchannel_find_by_name(const char *name)
+{
+    int i;
+
+    for (i = 0; i < vmchannel_desc_idx; i++) {
+        if (!strcmp(vmchannel_descs[i].name, name))
+            return &vmchannel_descs[i];
+    }
+    return NULL;
+}
+
+static void virtio_vmchannel_handle_send(VirtIODevice *vdev, VirtQueue *outputq)
+{
+    VirtQueueElement elem;
+
+    VMCHANNEL_DPRINTF("send\n");
+    while (virtqueue_pop(vmchannel->sq, &elem)) {
+        VMChannelDesc *desc;
+        VMChannel *c;
+        unsigned int len = 0;
+        int i = 0;
+        size_t iov_len;
+        void *iov_base;
+
+        if (elem.out_num < 1 ||
+                elem.out_sg[0].iov_len < sizeof(VMChannelDesc)) {
+            fprintf(stderr, "vmchannel: incorrect send descriptor\n");
+            virtqueue_push(vmchannel->sq, &elem, 0);
+            return;
+        }
+
+        desc = (VMChannelDesc*)elem.out_sg[0].iov_base;
+        desc->len = le32_to_cpu(desc->len);
+        c = vmchannel_find_by_id(le32_to_cpu(desc->id));
+
+        if(!c) {
+            fprintf(stderr, "vmchannel: guest sends to nonexistent channel\n");
+            virtqueue_push(vmchannel->sq, &elem, 0);
+            return;
+        }
+
+        VMCHANNEL_DPRINTF("send to channel %d %d bytes\n", c->id, desc->len);
+
+        iov_base = desc + 1;
+        iov_len = elem.out_sg[0].iov_len - sizeof(VMChannelDesc);
+
+        for (;;) {
+            qemu_chr_write(c->hd, iov_base, iov_len);
+            len += iov_len;
+            if (++i == elem.out_num)
+                break;
+            iov_base = elem.out_sg[i].iov_base;
+            iov_len = elem.out_sg[i].iov_len;
+        }
+
+        if (desc->len != len)
+            fprintf(stderr, "vmchannel: bad descriptor was sent by guest\n");
+
+        virtqueue_push(vmchannel->sq, &elem, len);
+    }
+
+    virtio_notify(&vmchannel->vdev, vmchannel->sq);
+}
+
+static uint32_t virtio_vmchannel_get_features(VirtIODevice *vdev)
+{ 
+    return 0;
+}
+
+static void virtio_vmchannel_update_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VMChannelCfg *cfg = (VMChannelCfg *)config;
+    int i, offset = 0;
+
+    cfg->count = cpu_to_le32(vmchannel_desc_idx);
+    for (i = 0; i < vmchannel_desc_idx; i++) {
+        uint32_t len = strlen(vmchannel_descs[i].name) + 1;
+        cpu_to_le32w((uint32_t*)(cfg->ids + offset), vmchannel_descs[i].id);
+        offset += 4;
+        cpu_to_le32w((uint32_t*)(cfg->ids + offset), len);
+        offset += 4;
+        strcpy(cfg->ids + offset, vmchannel_descs[i].name);
+        offset += len;
+    }
+}
+
+static void virtio_vmchannel_reset(VirtIODevice *vdev)
+{
+    int i;
+
+    for (i = 0; i < vmchannel_desc_idx; i++)
+        vmchannel_descs[i].len = 0;
+}
+
+void virtio_vmchannel_init(PCIBus *bus)
+{
+
+    if (!vmchannel_desc_idx)
+        return;
+
+    vmchannel = (VirtIOVMChannel *)virtio_init_pci(bus, "virtio-vmchannel",
+            0x1af4, 0x1003, 0, VIRTIO_ID_VMCHANNEL, 0x5, 0x0, 0x0, 
+            vmchannel_cfg_size + 4, sizeof(VirtIOVMChannel));
+
+    vmchannel->vdev.get_features = virtio_vmchannel_get_features;
+    vmchannel->vdev.get_config = virtio_vmchannel_update_config;
+    vmchannel->vdev.reset = virtio_vmchannel_reset;
+
+    vmchannel->rq = virtio_add_queue(&vmchannel->vdev, 128,
+            virtio_vmchannel_handle_recv);
+    vmchannel->sq = virtio_add_queue(&vmchannel->vdev, 128,
+            virtio_vmchannel_handle_send);
+
+    return;
+}
+
+void vmchannel_init(CharDriverState *hd, const char *name)
+{
+    VMChannel *c = &vmchannel_descs[vmchannel_desc_idx];
+
+    if (vmchannel_find_by_name(name)) {
+        fprintf(stderr, "vmchannel with name '%s' already exists\n", name);
+        exit(1);
+    }
+
+    vmchannel_cfg_size += (9 + strlen(name));
+    c->hd = hd;
+    c->id = vmchannel_desc_idx++;
+    strncpy(c->name, name, VMCHANNEL_NAME_MAX);
+    qemu_chr_add_handlers(hd, vmchannel_can_read, vmchannel_read, NULL, c);
+    VMCHANNEL_DPRINTF("Add channel %d with name '%s'\n", c->id, name);
+}
diff --git a/hw/virtio-vmchannel.h b/hw/virtio-vmchannel.h
new file mode 100644
index 0000000..7d7994c
--- /dev/null
+++ b/hw/virtio-vmchannel.h
@@ -0,0 +1,19 @@
+/*
+ * Virtio VMChannel Device
+ *
+ * Copyright RedHat, inc. 2008
+ *
+ * Authors:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VIRTIO_VMCHANNEL_H
+#define VIRTIO_VMCHANNEL_H
+
+#define VIRTIO_ID_VMCHANNEL 6
+#define VMCHANNEL_NAME_MAX 128
+#endif
diff --git a/sysemu.h b/sysemu.h
index 94cffaf..54d9c83 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -157,6 +157,10 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
 
+#define MAX_VMCHANNEL_DEVICES 4
+void virtio_vmchannel_init(PCIBus *bus);
+void vmchannel_init(CharDriverState *hd, const char *name);
+
 #ifdef NEED_CPU_H
 /* loader.c */
 int get_image_size(const char *filename);
diff --git a/vl.c b/vl.c
index c3a8d8f..9a714c8 100644
--- a/vl.c
+++ b/vl.c
@@ -215,6 +215,7 @@ static int full_screen = 0;
 static int no_frame = 0;
 #endif
 int no_quit = 0;
+CharDriverState *vmchannel_hds[MAX_VMCHANNEL_DEVICES];
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 #ifdef TARGET_I386
@@ -3939,6 +3940,7 @@ static void help(int exitcode)
            "-monitor dev    redirect the monitor to char device 'dev'\n"
            "-serial dev     redirect the serial port to char device 'dev'\n"
            "-parallel dev   redirect the parallel port to char device 'dev'\n"
+	   "-vmchannel channel:dev  redirect the vmchannel with name 'channel', to char device 'dev'\n"
            "-pidfile file   Write PID to 'file'\n"
            "-S              freeze CPU at startup (use 'c' to start execution)\n"
            "-s              wait gdb connection to port\n"
@@ -4052,6 +4054,7 @@ enum {
     QEMU_OPTION_monitor,
     QEMU_OPTION_serial,
     QEMU_OPTION_parallel,
+    QEMU_OPTION_vmchannel,
     QEMU_OPTION_loadvm,
     QEMU_OPTION_full_screen,
     QEMU_OPTION_no_frame,
@@ -4160,6 +4163,7 @@ static const QEMUOption qemu_options[] = {
     { "monitor", HAS_ARG, QEMU_OPTION_monitor },
     { "serial", HAS_ARG, QEMU_OPTION_serial },
     { "parallel", HAS_ARG, QEMU_OPTION_parallel },
+    { "vmchannel", 1, QEMU_OPTION_vmchannel },
     { "loadvm", HAS_ARG, QEMU_OPTION_loadvm },
     { "full-screen", 0, QEMU_OPTION_full_screen },
 #ifdef CONFIG_SDL
@@ -4484,6 +4488,8 @@ int main(int argc, char **argv, char **envp)
     int serial_device_index;
     const char *parallel_devices[MAX_PARALLEL_PORTS];
     int parallel_device_index;
+    char *vmchannel_devices[MAX_VMCHANNEL_DEVICES];
+    int vmchannel_device_index;
     const char *loadvm = NULL;
     QEMUMachine *machine;
     const char *cpu_model;
@@ -4557,6 +4563,10 @@ int main(int argc, char **argv, char **envp)
         parallel_devices[i] = NULL;
     parallel_device_index = 0;
 
+    for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++)
+    	vmchannel_devices[i] = NULL;
+    vmchannel_device_index = 0;
+
     usb_devices_index = 0;
 
     nb_net_clients = 0;
@@ -4951,7 +4961,13 @@ int main(int argc, char **argv, char **envp)
                 parallel_devices[parallel_device_index] = optarg;
                 parallel_device_index++;
                 break;
-	    case QEMU_OPTION_loadvm:
+            case QEMU_OPTION_vmchannel:
+                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
+                    fprintf(stderr, "qemu: too many vmchannel devices\n");
+                    exit(1);
+                }
+                vmchannel_devices[vmchannel_device_index++] = strdup(optarg);
+            case QEMU_OPTION_loadvm:
 		loadvm = optarg;
 		break;
             case QEMU_OPTION_full_screen:
@@ -5453,6 +5469,23 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
+    for(i = 0; i < vmchannel_device_index; i++) {
+        char *devname = vmchannel_devices[i];
+        char *name;
+
+        if (!devname)
+            continue;
+
+        name = strsep(&devname, ":");
+        vmchannel_hds[i] = qemu_chr_open(name, devname);
+        if (!vmchannel_hds[i]) {
+            fprintf(stderr, "qemu: could not open vmchannel device '%s'\n", name);
+            exit(1);
+        }
+        vmchannel_init(vmchannel_hds[i], name);
+        free(vmchannel_devices[i]);
+    }
+
     machine->init(ram_size, vga_ram_size, boot_devices, ds,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-14 11:50 ` Gleb Natapov
  0 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2008-12-14 11:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm

There is a need for communication channel between host and various
agents that are running inside a VM guest. The channel will be used
for statistic gathering, logging, cut & paste, host screen resolution
changes notification, guest configuration etc.

It is undesirable to use TCP/IP for this purpose since network
connectivity may not exist between host and guest and if it exists the
traffic can be not routable between host and guest for security reasons
or TCP/IP traffic can be firewalled (by mistake) by unsuspecting VM user.

The patch implements separate PCI device for this type of communication.
To create a channel "-vmchannel channel:dev" option should be specified
on qemu commmand line during VM launch.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---

 Makefile.target       |    2 
 hw/pc.c               |    8 +
 hw/virtio-vmchannel.c |  283 +++++++++++++++++++++++++++++++++++++++++++++++++
 hw/virtio-vmchannel.h |   19 +++
 sysemu.h              |    4 +
 vl.c                  |   35 ++++++
 6 files changed, 344 insertions(+), 7 deletions(-)
 create mode 100644 hw/virtio-vmchannel.c
 create mode 100644 hw/virtio-vmchannel.h

diff --git a/Makefile.target b/Makefile.target
index 8c649be..d9f5aad 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -637,7 +637,7 @@ OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
 # virtio support
-OBJS+= virtio.o virtio-blk.o virtio-balloon.o
+OBJS+= virtio.o virtio-blk.o virtio-balloon.o virtio-vmchannel.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
diff --git a/hw/pc.c b/hw/pc.c
index 73dd8bc..57e3b1d 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1095,7 +1095,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
         }
     }
 
-    /* Add virtio block devices */
+    /* Add virtio devices */
     if (pci_enabled) {
         int index;
         int unit_id = 0;
@@ -1104,11 +1104,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
             virtio_blk_init(pci_bus, drives_table[index].bdrv);
             unit_id++;
         }
-    }
-
-    /* Add virtio balloon device */
-    if (pci_enabled)
         virtio_balloon_init(pci_bus);
+	virtio_vmchannel_init(pci_bus);
+    }
 }
 
 static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
diff --git a/hw/virtio-vmchannel.c b/hw/virtio-vmchannel.c
new file mode 100644
index 0000000..1f5e274
--- /dev/null
+++ b/hw/virtio-vmchannel.c
@@ -0,0 +1,283 @@
+/*
+ * Virtio VMChannel Device
+ *
+ * Copyright RedHat, inc. 2008
+ *
+ * Authors:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "sysemu.h"
+#include "virtio.h"
+#include "pc.h"
+#include "qemu-char.h"
+#include "virtio-vmchannel.h"
+
+//#define DEBUG_VMCHANNEL
+
+#ifdef DEBUG_VMCHANNEL
+#define VMCHANNEL_DPRINTF(fmt, args...)                     \
+    do { printf("VMCHANNEL: " fmt , ##args); } while (0)
+#else
+#define VMCHANNEL_DPRINTF(fmt, args...)
+#endif
+
+typedef struct VirtIOVMChannel {
+    VirtIODevice vdev;
+    VirtQueue *sq;
+    VirtQueue *rq;
+} VirtIOVMChannel;
+
+typedef struct VMChannel {
+    CharDriverState *hd;
+    VirtQueueElement elem;
+    size_t len;
+    uint32_t id;
+    char name[VMCHANNEL_NAME_MAX];
+} VMChannel;
+
+typedef struct VMChannelDesc {
+    uint32_t id;
+    uint32_t len;
+} VMChannelDesc;
+
+typedef struct VMChannelCfg {
+    uint32_t count;
+    char ids[0];
+} VMChannelCfg;
+
+static uint32_t vmchannel_cfg_size;
+static VirtIOVMChannel *vmchannel;
+
+static VMChannel vmchannel_descs[MAX_VMCHANNEL_DEVICES];
+static int vmchannel_desc_idx;
+
+static int vmchannel_can_read(void *opaque)
+{
+    VMChannel *c = opaque;
+
+    /* device not yet configured */
+    if (!virtio_queue_ready(vmchannel->rq))
+        return 0;
+
+    if (!c->len) {
+        int i;
+
+        if (virtqueue_pop(vmchannel->rq, &c->elem) == 0)
+            return 0;
+
+        if (c->elem.in_num < 1 ||
+                c->elem.in_sg[0].iov_len < sizeof(VMChannelDesc)) {
+            fprintf(stderr, "vmchannel: wrong receive descriptor\n");
+            return 0;
+        }
+
+        for (i = 0; i < c->elem.in_num; i++)
+            c->len += c->elem.in_sg[i].iov_len;
+
+        c->len -= sizeof(VMChannelDesc);
+    }
+
+    return (int)c->len;
+}
+
+static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
+{
+    VMChannel *c = opaque;
+    VMChannelDesc *desc;
+    int i = 0, left = size;
+    size_t iov_len;
+    void *iov_base;
+
+    VMCHANNEL_DPRINTF("read %d bytes from channel %d\n", size, c->id);
+
+    if (!c->len) {
+        fprintf(stderr, "vmchannel: trying to receive into empty descriptor\n");
+        exit(1);
+    }
+
+    if (size <= 0 || size > c->len) {
+        fprintf(stderr, "vmchannel: read size is wrong\n");
+        exit(1);
+    }
+
+    desc = (VMChannelDesc*)c->elem.in_sg[0].iov_base;
+    desc->id = cpu_to_le32(c->id);
+    desc->len = cpu_to_le32(size);
+
+    iov_base = desc + 1;
+    iov_len = c->elem.in_sg[0].iov_len - sizeof(VMChannelDesc);
+
+    for (;;) {
+        size_t len = MIN(left, iov_len);
+        memcpy(iov_base, buf, len);
+        left -= len;
+        buf += len;
+        if (left == 0 || ++i == c->elem.in_num)
+            break;
+        iov_base = c->elem.in_sg[i].iov_base;
+        iov_len = c->elem.in_sg[i].iov_len;
+    }
+
+    if (left) {
+        fprintf(stderr, "vmchannel: dropping %d bytes of data\n", left);
+        exit(1);
+    }
+
+    virtqueue_push(vmchannel->rq, &c->elem, size + sizeof(VMChannelDesc));
+    c->len = 0;
+    virtio_notify(&vmchannel->vdev, vmchannel->rq);
+}
+
+static void virtio_vmchannel_handle_recv(VirtIODevice *vdev, VirtQueue *outputq)
+{
+}
+
+static VMChannel *vmchannel_find_by_id(uint32_t id)
+{
+    int i;
+
+    for (i = 0; i < vmchannel_desc_idx; i++) {
+        if (vmchannel_descs[i].id == id)
+            return &vmchannel_descs[i];
+    }
+    return NULL;
+}
+
+static VMChannel *vmchannel_find_by_name(const char *name)
+{
+    int i;
+
+    for (i = 0; i < vmchannel_desc_idx; i++) {
+        if (!strcmp(vmchannel_descs[i].name, name))
+            return &vmchannel_descs[i];
+    }
+    return NULL;
+}
+
+static void virtio_vmchannel_handle_send(VirtIODevice *vdev, VirtQueue *outputq)
+{
+    VirtQueueElement elem;
+
+    VMCHANNEL_DPRINTF("send\n");
+    while (virtqueue_pop(vmchannel->sq, &elem)) {
+        VMChannelDesc *desc;
+        VMChannel *c;
+        unsigned int len = 0;
+        int i = 0;
+        size_t iov_len;
+        void *iov_base;
+
+        if (elem.out_num < 1 ||
+                elem.out_sg[0].iov_len < sizeof(VMChannelDesc)) {
+            fprintf(stderr, "vmchannel: incorrect send descriptor\n");
+            virtqueue_push(vmchannel->sq, &elem, 0);
+            return;
+        }
+
+        desc = (VMChannelDesc*)elem.out_sg[0].iov_base;
+        desc->len = le32_to_cpu(desc->len);
+        c = vmchannel_find_by_id(le32_to_cpu(desc->id));
+
+        if(!c) {
+            fprintf(stderr, "vmchannel: guest sends to nonexistent channel\n");
+            virtqueue_push(vmchannel->sq, &elem, 0);
+            return;
+        }
+
+        VMCHANNEL_DPRINTF("send to channel %d %d bytes\n", c->id, desc->len);
+
+        iov_base = desc + 1;
+        iov_len = elem.out_sg[0].iov_len - sizeof(VMChannelDesc);
+
+        for (;;) {
+            qemu_chr_write(c->hd, iov_base, iov_len);
+            len += iov_len;
+            if (++i == elem.out_num)
+                break;
+            iov_base = elem.out_sg[i].iov_base;
+            iov_len = elem.out_sg[i].iov_len;
+        }
+
+        if (desc->len != len)
+            fprintf(stderr, "vmchannel: bad descriptor was sent by guest\n");
+
+        virtqueue_push(vmchannel->sq, &elem, len);
+    }
+
+    virtio_notify(&vmchannel->vdev, vmchannel->sq);
+}
+
+static uint32_t virtio_vmchannel_get_features(VirtIODevice *vdev)
+{ 
+    return 0;
+}
+
+static void virtio_vmchannel_update_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VMChannelCfg *cfg = (VMChannelCfg *)config;
+    int i, offset = 0;
+
+    cfg->count = cpu_to_le32(vmchannel_desc_idx);
+    for (i = 0; i < vmchannel_desc_idx; i++) {
+        uint32_t len = strlen(vmchannel_descs[i].name) + 1;
+        cpu_to_le32w((uint32_t*)(cfg->ids + offset), vmchannel_descs[i].id);
+        offset += 4;
+        cpu_to_le32w((uint32_t*)(cfg->ids + offset), len);
+        offset += 4;
+        strcpy(cfg->ids + offset, vmchannel_descs[i].name);
+        offset += len;
+    }
+}
+
+static void virtio_vmchannel_reset(VirtIODevice *vdev)
+{
+    int i;
+
+    for (i = 0; i < vmchannel_desc_idx; i++)
+        vmchannel_descs[i].len = 0;
+}
+
+void virtio_vmchannel_init(PCIBus *bus)
+{
+
+    if (!vmchannel_desc_idx)
+        return;
+
+    vmchannel = (VirtIOVMChannel *)virtio_init_pci(bus, "virtio-vmchannel",
+            0x1af4, 0x1003, 0, VIRTIO_ID_VMCHANNEL, 0x5, 0x0, 0x0, 
+            vmchannel_cfg_size + 4, sizeof(VirtIOVMChannel));
+
+    vmchannel->vdev.get_features = virtio_vmchannel_get_features;
+    vmchannel->vdev.get_config = virtio_vmchannel_update_config;
+    vmchannel->vdev.reset = virtio_vmchannel_reset;
+
+    vmchannel->rq = virtio_add_queue(&vmchannel->vdev, 128,
+            virtio_vmchannel_handle_recv);
+    vmchannel->sq = virtio_add_queue(&vmchannel->vdev, 128,
+            virtio_vmchannel_handle_send);
+
+    return;
+}
+
+void vmchannel_init(CharDriverState *hd, const char *name)
+{
+    VMChannel *c = &vmchannel_descs[vmchannel_desc_idx];
+
+    if (vmchannel_find_by_name(name)) {
+        fprintf(stderr, "vmchannel with name '%s' already exists\n", name);
+        exit(1);
+    }
+
+    vmchannel_cfg_size += (9 + strlen(name));
+    c->hd = hd;
+    c->id = vmchannel_desc_idx++;
+    strncpy(c->name, name, VMCHANNEL_NAME_MAX);
+    qemu_chr_add_handlers(hd, vmchannel_can_read, vmchannel_read, NULL, c);
+    VMCHANNEL_DPRINTF("Add channel %d with name '%s'\n", c->id, name);
+}
diff --git a/hw/virtio-vmchannel.h b/hw/virtio-vmchannel.h
new file mode 100644
index 0000000..7d7994c
--- /dev/null
+++ b/hw/virtio-vmchannel.h
@@ -0,0 +1,19 @@
+/*
+ * Virtio VMChannel Device
+ *
+ * Copyright RedHat, inc. 2008
+ *
+ * Authors:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VIRTIO_VMCHANNEL_H
+#define VIRTIO_VMCHANNEL_H
+
+#define VIRTIO_ID_VMCHANNEL 6
+#define VMCHANNEL_NAME_MAX 128
+#endif
diff --git a/sysemu.h b/sysemu.h
index 94cffaf..54d9c83 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -157,6 +157,10 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
 
+#define MAX_VMCHANNEL_DEVICES 4
+void virtio_vmchannel_init(PCIBus *bus);
+void vmchannel_init(CharDriverState *hd, const char *name);
+
 #ifdef NEED_CPU_H
 /* loader.c */
 int get_image_size(const char *filename);
diff --git a/vl.c b/vl.c
index c3a8d8f..9a714c8 100644
--- a/vl.c
+++ b/vl.c
@@ -215,6 +215,7 @@ static int full_screen = 0;
 static int no_frame = 0;
 #endif
 int no_quit = 0;
+CharDriverState *vmchannel_hds[MAX_VMCHANNEL_DEVICES];
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 #ifdef TARGET_I386
@@ -3939,6 +3940,7 @@ static void help(int exitcode)
            "-monitor dev    redirect the monitor to char device 'dev'\n"
            "-serial dev     redirect the serial port to char device 'dev'\n"
            "-parallel dev   redirect the parallel port to char device 'dev'\n"
+	   "-vmchannel channel:dev  redirect the vmchannel with name 'channel', to char device 'dev'\n"
            "-pidfile file   Write PID to 'file'\n"
            "-S              freeze CPU at startup (use 'c' to start execution)\n"
            "-s              wait gdb connection to port\n"
@@ -4052,6 +4054,7 @@ enum {
     QEMU_OPTION_monitor,
     QEMU_OPTION_serial,
     QEMU_OPTION_parallel,
+    QEMU_OPTION_vmchannel,
     QEMU_OPTION_loadvm,
     QEMU_OPTION_full_screen,
     QEMU_OPTION_no_frame,
@@ -4160,6 +4163,7 @@ static const QEMUOption qemu_options[] = {
     { "monitor", HAS_ARG, QEMU_OPTION_monitor },
     { "serial", HAS_ARG, QEMU_OPTION_serial },
     { "parallel", HAS_ARG, QEMU_OPTION_parallel },
+    { "vmchannel", 1, QEMU_OPTION_vmchannel },
     { "loadvm", HAS_ARG, QEMU_OPTION_loadvm },
     { "full-screen", 0, QEMU_OPTION_full_screen },
 #ifdef CONFIG_SDL
@@ -4484,6 +4488,8 @@ int main(int argc, char **argv, char **envp)
     int serial_device_index;
     const char *parallel_devices[MAX_PARALLEL_PORTS];
     int parallel_device_index;
+    char *vmchannel_devices[MAX_VMCHANNEL_DEVICES];
+    int vmchannel_device_index;
     const char *loadvm = NULL;
     QEMUMachine *machine;
     const char *cpu_model;
@@ -4557,6 +4563,10 @@ int main(int argc, char **argv, char **envp)
         parallel_devices[i] = NULL;
     parallel_device_index = 0;
 
+    for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++)
+    	vmchannel_devices[i] = NULL;
+    vmchannel_device_index = 0;
+
     usb_devices_index = 0;
 
     nb_net_clients = 0;
@@ -4951,7 +4961,13 @@ int main(int argc, char **argv, char **envp)
                 parallel_devices[parallel_device_index] = optarg;
                 parallel_device_index++;
                 break;
-	    case QEMU_OPTION_loadvm:
+            case QEMU_OPTION_vmchannel:
+                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
+                    fprintf(stderr, "qemu: too many vmchannel devices\n");
+                    exit(1);
+                }
+                vmchannel_devices[vmchannel_device_index++] = strdup(optarg);
+            case QEMU_OPTION_loadvm:
 		loadvm = optarg;
 		break;
             case QEMU_OPTION_full_screen:
@@ -5453,6 +5469,23 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
+    for(i = 0; i < vmchannel_device_index; i++) {
+        char *devname = vmchannel_devices[i];
+        char *name;
+
+        if (!devname)
+            continue;
+
+        name = strsep(&devname, ":");
+        vmchannel_hds[i] = qemu_chr_open(name, devname);
+        if (!vmchannel_hds[i]) {
+            fprintf(stderr, "qemu: could not open vmchannel device '%s'\n", name);
+            exit(1);
+        }
+        vmchannel_init(vmchannel_hds[i], name);
+        free(vmchannel_devices[i]);
+    }
+
     machine->init(ram_size, vga_ram_size, boot_devices, ds,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 11:50 ` [Qemu-devel] " Gleb Natapov
  (?)
@ 2008-12-14 12:28 ` Blue Swirl
  2008-12-14 13:12   ` Gleb Natapov
  -1 siblings, 1 reply; 32+ messages in thread
From: Blue Swirl @ 2008-12-14 12:28 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm

On 12/14/08, Gleb Natapov <gleb@redhat.com> wrote:
> There is a need for communication channel between host and various
>  agents that are running inside a VM guest. The channel will be used
>  for statistic gathering, logging, cut & paste, host screen resolution
>  changes notification, guest configuration etc.

Isn't this exactly what the firmware configuration device was supposed
to be used for? In the list of use cases you gave, I don't see
anything that could not be done with it.

So, to avoid duplicated functionality, I'd add the missing pieces to
the configuration device and if PCI compatibility is desired, the
firmware configuration device IO port could be handled by a wrapper
PCI device much like what you proposed.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 12:28 ` Blue Swirl
@ 2008-12-14 13:12   ` Gleb Natapov
  2008-12-14 19:15     ` Anthony Liguori
  0 siblings, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2008-12-14 13:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm

On Sun, Dec 14, 2008 at 02:28:23PM +0200, Blue Swirl wrote:
> On 12/14/08, Gleb Natapov <gleb@redhat.com> wrote:
> > There is a need for communication channel between host and various
> >  agents that are running inside a VM guest. The channel will be used
> >  for statistic gathering, logging, cut & paste, host screen resolution
> >  changes notification, guest configuration etc.
> 
> Isn't this exactly what the firmware configuration device was supposed
> to be used for? In the list of use cases you gave, I don't see
> anything that could not be done with it.
> 
The requirement for firmware configuration interface was different. We
wanted something simple that we can use as early as possible in cpu init
code and performance was not considered at all. Obviously PCI device doesn't
fit for this. We don't want to write PCI driver inside a BIOS and PCI
initialization is too late in HW initialization sequence.

The requirement for vmchannel was that it should allow a guest
to communicate with external (to qemu) process and with reasonable
performance too. Firmware interface that copies data byte at time does
not fit.  And obviously firmware interface lacks interrupts, we don't
want to poll for data in a guest.

> So, to avoid duplicated functionality, I'd add the missing pieces to
> the configuration device and if PCI compatibility is desired, the
> firmware configuration device IO port could be handled by a wrapper
> PCI device much like what you proposed.
> 
vmchannel code uses virtio subsistem (which was not present in qemu when
firmware interface was added BTW). Theoretically we can use virtio for
FW interface too, but the in guest part of vitio is too complex to be
added to firmware IMO. Lets keep simple things simple.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 13:12   ` Gleb Natapov
@ 2008-12-14 19:15     ` Anthony Liguori
  2008-12-14 19:37       ` Gleb Natapov
  2008-12-14 22:13         ` Daniel P. Berrange
  0 siblings, 2 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-14 19:15 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: qemu-devel, kvm

Gleb Natapov wrote:
> On Sun, Dec 14, 2008 at 02:28:23PM +0200, Blue Swirl wrote:
>   
>> On 12/14/08, Gleb Natapov <gleb@redhat.com> wrote:
>>     
>>> There is a need for communication channel between host and various
>>>  agents that are running inside a VM guest. The channel will be used
>>>  for statistic gathering, logging, cut & paste, host screen resolution
>>>  changes notification, guest configuration etc.
>>>       
>> Isn't this exactly what the firmware configuration device was supposed
>> to be used for? In the list of use cases you gave, I don't see
>> anything that could not be done with it.
>>
>>     
> The requirement for firmware configuration interface was different. We
> wanted something simple that we can use as early as possible in cpu init
> code and performance was not considered at all. Obviously PCI device doesn't
> fit for this. We don't want to write PCI driver inside a BIOS and PCI
> initialization is too late in HW initialization sequence.
>
> The requirement for vmchannel was that it should allow a guest
> to communicate with external (to qemu) process and with reasonable
> performance too. 

This is not a requirement that I think is important.  It's only a 
requirement for you because you have closed code that you want to 
implement the backend with.  I would personally be more interested in 
vmchannel backends in QEMU and I think there will be a lot of them.

But the firmware config interface is different than what is proposed 
here in a number of important ways.  The first is that this is a 
streaming communication mechanism verses a value/pair store.  It maps 
naturally to userspace via a socket abstraction and is present in a 
number of other hypervisors (XenSocket in Xen, VMCI in VMware, etc.).

I see the firmware config as more akin to a device tree or CMOS than a 
generic guest<=>host transport.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] Vmchannel PCI device.
  2008-12-14 11:50 ` [Qemu-devel] " Gleb Natapov
@ 2008-12-14 19:24   ` Anthony Liguori
  -1 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-14 19:24 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: qemu-devel, kvm

Gleb Natapov wrote:
> diff --git a/hw/pc.c b/hw/pc.c
> index 73dd8bc..57e3b1d 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -1095,7 +1095,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>          }
>      }
>  
> -    /* Add virtio block devices */
> +    /* Add virtio devices */
>   

Please don't make comments less specific.  We probably want to go in the 
opposite direction :-)

>      if (pci_enabled) {
>          int index;
>          int unit_id = 0;
> @@ -1104,11 +1104,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>              virtio_blk_init(pci_bus, drives_table[index].bdrv);
>              unit_id++;
>          }
> -    }
> -
> -    /* Add virtio balloon device */
> -    if (pci_enabled)
>          virtio_balloon_init(pci_bus);
> +	virtio_vmchannel_init(pci_bus);
>   

You're tab damaged.

> +    }
>  }
>  
>  static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
> diff --git a/hw/virtio-vmchannel.c b/hw/virtio-vmchannel.c
> new file mode 100644
> index 0000000..1f5e274
> --- /dev/null
> +++ b/hw/virtio-vmchannel.c
> @@ -0,0 +1,283 @@
> +/*
> + * Virtio VMChannel Device
> + *
> + * Copyright RedHat, inc. 2008
> + *
> + * Authors:
> + *      Gleb Natapov <gleb@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
>   

Did you intend for GPLv2 or GPLv2+?  There's no requirement either way 
but sometimes people just copy/paste these things.

> + */
> +
> +#include "qemu-common.h"
> +#include "sysemu.h"
> +#include "virtio.h"
> +#include "pc.h"
> +#include "qemu-char.h"
> +#include "virtio-vmchannel.h"
> +
> +//#define DEBUG_VMCHANNEL
> +
> +#ifdef DEBUG_VMCHANNEL
> +#define VMCHANNEL_DPRINTF(fmt, args...)                     \
> +    do { printf("VMCHANNEL: " fmt , ##args); } while (0)
> +#else
> +#define VMCHANNEL_DPRINTF(fmt, args...)
> +#endif
>   

I very much like just naming these things dprintf() but this is not a 
requirement.  Please do use C99 style though instead of GCC-ism.

> diff --git a/sysemu.h b/sysemu.h
> index 94cffaf..54d9c83 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -157,6 +157,10 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>  
>  #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
>  
> +#define MAX_VMCHANNEL_DEVICES 4
> +void virtio_vmchannel_init(PCIBus *bus);
> +void vmchannel_init(CharDriverState *hd, const char *name);
>   

This should be in virtio-vmchannel.h.

> -	    case QEMU_OPTION_loadvm:
> +            case QEMU_OPTION_vmchannel:
> +                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
> +                    fprintf(stderr, "qemu: too many vmchannel devices\n");
> +                    exit(1);
> +                }
> +                vmchannel_devices[vmchannel_device_index++] = strdup(optarg);
>   

No need to strdup().  optarg is good for the duration of execution.

I've only done a light review but things mostly look good.  I'd like to 
wait a bit to see what the reaction is on netdev before applying.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [Qemu-devel] Re: [PATCH] Vmchannel PCI device.
@ 2008-12-14 19:24   ` Anthony Liguori
  0 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-14 19:24 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: qemu-devel, kvm

Gleb Natapov wrote:
> diff --git a/hw/pc.c b/hw/pc.c
> index 73dd8bc..57e3b1d 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -1095,7 +1095,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>          }
>      }
>  
> -    /* Add virtio block devices */
> +    /* Add virtio devices */
>   

Please don't make comments less specific.  We probably want to go in the 
opposite direction :-)

>      if (pci_enabled) {
>          int index;
>          int unit_id = 0;
> @@ -1104,11 +1104,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>              virtio_blk_init(pci_bus, drives_table[index].bdrv);
>              unit_id++;
>          }
> -    }
> -
> -    /* Add virtio balloon device */
> -    if (pci_enabled)
>          virtio_balloon_init(pci_bus);
> +	virtio_vmchannel_init(pci_bus);
>   

You're tab damaged.

> +    }
>  }
>  
>  static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
> diff --git a/hw/virtio-vmchannel.c b/hw/virtio-vmchannel.c
> new file mode 100644
> index 0000000..1f5e274
> --- /dev/null
> +++ b/hw/virtio-vmchannel.c
> @@ -0,0 +1,283 @@
> +/*
> + * Virtio VMChannel Device
> + *
> + * Copyright RedHat, inc. 2008
> + *
> + * Authors:
> + *      Gleb Natapov <gleb@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
>   

Did you intend for GPLv2 or GPLv2+?  There's no requirement either way 
but sometimes people just copy/paste these things.

> + */
> +
> +#include "qemu-common.h"
> +#include "sysemu.h"
> +#include "virtio.h"
> +#include "pc.h"
> +#include "qemu-char.h"
> +#include "virtio-vmchannel.h"
> +
> +//#define DEBUG_VMCHANNEL
> +
> +#ifdef DEBUG_VMCHANNEL
> +#define VMCHANNEL_DPRINTF(fmt, args...)                     \
> +    do { printf("VMCHANNEL: " fmt , ##args); } while (0)
> +#else
> +#define VMCHANNEL_DPRINTF(fmt, args...)
> +#endif
>   

I very much like just naming these things dprintf() but this is not a 
requirement.  Please do use C99 style though instead of GCC-ism.

> diff --git a/sysemu.h b/sysemu.h
> index 94cffaf..54d9c83 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -157,6 +157,10 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>  
>  #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
>  
> +#define MAX_VMCHANNEL_DEVICES 4
> +void virtio_vmchannel_init(PCIBus *bus);
> +void vmchannel_init(CharDriverState *hd, const char *name);
>   

This should be in virtio-vmchannel.h.

> -	    case QEMU_OPTION_loadvm:
> +            case QEMU_OPTION_vmchannel:
> +                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
> +                    fprintf(stderr, "qemu: too many vmchannel devices\n");
> +                    exit(1);
> +                }
> +                vmchannel_devices[vmchannel_device_index++] = strdup(optarg);
>   

No need to strdup().  optarg is good for the duration of execution.

I've only done a light review but things mostly look good.  I'd like to 
wait a bit to see what the reaction is on netdev before applying.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 19:15     ` Anthony Liguori
@ 2008-12-14 19:37       ` Gleb Natapov
  2008-12-14 22:52         ` Anthony Liguori
  2008-12-14 22:13         ` Daniel P. Berrange
  1 sibling, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2008-12-14 19:37 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, kvm

On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> On Sun, Dec 14, 2008 at 02:28:23PM +0200, Blue Swirl wrote:
>>   
>>> On 12/14/08, Gleb Natapov <gleb@redhat.com> wrote:
>>>     
>>>> There is a need for communication channel between host and various
>>>>  agents that are running inside a VM guest. The channel will be used
>>>>  for statistic gathering, logging, cut & paste, host screen resolution
>>>>  changes notification, guest configuration etc.
>>>>       
>>> Isn't this exactly what the firmware configuration device was supposed
>>> to be used for? In the list of use cases you gave, I don't see
>>> anything that could not be done with it.
>>>
>>>     
>> The requirement for firmware configuration interface was different. We
>> wanted something simple that we can use as early as possible in cpu init
>> code and performance was not considered at all. Obviously PCI device doesn't
>> fit for this. We don't want to write PCI driver inside a BIOS and PCI
>> initialization is too late in HW initialization sequence.
>>
>> The requirement for vmchannel was that it should allow a guest
>> to communicate with external (to qemu) process and with reasonable
>> performance too. 
>
> This is not a requirement that I think is important.  It's only a  
> requirement for you because you have closed code that you want to  
> implement the backend with.  I would personally be more interested in  
> vmchannel backends in QEMU and I think there will be a lot of them.
>
I don't know why do you think that we are going to use that for closed
code or something. It will be used by libvirt and it is open source last
I checked.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] Vmchannel PCI device.
  2008-12-14 19:24   ` [Qemu-devel] " Anthony Liguori
@ 2008-12-14 19:44     ` Gleb Natapov
  -1 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2008-12-14 19:44 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, kvm

On Sun, Dec 14, 2008 at 01:24:01PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 73dd8bc..57e3b1d 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -1095,7 +1095,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>>          }
>>      }
>>  -    /* Add virtio block devices */
>> +    /* Add virtio devices */
>>   
>
> Please don't make comments less specific.  We probably want to go in the  
> opposite direction :-)
>
I change the comment because I also changes the code it describes.
Previously it registered only block device, now it registers balloon
and vmchannel.

>> +    }
>>  }
>>   static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
>> diff --git a/hw/virtio-vmchannel.c b/hw/virtio-vmchannel.c
>> new file mode 100644
>> index 0000000..1f5e274
>> --- /dev/null
>> +++ b/hw/virtio-vmchannel.c
>> @@ -0,0 +1,283 @@
>> +/*
>> + * Virtio VMChannel Device
>> + *
>> + * Copyright RedHat, inc. 2008
>> + *
>> + * Authors:
>> + *      Gleb Natapov <gleb@redhat.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>>   
>
> Did you intend for GPLv2 or GPLv2+?  There's no requirement either way  
> but sometimes people just copy/paste these things.
>
Will check :)

>> + */
>> +
>> +#include "qemu-common.h"
>> +#include "sysemu.h"
>> +#include "virtio.h"
>> +#include "pc.h"
>> +#include "qemu-char.h"
>> +#include "virtio-vmchannel.h"
>> +
>> +//#define DEBUG_VMCHANNEL
>> +
>> +#ifdef DEBUG_VMCHANNEL
>> +#define VMCHANNEL_DPRINTF(fmt, args...)                     \
>> +    do { printf("VMCHANNEL: " fmt , ##args); } while (0)
>> +#else
>> +#define VMCHANNEL_DPRINTF(fmt, args...)
>> +#endif
>>   
>
> I very much like just naming these things dprintf() but this is not a  
> requirement.  Please do use C99 style though instead of GCC-ism.
>
OK.

>> diff --git a/sysemu.h b/sysemu.h
>> index 94cffaf..54d9c83 100644
>> --- a/sysemu.h
>> +++ b/sysemu.h
>> @@ -157,6 +157,10 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>>   #define TFR(expr) do { if ((expr) != -1) break; } while (errno == 
>> EINTR)
>>  +#define MAX_VMCHANNEL_DEVICES 4
>> +void virtio_vmchannel_init(PCIBus *bus);
>> +void vmchannel_init(CharDriverState *hd, const char *name);
>>   
>
> This should be in virtio-vmchannel.h.
>
OK.

>> -	    case QEMU_OPTION_loadvm:
>> +            case QEMU_OPTION_vmchannel:
>> +                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
>> +                    fprintf(stderr, "qemu: too many vmchannel devices\n");
>> +                    exit(1);
>> +                }
>> +                vmchannel_devices[vmchannel_device_index++] = strdup(optarg);
>>   
>
> No need to strdup().  optarg is good for the duration of execution.
>
optarg is const and I change the string during parsing (call strsep on it).

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [Qemu-devel] Re: [PATCH] Vmchannel PCI device.
@ 2008-12-14 19:44     ` Gleb Natapov
  0 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2008-12-14 19:44 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, kvm

On Sun, Dec 14, 2008 at 01:24:01PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 73dd8bc..57e3b1d 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -1095,7 +1095,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>>          }
>>      }
>>  -    /* Add virtio block devices */
>> +    /* Add virtio devices */
>>   
>
> Please don't make comments less specific.  We probably want to go in the  
> opposite direction :-)
>
I change the comment because I also changes the code it describes.
Previously it registered only block device, now it registers balloon
and vmchannel.

>> +    }
>>  }
>>   static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
>> diff --git a/hw/virtio-vmchannel.c b/hw/virtio-vmchannel.c
>> new file mode 100644
>> index 0000000..1f5e274
>> --- /dev/null
>> +++ b/hw/virtio-vmchannel.c
>> @@ -0,0 +1,283 @@
>> +/*
>> + * Virtio VMChannel Device
>> + *
>> + * Copyright RedHat, inc. 2008
>> + *
>> + * Authors:
>> + *      Gleb Natapov <gleb@redhat.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>>   
>
> Did you intend for GPLv2 or GPLv2+?  There's no requirement either way  
> but sometimes people just copy/paste these things.
>
Will check :)

>> + */
>> +
>> +#include "qemu-common.h"
>> +#include "sysemu.h"
>> +#include "virtio.h"
>> +#include "pc.h"
>> +#include "qemu-char.h"
>> +#include "virtio-vmchannel.h"
>> +
>> +//#define DEBUG_VMCHANNEL
>> +
>> +#ifdef DEBUG_VMCHANNEL
>> +#define VMCHANNEL_DPRINTF(fmt, args...)                     \
>> +    do { printf("VMCHANNEL: " fmt , ##args); } while (0)
>> +#else
>> +#define VMCHANNEL_DPRINTF(fmt, args...)
>> +#endif
>>   
>
> I very much like just naming these things dprintf() but this is not a  
> requirement.  Please do use C99 style though instead of GCC-ism.
>
OK.

>> diff --git a/sysemu.h b/sysemu.h
>> index 94cffaf..54d9c83 100644
>> --- a/sysemu.h
>> +++ b/sysemu.h
>> @@ -157,6 +157,10 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
>>   #define TFR(expr) do { if ((expr) != -1) break; } while (errno == 
>> EINTR)
>>  +#define MAX_VMCHANNEL_DEVICES 4
>> +void virtio_vmchannel_init(PCIBus *bus);
>> +void vmchannel_init(CharDriverState *hd, const char *name);
>>   
>
> This should be in virtio-vmchannel.h.
>
OK.

>> -	    case QEMU_OPTION_loadvm:
>> +            case QEMU_OPTION_vmchannel:
>> +                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
>> +                    fprintf(stderr, "qemu: too many vmchannel devices\n");
>> +                    exit(1);
>> +                }
>> +                vmchannel_devices[vmchannel_device_index++] = strdup(optarg);
>>   
>
> No need to strdup().  optarg is good for the duration of execution.
>
optarg is const and I change the string during parsing (call strsep on it).

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 19:15     ` Anthony Liguori
@ 2008-12-14 22:13         ` Daniel P. Berrange
  2008-12-14 22:13         ` Daniel P. Berrange
  1 sibling, 0 replies; 32+ messages in thread
From: Daniel P. Berrange @ 2008-12-14 22:13 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Gleb Natapov, qemu-devel, kvm

On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
> >On Sun, Dec 14, 2008 at 02:28:23PM +0200, Blue Swirl wrote:
> >  
> >>On 12/14/08, Gleb Natapov <gleb@redhat.com> wrote:
> >>    
> >>>There is a need for communication channel between host and various
> >>> agents that are running inside a VM guest. The channel will be used
> >>> for statistic gathering, logging, cut & paste, host screen resolution
> >>> changes notification, guest configuration etc.
> >>>      
> >>Isn't this exactly what the firmware configuration device was supposed
> >>to be used for? In the list of use cases you gave, I don't see
> >>anything that could not be done with it.
> >>
> >>    
> >The requirement for firmware configuration interface was different. We
> >wanted something simple that we can use as early as possible in cpu init
> >code and performance was not considered at all. Obviously PCI device 
> >doesn't
> >fit for this. We don't want to write PCI driver inside a BIOS and PCI
> >initialization is too late in HW initialization sequence.
> >
> >The requirement for vmchannel was that it should allow a guest
> >to communicate with external (to qemu) process and with reasonable
> >performance too. 
> 
> This is not a requirement that I think is important.  It's only a 
> requirement for you because you have closed code that you want to 
> implement the backend with.  I would personally be more interested in 
> vmchannel backends in QEMU and I think there will be a lot of them.

One non-QEMU backend I can see being implemented is a DBus daemon,
providing a simple bus for RPC calls between guests & host. Or on
a similar theme, perhaps a QPid message broker in the host OS. Yet
another backend is a clustering service providing a virtual fence
device to VMs. All of these would live outside QEMU, and as such
exposing the backend using the character device infrastructure 
is a natural fit.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-14 22:13         ` Daniel P. Berrange
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel P. Berrange @ 2008-12-14 22:13 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Gleb Natapov, kvm

On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
> >On Sun, Dec 14, 2008 at 02:28:23PM +0200, Blue Swirl wrote:
> >  
> >>On 12/14/08, Gleb Natapov <gleb@redhat.com> wrote:
> >>    
> >>>There is a need for communication channel between host and various
> >>> agents that are running inside a VM guest. The channel will be used
> >>> for statistic gathering, logging, cut & paste, host screen resolution
> >>> changes notification, guest configuration etc.
> >>>      
> >>Isn't this exactly what the firmware configuration device was supposed
> >>to be used for? In the list of use cases you gave, I don't see
> >>anything that could not be done with it.
> >>
> >>    
> >The requirement for firmware configuration interface was different. We
> >wanted something simple that we can use as early as possible in cpu init
> >code and performance was not considered at all. Obviously PCI device 
> >doesn't
> >fit for this. We don't want to write PCI driver inside a BIOS and PCI
> >initialization is too late in HW initialization sequence.
> >
> >The requirement for vmchannel was that it should allow a guest
> >to communicate with external (to qemu) process and with reasonable
> >performance too. 
> 
> This is not a requirement that I think is important.  It's only a 
> requirement for you because you have closed code that you want to 
> implement the backend with.  I would personally be more interested in 
> vmchannel backends in QEMU and I think there will be a lot of them.

One non-QEMU backend I can see being implemented is a DBus daemon,
providing a simple bus for RPC calls between guests & host. Or on
a similar theme, perhaps a QPid message broker in the host OS. Yet
another backend is a clustering service providing a virtual fence
device to VMs. All of these would live outside QEMU, and as such
exposing the backend using the character device infrastructure 
is a natural fit.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 19:37       ` Gleb Natapov
@ 2008-12-14 22:52         ` Anthony Liguori
  2008-12-15  9:20             ` Avi Kivity
                             ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-14 22:52 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: qemu-devel, kvm

Gleb Natapov wrote:
> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>   
> I don't know why do you think that we are going to use that for closed
> code or something. It will be used by libvirt and it is open source last
> I checked.
>   

For what?

vmchannel was developed for SPICE, is this not right?  That's where my 
assumption comes from.  If there's another use case, please describe it.

Regards,

Anthony Liguori

> --
> 			Gleb.
>   


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 22:13         ` Daniel P. Berrange
@ 2008-12-14 22:56           ` Anthony Liguori
  -1 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-14 22:56 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Gleb Natapov, qemu-devel, kvm

Daniel P. Berrange wrote:
> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>   
> One non-QEMU backend I can see being implemented is a DBus daemon,
> providing a simple bus for RPC calls between guests & host.

The main problem with "external" backends is that they cannot easily 
participate in save/restore or live migration.  If you want to have an 
RPC mechanism, I would suggest implementing the backend in QEMU and 
hooking QEMU up to dbus.  Then you can implement proper save/restore.

>  Or on
> a similar theme, perhaps a QPid message broker in the host OS. Yet
> another backend is a clustering service providing a virtual fence
> device to VMs.

Why not use virtual networking for a clustering service (as you would in 
real machines).

>  All of these would live outside QEMU, and as such
> exposing the backend using the character device infrastructure 
> is a natural fit.
>   

If you don't have QEMU as a broker, it makes it very hard for QEMU to 
virtualization all of the resources exposed to the guest.  This 
complicates things like save/restore and complicates security policies 
since you now have things being done on behalf of a guest originating 
from another process.  It generally breaks the model of guest-as-a-process.

What's the argument to do these things external to QEMU?

Regards,

Anthony Liguori

> Daniel
>   


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-14 22:56           ` Anthony Liguori
  0 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-14 22:56 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: qemu-devel, Gleb Natapov, kvm

Daniel P. Berrange wrote:
> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>   
> One non-QEMU backend I can see being implemented is a DBus daemon,
> providing a simple bus for RPC calls between guests & host.

The main problem with "external" backends is that they cannot easily 
participate in save/restore or live migration.  If you want to have an 
RPC mechanism, I would suggest implementing the backend in QEMU and 
hooking QEMU up to dbus.  Then you can implement proper save/restore.

>  Or on
> a similar theme, perhaps a QPid message broker in the host OS. Yet
> another backend is a clustering service providing a virtual fence
> device to VMs.

Why not use virtual networking for a clustering service (as you would in 
real machines).

>  All of these would live outside QEMU, and as such
> exposing the backend using the character device infrastructure 
> is a natural fit.
>   

If you don't have QEMU as a broker, it makes it very hard for QEMU to 
virtualization all of the resources exposed to the guest.  This 
complicates things like save/restore and complicates security policies 
since you now have things being done on behalf of a guest originating 
from another process.  It generally breaks the model of guest-as-a-process.

What's the argument to do these things external to QEMU?

Regards,

Anthony Liguori

> Daniel
>   

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 22:56           ` Anthony Liguori
@ 2008-12-14 23:33             ` Daniel P. Berrange
  -1 siblings, 0 replies; 32+ messages in thread
From: Daniel P. Berrange @ 2008-12-14 23:33 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Gleb Natapov, qemu-devel, kvm

On Sun, Dec 14, 2008 at 04:56:49PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> >  
> >One non-QEMU backend I can see being implemented is a DBus daemon,
> >providing a simple bus for RPC calls between guests & host.
> 
> The main problem with "external" backends is that they cannot easily 
> participate in save/restore or live migration.  If you want to have an 
> RPC mechanism, I would suggest implementing the backend in QEMU and 
> hooking QEMU up to dbus.  Then you can implement proper save/restore.

DBus is a general purpose RPC service, which has little-to-no knowledge
of the semantics of application services running over it. Simply pushing
a backend into QEMU can't magically make sure all the application level
state is preserved across save/restore/migrate. For some protocols the
only viable option may be to explicitly give the equivalent of -EPIPE 
/ POLLHUP to the guest and have it explicitly re-establish connectivity 
with the host backend and re-initialize neccessary state if desired

> > Or on
> >a similar theme, perhaps a QPid message broker in the host OS. Yet
> >another backend is a clustering service providing a virtual fence
> >device to VMs.
> 
> Why not use virtual networking for a clustering service (as you would in 
> real machines).

It imposes a configuration & authentication burden on the guest to
use networking. When a virtual fence device is provided directly from
the host OS, you can get zero-config deployment of clustering with
the need to configure any authentication credentials in the guest.
This is a big plus over over the traditional setup for real machines.

> > All of these would live outside QEMU, and as such
> >exposing the backend using the character device infrastructure 
> >is a natural fit.
> 
> If you don't have QEMU as a broker, it makes it very hard for QEMU to 
> virtualization all of the resources exposed to the guest.  This 
> complicates things like save/restore and complicates security policies 
> since you now have things being done on behalf of a guest originating 
> from another process.  It generally breaks the model of guest-as-a-process.

This really depends on what you define the semantics of the vmchannel
protocol to be - specifically whether you want save/restore/migrate to
be totally opaque to the guest or not. I could imagine one option is to
have the guest end of the device be given -EPIPE when the backend is
restarted for restore/migrate, and choose to re-establish its connection
if so desired. This would not require QEMU to maintain any backend state.
For stateless datagram (UDP-like) application protocols there's nothing 
that there's no special support required for save/restore.

> What's the argument to do these things external to QEMU?

There are many potential uses cases for VMchannel, not all are going
to be general purpose things that everyone wants to use. Forcing alot
of application specific backend code into QEMU is not a good way to 
approach this from a maintenance point of view. Some backends may well
be well suited to living inside QEMU, while others may be better suited
as external services. 

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-14 23:33             ` Daniel P. Berrange
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel P. Berrange @ 2008-12-14 23:33 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Gleb Natapov, kvm

On Sun, Dec 14, 2008 at 04:56:49PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> >  
> >One non-QEMU backend I can see being implemented is a DBus daemon,
> >providing a simple bus for RPC calls between guests & host.
> 
> The main problem with "external" backends is that they cannot easily 
> participate in save/restore or live migration.  If you want to have an 
> RPC mechanism, I would suggest implementing the backend in QEMU and 
> hooking QEMU up to dbus.  Then you can implement proper save/restore.

DBus is a general purpose RPC service, which has little-to-no knowledge
of the semantics of application services running over it. Simply pushing
a backend into QEMU can't magically make sure all the application level
state is preserved across save/restore/migrate. For some protocols the
only viable option may be to explicitly give the equivalent of -EPIPE 
/ POLLHUP to the guest and have it explicitly re-establish connectivity 
with the host backend and re-initialize neccessary state if desired

> > Or on
> >a similar theme, perhaps a QPid message broker in the host OS. Yet
> >another backend is a clustering service providing a virtual fence
> >device to VMs.
> 
> Why not use virtual networking for a clustering service (as you would in 
> real machines).

It imposes a configuration & authentication burden on the guest to
use networking. When a virtual fence device is provided directly from
the host OS, you can get zero-config deployment of clustering with
the need to configure any authentication credentials in the guest.
This is a big plus over over the traditional setup for real machines.

> > All of these would live outside QEMU, and as such
> >exposing the backend using the character device infrastructure 
> >is a natural fit.
> 
> If you don't have QEMU as a broker, it makes it very hard for QEMU to 
> virtualization all of the resources exposed to the guest.  This 
> complicates things like save/restore and complicates security policies 
> since you now have things being done on behalf of a guest originating 
> from another process.  It generally breaks the model of guest-as-a-process.

This really depends on what you define the semantics of the vmchannel
protocol to be - specifically whether you want save/restore/migrate to
be totally opaque to the guest or not. I could imagine one option is to
have the guest end of the device be given -EPIPE when the backend is
restarted for restore/migrate, and choose to re-establish its connection
if so desired. This would not require QEMU to maintain any backend state.
For stateless datagram (UDP-like) application protocols there's nothing 
that there's no special support required for save/restore.

> What's the argument to do these things external to QEMU?

There are many potential uses cases for VMchannel, not all are going
to be general purpose things that everyone wants to use. Forcing alot
of application specific backend code into QEMU is not a good way to 
approach this from a maintenance point of view. Some backends may well
be well suited to living inside QEMU, while others may be better suited
as external services. 

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 11:50 ` [Qemu-devel] " Gleb Natapov
@ 2008-12-15  0:41   ` Paul Brook
  -1 siblings, 0 replies; 32+ messages in thread
From: Paul Brook @ 2008-12-15  0:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: Gleb Natapov, kvm

On Sunday 14 December 2008, Gleb Natapov wrote:
> There is a need for communication channel between host and various
> agents that are running inside a VM guest. The channel will be used
> for statistic gathering, logging, cut & paste, host screen resolution
> changes notification, guest configuration etc.

I thin the description you're looking for is "serial port" :-)

> The patch implements separate PCI device for this type of communication.
> To create a channel "-vmchannel channel:dev" option should be specified
> on qemu commmand line during VM launch.

Needs documentation.

Paul

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15  0:41   ` Paul Brook
  0 siblings, 0 replies; 32+ messages in thread
From: Paul Brook @ 2008-12-15  0:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, Gleb Natapov

On Sunday 14 December 2008, Gleb Natapov wrote:
> There is a need for communication channel between host and various
> agents that are running inside a VM guest. The channel will be used
> for statistic gathering, logging, cut & paste, host screen resolution
> changes notification, guest configuration etc.

I thin the description you're looking for is "serial port" :-)

> The patch implements separate PCI device for this type of communication.
> To create a channel "-vmchannel channel:dev" option should be specified
> on qemu commmand line during VM launch.

Needs documentation.

Paul

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 23:33             ` Daniel P. Berrange
@ 2008-12-15  1:18               ` Thiemo Seufer
  -1 siblings, 0 replies; 32+ messages in thread
From: Thiemo Seufer @ 2008-12-15  1:18 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Anthony Liguori, qemu-devel, Gleb Natapov, kvm

Daniel P. Berrange wrote:
[snip]
> > If you don't have QEMU as a broker, it makes it very hard for QEMU to 
> > virtualization all of the resources exposed to the guest.  This 
> > complicates things like save/restore and complicates security policies 
> > since you now have things being done on behalf of a guest originating 
> > from another process.  It generally breaks the model of guest-as-a-process.
> 
> This really depends on what you define the semantics of the vmchannel
> protocol to be - specifically whether you want save/restore/migrate to
> be totally opaque to the guest or not. I could imagine one option is to
> have the guest end of the device be given -EPIPE when the backend is
> restarted for restore/migrate, and choose to re-establish its connection
> if so desired. This would not require QEMU to maintain any backend state.
> For stateless datagram (UDP-like) application protocols there's nothing 
> that there's no special support required for save/restore.
> 
> > What's the argument to do these things external to QEMU?
> 
> There are many potential uses cases for VMchannel,

Could you describe a practical use case of VMchannel in Qemu? I think I
missed what this feature is good for. :-)

> not all are going to be general purpose things that everyone wants to
> use.

If it is only good for specialized esoteric stuff, why should it be in
Qemu?


Thiemo

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15  1:18               ` Thiemo Seufer
  0 siblings, 0 replies; 32+ messages in thread
From: Thiemo Seufer @ 2008-12-15  1:18 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Gleb Natapov, qemu-devel, kvm

Daniel P. Berrange wrote:
[snip]
> > If you don't have QEMU as a broker, it makes it very hard for QEMU to 
> > virtualization all of the resources exposed to the guest.  This 
> > complicates things like save/restore and complicates security policies 
> > since you now have things being done on behalf of a guest originating 
> > from another process.  It generally breaks the model of guest-as-a-process.
> 
> This really depends on what you define the semantics of the vmchannel
> protocol to be - specifically whether you want save/restore/migrate to
> be totally opaque to the guest or not. I could imagine one option is to
> have the guest end of the device be given -EPIPE when the backend is
> restarted for restore/migrate, and choose to re-establish its connection
> if so desired. This would not require QEMU to maintain any backend state.
> For stateless datagram (UDP-like) application protocols there's nothing 
> that there's no special support required for save/restore.
> 
> > What's the argument to do these things external to QEMU?
> 
> There are many potential uses cases for VMchannel,

Could you describe a practical use case of VMchannel in Qemu? I think I
missed what this feature is good for. :-)

> not all are going to be general purpose things that everyone wants to
> use.

If it is only good for specialized esoteric stuff, why should it be in
Qemu?


Thiemo

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-15  0:41   ` Paul Brook
  (?)
@ 2008-12-15  1:50   ` Anthony Liguori
  -1 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-15  1:50 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel, Gleb Natapov, kvm

Paul Brook wrote:
> On Sunday 14 December 2008, Gleb Natapov wrote:
>   
>> There is a need for communication channel between host and various
>> agents that are running inside a VM guest. The channel will be used
>> for statistic gathering, logging, cut & paste, host screen resolution
>> changes notification, guest configuration etc.
>>     
>
> I thin the description you're looking for is "serial port" :-)
>   

Gee, that sounds awfully familiar ;-)

Regards,

Anthony Liguori

>> The patch implements separate PCI device for this type of communication.
>> To create a channel "-vmchannel channel:dev" option should be specified
>> on qemu commmand line during VM launch.
>>     
>
> Needs documentation.
>
> Paul
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 23:33             ` Daniel P. Berrange
@ 2008-12-15  2:03               ` Anthony Liguori
  -1 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-15  2:03 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Gleb Natapov, qemu-devel, kvm

Daniel P. Berrange wrote:
> On Sun, Dec 14, 2008 at 04:56:49PM -0600, Anthony Liguori wrote:
>   
>> Daniel P. Berrange wrote:
>>     
>>> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>>>  
>>> One non-QEMU backend I can see being implemented is a DBus daemon,
>>> providing a simple bus for RPC calls between guests & host.
>>>       
>> The main problem with "external" backends is that they cannot easily 
>> participate in save/restore or live migration.  If you want to have an 
>> RPC mechanism, I would suggest implementing the backend in QEMU and 
>> hooking QEMU up to dbus.  Then you can implement proper save/restore.
>>     
>
> DBus is a general purpose RPC service, which has little-to-no knowledge
> of the semantics of application services running over it. Simply pushing
> a backend into QEMU can't magically make sure all the application level
> state is preserved across save/restore/migrate. For some protocols the
> only viable option may be to explicitly give the equivalent of -EPIPE 
> / POLLHUP to the guest and have it explicitly re-establish connectivity 
> with the host backend and re-initialize neccessary state if desired
>   

In the case of dbus, you actually have a shot of making save/restore 
transparent.  If you send the RPCs, you can parse the messages in QEMU 
and know when you have a complete buffer.  You can then dispatch the RPC 
from QEMU (and BTW, perfect example of security, you want the RPCs to 
originate from the QEMU process).  When you get the RPC response, you 
can marshal it and make it available to the guest.

If you ever have a request or response, you should save the partial 
results as part of save/restore.  You could use the live feature of 
savevm to attempt to wait until there are no pending RPCs.  In fact, you 
have to do this because otherwise, the save/restore would be broken.

This example is particularly bad for EPIPE.  If the guest sends an RPC, 
what happens if it gets EPIPE?  Has it been completed?  It would make it 
very difficult to program for this model.

EPIPE is the model Xen used for guest save/restore and it's been a huge 
hassle.  You don't want guests involved in save/restore because it adds 
a combinatorial factor to your test matrix.  You have to now test every 
host combination with every supported guest combination to ensure that 
save/restore has not regressed.  It's a huge burden and IMHO is never 
truly necessary.

> It imposes a configuration & authentication burden on the guest to
> use networking. When a virtual fence device is provided directly from
> the host OS, you can get zero-config deployment of clustering with
> the need to configure any authentication credentials in the guest.
> This is a big plus over over the traditional setup for real machines.
>   

If you just want to use vmchannel for networking without the 
"configuration" burden then someone heavily involved with a distro 
should just preconfigure, say Fedora, to create a private network on a 
dedicated network interface as soon as the system starts.  Then you have 
a dedicated, never disappearing network interface you can use for all of 
this stuff.  And it requires no application modification to boot.

> This really depends on what you define the semantics of the vmchannel
> protocol to be - specifically whether you want save/restore/migrate to
> be totally opaque to the guest or not. I could imagine one option is to
> have the guest end of the device be given -EPIPE when the backend is
> restarted for restore/migrate, and choose to re-establish its connection
> if so desired. This would not require QEMU to maintain any backend state.
> For stateless datagram (UDP-like) application protocols there's nothing 
> that there's no special support required for save/restore.
>   

It's a losing proposition because it explodes the test matrix to build 
anything that's even remotely robust.

>> What's the argument to do these things external to QEMU?
>>     
>
> There are many potential uses cases for VMchannel, not all are going
> to be general purpose things that everyone wants to use. Forcing alot
> of application specific backend code into QEMU is not a good way to 
> approach this from a maintenance point of view. Some backends may well
> be well suited to living inside QEMU, while others may be better suited
> as external services. 
>   

I think VMchannel is a useful concept but not for the same reasons you 
do :-)

Regards,

Anthony Liguori

> Daniel
>   


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15  2:03               ` Anthony Liguori
  0 siblings, 0 replies; 32+ messages in thread
From: Anthony Liguori @ 2008-12-15  2:03 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: qemu-devel, Gleb Natapov, kvm

Daniel P. Berrange wrote:
> On Sun, Dec 14, 2008 at 04:56:49PM -0600, Anthony Liguori wrote:
>   
>> Daniel P. Berrange wrote:
>>     
>>> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>>>  
>>> One non-QEMU backend I can see being implemented is a DBus daemon,
>>> providing a simple bus for RPC calls between guests & host.
>>>       
>> The main problem with "external" backends is that they cannot easily 
>> participate in save/restore or live migration.  If you want to have an 
>> RPC mechanism, I would suggest implementing the backend in QEMU and 
>> hooking QEMU up to dbus.  Then you can implement proper save/restore.
>>     
>
> DBus is a general purpose RPC service, which has little-to-no knowledge
> of the semantics of application services running over it. Simply pushing
> a backend into QEMU can't magically make sure all the application level
> state is preserved across save/restore/migrate. For some protocols the
> only viable option may be to explicitly give the equivalent of -EPIPE 
> / POLLHUP to the guest and have it explicitly re-establish connectivity 
> with the host backend and re-initialize neccessary state if desired
>   

In the case of dbus, you actually have a shot of making save/restore 
transparent.  If you send the RPCs, you can parse the messages in QEMU 
and know when you have a complete buffer.  You can then dispatch the RPC 
from QEMU (and BTW, perfect example of security, you want the RPCs to 
originate from the QEMU process).  When you get the RPC response, you 
can marshal it and make it available to the guest.

If you ever have a request or response, you should save the partial 
results as part of save/restore.  You could use the live feature of 
savevm to attempt to wait until there are no pending RPCs.  In fact, you 
have to do this because otherwise, the save/restore would be broken.

This example is particularly bad for EPIPE.  If the guest sends an RPC, 
what happens if it gets EPIPE?  Has it been completed?  It would make it 
very difficult to program for this model.

EPIPE is the model Xen used for guest save/restore and it's been a huge 
hassle.  You don't want guests involved in save/restore because it adds 
a combinatorial factor to your test matrix.  You have to now test every 
host combination with every supported guest combination to ensure that 
save/restore has not regressed.  It's a huge burden and IMHO is never 
truly necessary.

> It imposes a configuration & authentication burden on the guest to
> use networking. When a virtual fence device is provided directly from
> the host OS, you can get zero-config deployment of clustering with
> the need to configure any authentication credentials in the guest.
> This is a big plus over over the traditional setup for real machines.
>   

If you just want to use vmchannel for networking without the 
"configuration" burden then someone heavily involved with a distro 
should just preconfigure, say Fedora, to create a private network on a 
dedicated network interface as soon as the system starts.  Then you have 
a dedicated, never disappearing network interface you can use for all of 
this stuff.  And it requires no application modification to boot.

> This really depends on what you define the semantics of the vmchannel
> protocol to be - specifically whether you want save/restore/migrate to
> be totally opaque to the guest or not. I could imagine one option is to
> have the guest end of the device be given -EPIPE when the backend is
> restarted for restore/migrate, and choose to re-establish its connection
> if so desired. This would not require QEMU to maintain any backend state.
> For stateless datagram (UDP-like) application protocols there's nothing 
> that there's no special support required for save/restore.
>   

It's a losing proposition because it explodes the test matrix to build 
anything that's even remotely robust.

>> What's the argument to do these things external to QEMU?
>>     
>
> There are many potential uses cases for VMchannel, not all are going
> to be general purpose things that everyone wants to use. Forcing alot
> of application specific backend code into QEMU is not a good way to 
> approach this from a maintenance point of view. Some backends may well
> be well suited to living inside QEMU, while others may be better suited
> as external services. 
>   

I think VMchannel is a useful concept but not for the same reasons you 
do :-)

Regards,

Anthony Liguori

> Daniel
>   

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 22:52         ` Anthony Liguori
@ 2008-12-15  9:20             ` Avi Kivity
  2008-12-15  9:25             ` Dan Kenigsberg
  2008-12-15 15:43             ` Dan Kenigsberg
  2 siblings, 0 replies; 32+ messages in thread
From: Avi Kivity @ 2008-12-15  9:20 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Gleb Natapov, qemu-devel, kvm

Anthony Liguori wrote:
>
> vmchannel was developed for SPICE, is this not right?

No, spice does its own thing.  It's dma intensive, so it isn't a good 
fit for vmchannel.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15  9:20             ` Avi Kivity
  0 siblings, 0 replies; 32+ messages in thread
From: Avi Kivity @ 2008-12-15  9:20 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Gleb Natapov, kvm

Anthony Liguori wrote:
>
> vmchannel was developed for SPICE, is this not right?

No, spice does its own thing.  It's dma intensive, so it isn't a good 
fit for vmchannel.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 22:52         ` Anthony Liguori
@ 2008-12-15  9:25             ` Dan Kenigsberg
  2008-12-15  9:25             ` Dan Kenigsberg
  2008-12-15 15:43             ` Dan Kenigsberg
  2 siblings, 0 replies; 32+ messages in thread
From: Dan Kenigsberg @ 2008-12-15  9:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: Gleb Natapov, kvm

On Sun, Dec 14, 2008 at 04:52:20PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>>   I don't know why do you think that we are going to use that for 
>> closed
>> code or something. It will be used by libvirt and it is open source last
>> I checked.
>>   
>
> For what?
>
> vmchannel was developed for SPICE, is this not right?  That's where my  
> assumption comes from.  If there's another use case, please describe it.

Our management system uses vmchannel to communicate with an agent
running on the guest.

We use this agent to collect information about the guest OS: e.g.,
installed applications, who's logged in, whether anything's running, or
the guest is rebooting.

The agent is capable of performing operations on the guest, too. We use
this to log a user in (for single sign-on), to log a user out before
migrating to file, to renew the guest's dhcp lease if the guest is
migrated to another subnet, to name a few uses.

Dan.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15  9:25             ` Dan Kenigsberg
  0 siblings, 0 replies; 32+ messages in thread
From: Dan Kenigsberg @ 2008-12-15  9:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, Gleb Natapov

On Sun, Dec 14, 2008 at 04:52:20PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>>   I don't know why do you think that we are going to use that for 
>> closed
>> code or something. It will be used by libvirt and it is open source last
>> I checked.
>>   
>
> For what?
>
> vmchannel was developed for SPICE, is this not right?  That's where my  
> assumption comes from.  If there's another use case, please describe it.

Our management system uses vmchannel to communicate with an agent
running on the guest.

We use this agent to collect information about the guest OS: e.g.,
installed applications, who's logged in, whether anything's running, or
the guest is rebooting.

The agent is capable of performing operations on the guest, too. We use
this to log a user in (for single sign-on), to log a user out before
migrating to file, to renew the guest's dhcp lease if the guest is
migrated to another subnet, to name a few uses.

Dan.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-15  2:03               ` Anthony Liguori
@ 2008-12-15  9:47                 ` Daniel P. Berrange
  -1 siblings, 0 replies; 32+ messages in thread
From: Daniel P. Berrange @ 2008-12-15  9:47 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Gleb Natapov, qemu-devel, kvm

On Sun, Dec 14, 2008 at 08:03:39PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >On Sun, Dec 14, 2008 at 04:56:49PM -0600, Anthony Liguori wrote:
> >  
> >>Daniel P. Berrange wrote:
> >>    
> >>>On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> >>> 
> >>>One non-QEMU backend I can see being implemented is a DBus daemon,
> >>>providing a simple bus for RPC calls between guests & host.
> >>>      
> >>The main problem with "external" backends is that they cannot easily 
> >>participate in save/restore or live migration.  If you want to have an 
> >>RPC mechanism, I would suggest implementing the backend in QEMU and 
> >>hooking QEMU up to dbus.  Then you can implement proper save/restore.
> >>    
> >
> >DBus is a general purpose RPC service, which has little-to-no knowledge
> >of the semantics of application services running over it. Simply pushing
> >a backend into QEMU can't magically make sure all the application level
> >state is preserved across save/restore/migrate. For some protocols the
> >only viable option may be to explicitly give the equivalent of -EPIPE 
> >/ POLLHUP to the guest and have it explicitly re-establish connectivity 
> >with the host backend and re-initialize neccessary state if desired
> >  
> 
> In the case of dbus, you actually have a shot of making save/restore 
> transparent.  If you send the RPCs, you can parse the messages in QEMU 
> and know when you have a complete buffer.  You can then dispatch the RPC 
> from QEMU (and BTW, perfect example of security, you want the RPCs to 
> originate from the QEMU process).  When you get the RPC response, you 
> can marshal it and make it available to the guest.
> 
> If you ever have a request or response, you should save the partial 
> results as part of save/restore.  You could use the live feature of 
> savevm to attempt to wait until there are no pending RPCs.  In fact, you 
> have to do this because otherwise, the save/restore would be broken.

This is missing the point I was trying to make. Sure you can parse the
RPC calls and know where the boundaries are so you can ensure a consistent
RPC stream, but that's not the state problem I was trying to describe.

There is state in the higher level application relating to the RPC calls.
So, consider two RPC calls made by say NetworkManager

      - Create FOO()
      - Run FOO.Bar()

We save/restore half-way through the 'Run FOO.Bar()' method, QEMU can see
this and so it replays the interruptted 'Run FOO.Bar()' method call upon 
restore. This is usless because there is nothing that says object 'FOO()'
even exists on the host the VM is being restored on.

Thus for this kind of RPC service I believe is it better not to try and
be transparent in save/restore. Explicitly break the communication channel
and allow the guest VM to re-discover the relative services it wants to
talk with on the host.

> This example is particularly bad for EPIPE.  If the guest sends an RPC, 
> what happens if it gets EPIPE?  Has it been completed?  It would make it 
> very difficult to program for this model.

This is really a question of what semantics the application wants to provide.
DBus is not attempting to provide a guarenteed reliable message delivery
service, and its specification does not require any particular semantics 
upon connection failure. This is already true of the existing TCP based
impl of DBus. So whether in-flight RPCs complete or not is 'undefined',
and thus upon re-connection the client in the guest needs to re-discover
whatever it was talking to and verify what state its in.

A service like AMQP/QPid does provide a guarenteed reliable message stream
with the neccessary protocol handshakes/acknowledgments to be able to 
implement clearly defined semantics upon connection failure. ie the app is 
able to get a guarenteed response as to whether the message has been
delivered or not, even in the face of network comms failure.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15  9:47                 ` Daniel P. Berrange
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel P. Berrange @ 2008-12-15  9:47 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Gleb Natapov, kvm

On Sun, Dec 14, 2008 at 08:03:39PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >On Sun, Dec 14, 2008 at 04:56:49PM -0600, Anthony Liguori wrote:
> >  
> >>Daniel P. Berrange wrote:
> >>    
> >>>On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
> >>> 
> >>>One non-QEMU backend I can see being implemented is a DBus daemon,
> >>>providing a simple bus for RPC calls between guests & host.
> >>>      
> >>The main problem with "external" backends is that they cannot easily 
> >>participate in save/restore or live migration.  If you want to have an 
> >>RPC mechanism, I would suggest implementing the backend in QEMU and 
> >>hooking QEMU up to dbus.  Then you can implement proper save/restore.
> >>    
> >
> >DBus is a general purpose RPC service, which has little-to-no knowledge
> >of the semantics of application services running over it. Simply pushing
> >a backend into QEMU can't magically make sure all the application level
> >state is preserved across save/restore/migrate. For some protocols the
> >only viable option may be to explicitly give the equivalent of -EPIPE 
> >/ POLLHUP to the guest and have it explicitly re-establish connectivity 
> >with the host backend and re-initialize neccessary state if desired
> >  
> 
> In the case of dbus, you actually have a shot of making save/restore 
> transparent.  If you send the RPCs, you can parse the messages in QEMU 
> and know when you have a complete buffer.  You can then dispatch the RPC 
> from QEMU (and BTW, perfect example of security, you want the RPCs to 
> originate from the QEMU process).  When you get the RPC response, you 
> can marshal it and make it available to the guest.
> 
> If you ever have a request or response, you should save the partial 
> results as part of save/restore.  You could use the live feature of 
> savevm to attempt to wait until there are no pending RPCs.  In fact, you 
> have to do this because otherwise, the save/restore would be broken.

This is missing the point I was trying to make. Sure you can parse the
RPC calls and know where the boundaries are so you can ensure a consistent
RPC stream, but that's not the state problem I was trying to describe.

There is state in the higher level application relating to the RPC calls.
So, consider two RPC calls made by say NetworkManager

      - Create FOO()
      - Run FOO.Bar()

We save/restore half-way through the 'Run FOO.Bar()' method, QEMU can see
this and so it replays the interruptted 'Run FOO.Bar()' method call upon 
restore. This is usless because there is nothing that says object 'FOO()'
even exists on the host the VM is being restored on.

Thus for this kind of RPC service I believe is it better not to try and
be transparent in save/restore. Explicitly break the communication channel
and allow the guest VM to re-discover the relative services it wants to
talk with on the host.

> This example is particularly bad for EPIPE.  If the guest sends an RPC, 
> what happens if it gets EPIPE?  Has it been completed?  It would make it 
> very difficult to program for this model.

This is really a question of what semantics the application wants to provide.
DBus is not attempting to provide a guarenteed reliable message delivery
service, and its specification does not require any particular semantics 
upon connection failure. This is already true of the existing TCP based
impl of DBus. So whether in-flight RPCs complete or not is 'undefined',
and thus upon re-connection the client in the guest needs to re-discover
whatever it was talking to and verify what state its in.

A service like AMQP/QPid does provide a guarenteed reliable message stream
with the neccessary protocol handshakes/acknowledgments to be able to 
implement clearly defined semantics upon connection failure. ie the app is 
able to get a guarenteed response as to whether the message has been
delivered or not, even in the face of network comms failure.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
  2008-12-14 22:52         ` Anthony Liguori
@ 2008-12-15 15:43             ` Dan Kenigsberg
  2008-12-15  9:25             ` Dan Kenigsberg
  2008-12-15 15:43             ` Dan Kenigsberg
  2 siblings, 0 replies; 32+ messages in thread
From: Dan Kenigsberg @ 2008-12-15 15:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: Gleb Natapov, kvm

On Sun, Dec 14, 2008 at 04:52:20PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>>   I don't know why do you think that we are going to use that for 
>> closed
>> code or something. It will be used by libvirt and it is open source last
>> I checked.
>>   
>
> For what?
>
> vmchannel was developed for SPICE, is this not right?  That's where my  
> assumption comes from.  If there's another use case, please describe it.

Our management system uses vmchannel to communicate with an agent
running on the guest.

We use this agent to collect information about the guest OS: e.g.,
installed applications, who's logged in, whether anything's running, or
the guest is rebooting.

The agent is capable of performing operations on the guest, too. We use
this to log a user in (for single sign-on), to log a user out before
migrating to file, to renew the guest's dhcp lease if the guest is
migrated to another subnet, to name a few uses.

Dan.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH] Vmchannel PCI device.
@ 2008-12-15 15:43             ` Dan Kenigsberg
  0 siblings, 0 replies; 32+ messages in thread
From: Dan Kenigsberg @ 2008-12-15 15:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm, Gleb Natapov

On Sun, Dec 14, 2008 at 04:52:20PM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> On Sun, Dec 14, 2008 at 01:15:42PM -0600, Anthony Liguori wrote:
>>   I don't know why do you think that we are going to use that for 
>> closed
>> code or something. It will be used by libvirt and it is open source last
>> I checked.
>>   
>
> For what?
>
> vmchannel was developed for SPICE, is this not right?  That's where my  
> assumption comes from.  If there's another use case, please describe it.

Our management system uses vmchannel to communicate with an agent
running on the guest.

We use this agent to collect information about the guest OS: e.g.,
installed applications, who's logged in, whether anything's running, or
the guest is rebooting.

The agent is capable of performing operations on the guest, too. We use
this to log a user in (for single sign-on), to log a user out before
migrating to file, to renew the guest's dhcp lease if the guest is
migrated to another subnet, to name a few uses.

Dan.

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2008-12-15 15:43 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-14 11:50 [PATCH] Vmchannel PCI device Gleb Natapov
2008-12-14 11:50 ` [Qemu-devel] " Gleb Natapov
2008-12-14 12:28 ` Blue Swirl
2008-12-14 13:12   ` Gleb Natapov
2008-12-14 19:15     ` Anthony Liguori
2008-12-14 19:37       ` Gleb Natapov
2008-12-14 22:52         ` Anthony Liguori
2008-12-15  9:20           ` Avi Kivity
2008-12-15  9:20             ` Avi Kivity
2008-12-15  9:25           ` Dan Kenigsberg
2008-12-15  9:25             ` Dan Kenigsberg
2008-12-15 15:43           ` Dan Kenigsberg
2008-12-15 15:43             ` Dan Kenigsberg
2008-12-14 22:13       ` Daniel P. Berrange
2008-12-14 22:13         ` Daniel P. Berrange
2008-12-14 22:56         ` Anthony Liguori
2008-12-14 22:56           ` Anthony Liguori
2008-12-14 23:33           ` Daniel P. Berrange
2008-12-14 23:33             ` Daniel P. Berrange
2008-12-15  1:18             ` Thiemo Seufer
2008-12-15  1:18               ` Thiemo Seufer
2008-12-15  2:03             ` Anthony Liguori
2008-12-15  2:03               ` Anthony Liguori
2008-12-15  9:47               ` Daniel P. Berrange
2008-12-15  9:47                 ` Daniel P. Berrange
2008-12-14 19:24 ` Anthony Liguori
2008-12-14 19:24   ` [Qemu-devel] " Anthony Liguori
2008-12-14 19:44   ` Gleb Natapov
2008-12-14 19:44     ` [Qemu-devel] " Gleb Natapov
2008-12-15  0:41 ` [Qemu-devel] " Paul Brook
2008-12-15  0:41   ` Paul Brook
2008-12-15  1:50   ` Anthony Liguori

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.