All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3] tests/vhost-user-bridge: implement logging of dirty pages
@ 2015-11-12 19:15 Victor Kaplansky
  0 siblings, 0 replies; only message in thread
From: Victor Kaplansky @ 2015-11-12 19:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Victor Kaplansky, Marc-Andre Lureau, peterx, Michael S. Tsirkin

During migration devices continue writing to the guest's memory.
The writes has to be reported to QEMU. This change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.

Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
v3:
    - Get rid of vhost_log_chunk_t. Just use uint8_t.
    - Implement vubr_set_log_fd_exec().
    - Kick the log if log_call_fd has been set up.
    - Mark bits in log table atomically to enable more then one
      simultaneous vhost-user backend.
    - Fix the calculations of required log table size in an
      assert.
    - Fix the coding style: only single space before assignment
      operator.
    - Add a comment on the hack to determine that queues are
      ready for processing.
    - Other minor cosmetic fixes.
v2:
    - use log_guest_addr for used ring reported by qemu instead of
      translating.
    - use mmap_size and mmap_offset defined in new
      VHOST_USER_SET_LOG_BASE interface. See the patch
      "vhost-user: modify SET_LOG_BASE to pass mmap size and
      offset".
    - start logging dirty pages only after the appropriate feature
      is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
    - updated TODO list.

 tests/vhost-user-bridge.c | 205 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 181 insertions(+), 24 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..65aca0b 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
 /*
  * TODO:
  *     - main should get parameters from the command line.
- *     - implement all request handlers.
+ *     - implement all request handlers. Still not implemented:
+ *          vubr_set_protocol_features_exec()
+ *          vubr_get_queue_num_exec()
+ *          vubr_set_vring_enable_exec()
+ *          vubr_send_rarp_exec()
+ *          vubr_set_log_fd_exec()
  *     - test for broken requests and virtqueue.
  *     - implement features defined by Virtio 1.0 spec.
  *     - support mergeable buffers and indirect descriptors.
- *     - implement RESET_DEVICE request.
  *     - implement clean shutdown.
  *     - implement non-blocking writes to UDP backend.
  *     - implement polling strategy.
  */
 
+#define _FILE_OFFSET_BITS 64
+
 #include <stddef.h>
 #include <assert.h>
 #include <stdio.h>
@@ -166,6 +172,7 @@ typedef struct VubrVirtq {
     struct vring_desc *desc;
     struct vring_avail *avail;
     struct vring_used *used;
+    uint64_t log_guest_addr;
 } VubrVirtq;
 
 /* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +180,8 @@ typedef struct VubrVirtq {
 #define VHOST_MEMORY_MAX_NREGIONS    8
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+#define VHOST_LOG_PAGE 4096
+
 enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_MQ = 0,
     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +229,11 @@ typedef struct VhostUserMemory {
     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
 typedef struct VhostUserMsg {
     VhostUserRequest request;
 
@@ -234,6 +248,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
+        VhostUserLog log;
     } payload;
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int fd_num;
@@ -265,8 +280,13 @@ typedef struct VubrDev {
     uint32_t nregions;
     VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
     VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    uint8_t *log_table;
     int backend_udp_sock;
     struct sockaddr_in backend_udp_dest;
+    int ready;
+    uint64_t features;
 } VubrDev;
 
 static const char *vubr_request_str[] = {
@@ -368,7 +388,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     rc = recvmsg(conn_fd, &msg, 0);
 
-    if (rc <= 0) {
+    if (rc == 0) {
+        vubr_die("recvmsg");
+        fprintf(stderr, "Peer disconnected.\n");
+        exit(1);
+    }
+    if (rc < 0) {
         vubr_die("recvmsg");
     }
 
@@ -395,7 +420,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     if (vmsg->size) {
         rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc <= 0) {
+        if (rc == 0) {
+            vubr_die("recvmsg");
+            fprintf(stderr, "Peer disconnected.\n");
+            exit(1);
+        }
+        if (rc < 0) {
             vubr_die("recvmsg");
         }
 
@@ -455,6 +485,16 @@ vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
     vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
 }
 
+/* Kick the log_call_fd if required. */
+static void
+vubr_log_kick(VubrDev *dev)
+{
+    if (dev->log_call_fd != -1) {
+        DPRINT("Kicking the QEMU's log...\n");
+        eventfd_write(dev->log_call_fd, 1);
+    }
+}
+
 /* Kick the guest if necessary. */
 static void
 vubr_virtqueue_kick(VubrVirtq *vq)
@@ -466,11 +506,38 @@ vubr_virtqueue_kick(VubrVirtq *vq)
 }
 
 static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vubr_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+    vubr_log_kick(dev);
+}
+
+static void
 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 {
-    struct vring_desc *desc   = vq->desc;
+    struct vring_desc *desc = vq->desc;
     struct vring_avail *avail = vq->avail;
-    struct vring_used *used   = vq->used;
+    struct vring_used *used = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
 
@@ -510,6 +577,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 
     if (len <= chunk_len) {
         memcpy(chunk_start, buf, len);
+        vubr_log_write(dev, desc[i].addr, len);
     } else {
         fprintf(stderr,
                 "Received too long packet from the backend. Dropping...\n");
@@ -519,11 +587,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vq->last_avail_index++;
     vq->last_used_index++;
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 
     /* Kick the guest if necessary. */
     vubr_virtqueue_kick(vq);
@@ -532,9 +606,10 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 static int
 vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
 {
-    struct vring_desc *desc   = vq->desc;
+    struct vring_desc *desc = vq->desc;
     struct vring_avail *avail = vq->avail;
-    struct vring_used *used   = vq->used;
+    struct vring_used *used = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
 
@@ -552,6 +627,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
         void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
         uint32_t chunk_len = desc[i].len;
 
+        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
         if (len + chunk_len < buf_size) {
             memcpy(buf + len, chunk_start, chunk_len);
             DPRINT("%d ", chunk_len);
@@ -577,6 +654,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vubr_consume_raw_packet(dev, buf, len);
 
@@ -588,6 +668,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
 {
     struct vring_avail *avail = vq->avail;
     struct vring_used *used = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
         vubr_process_desc(dev, vq);
@@ -596,6 +677,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
     }
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 }
 
 static void
@@ -609,6 +693,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
     int buflen = sizeof(buf);
     int len;
 
+    if (!dev->ready) {
+        return;
+    }
+
     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
 
     uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,14 +744,14 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     vmsg->payload.u64 =
             ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-             (1ULL << VIRTIO_NET_F_CTRL_RX) |
-             (1ULL << VHOST_F_LOG_ALL));
+             (1ULL << VHOST_F_LOG_ALL) |
+             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
     vmsg->size = sizeof(vmsg->payload.u64);
 
     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 
-    /* reply */
+    /* Reply */
     return 1;
 }
 
@@ -671,6 +759,7 @@ static int
 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    dev->features = vmsg->payload.u64;
     return 0;
 }
 
@@ -680,10 +769,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
     return 0;
 }
 
+static void
+vubr_close_log(VubrDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            vubr_die("munmap()");
+        }
+
+        dev->log_table = 0;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
 static int
 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    vubr_close_log(dev);
+    dev->ready = 0;
+    dev->features = 0;
     return 0;
 }
 
@@ -710,9 +817,9 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("    mmap_offset      0x%016"PRIx64"\n",
                msg_region->mmap_offset);
 
-        dev_region->gpa         = msg_region->guest_phys_addr;
-        dev_region->size        = msg_region->memory_size;
-        dev_region->qva         = msg_region->userspace_addr;
+        dev_region->gpa = msg_region->guest_phys_addr;
+        dev_region->size = msg_region->memory_size;
+        dev_region->qva = msg_region->userspace_addr;
         dev_region->mmap_offset = msg_region->mmap_offset;
 
         /* We don't use offset argument of mmap() since the
@@ -736,14 +843,38 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    int fd;
+    uint64_t log_mmap_size, log_mmap_offset;
+    void *rc;
+
+    assert(vmsg->fd_num == 1);
+    fd = vmsg->fds[0];
+
+    assert(vmsg->size == sizeof(vmsg->payload.log));
+    log_mmap_offset = vmsg->payload.log.mmap_offset;
+    log_mmap_size = vmsg->payload.log.mmap_size;
+    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
+
+    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+              log_mmap_offset);
+    if (rc == MAP_FAILED) {
+        vubr_die("mmap");
+    }
+    dev->log_table = rc;
+    dev->log_size = log_mmap_size;
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+    /* Reply */
+    return 1;
 }
 
 static int
 vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    assert(vmsg->fd_num == 1);
+    dev->log_call_fd = vmsg->fds[0];
+    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
     return 0;
 }
 
@@ -777,6 +908,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
     vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
     vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
     vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+    vq->log_guest_addr = vra->log_guest_addr;
 
     DPRINT("Setting virtq addresses:\n");
     DPRINT("    vring_desc  at %p\n", vq->desc);
@@ -803,8 +935,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_index;
+    vmsg->size = sizeof(vmsg->payload.state);
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -829,7 +967,16 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
                dev->vq[index].kick_fd, index);
     }
+    /* We temporarily use this hack to determine that both TX and RX
+     * queues are set up and ready for processing.
+     * FIXME: we need to rely in VRING_ENABLE and actual kicks. */
+    if (dev->vq[0].kick_fd != -1 &&
+        dev->vq[1].kick_fd != -1) {
+        dev->ready = 1;
+        DPRINT("vhost-user-bridge is ready for processing queues.\n");
+    }
     return 0;
+
 }
 
 static int
@@ -858,9 +1005,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    /* FIXME: unimplented */
+    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -987,7 +1137,7 @@ vubr_accept_cb(int sock, void *ctx)
     socklen_t len = sizeof(un);
 
     conn_fd = accept(sock, (struct sockaddr *) &un, &len);
-    if (conn_fd  == -1) {
+    if (conn_fd == -1) {
         vubr_die("accept()");
     }
     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
@@ -1012,6 +1162,13 @@ vubr_new(const char *path)
         };
     }
 
+    /* Init log */
+    dev->log_call_fd = -1;
+    dev->log_size = 0;
+    dev->log_table = 0;
+    dev->ready = 0;
+    dev->features = 0;
+
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
     if (dev->sock == -1) {
-- 
--Victor

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2015-11-12 19:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-12 19:15 [Qemu-devel] [PATCH v3] tests/vhost-user-bridge: implement logging of dirty pages Victor Kaplansky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.