All of lore.kernel.org
 help / color / mirror / Atom feed
From: Victor Kaplansky <victork@redhat.com>
To: qemu-devel@nongnu.org
Cc: Victor Kaplansky <victork@redhat.com>,
	Marc-Andre Lureau <mlureau@redhat.com>,
	peterx@redhat.com, "Michael S. Tsirkin" <mst@redhat.com>
Subject: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
Date: Thu, 12 Nov 2015 14:34:56 +0200	[thread overview]
Message-ID: <1447331679-6270-1-git-send-email-victork@redhat.com> (raw)

During migration devices continue writing to the guest's memory.
The writes has to be reported to QEMU. This change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.

Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
v2:
   - use log_guest_addr for used ring reported by qemu instead of
     translating.
   - use mmap_size and mmap_offset defined in new
     VHOST_USER_SET_LOG_BASE interface. See the patch
     "vhost-user: modify SET_LOG_BASE to pass mmap size and
     offset".
   - start logging dirty pages only after the appropriate feature
     is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
   - updated TODO list.

 tests/vhost-user-bridge.c | 169 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 155 insertions(+), 14 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..8c1c997 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
 /*
  * TODO:
  *     - main should get parameters from the command line.
- *     - implement all request handlers.
+ *     - implement all request handlers. Still not implemented:
+ *          vubr_set_protocol_features_exec()
+ *          vubr_get_queue_num_exec()
+ *          vubr_set_vring_enable_exec()
+ *          vubr_send_rarp_exec()
+ *          vubr_set_log_fd_exec()
  *     - test for broken requests and virtqueue.
  *     - implement features defined by Virtio 1.0 spec.
  *     - support mergeable buffers and indirect descriptors.
- *     - implement RESET_DEVICE request.
  *     - implement clean shutdown.
  *     - implement non-blocking writes to UDP backend.
  *     - implement polling strategy.
  */
 
+#define _FILE_OFFSET_BITS 64
+
 #include <stddef.h>
 #include <assert.h>
 #include <stdio.h>
@@ -166,6 +172,7 @@ typedef struct VubrVirtq {
     struct vring_desc *desc;
     struct vring_avail *avail;
     struct vring_used *used;
+    uint64_t log_guest_addr;
 } VubrVirtq;
 
 /* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +180,9 @@ typedef struct VubrVirtq {
 #define VHOST_MEMORY_MAX_NREGIONS    8
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+typedef uint8_t vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 4096
+
 enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_MQ = 0,
     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
 typedef struct VhostUserMsg {
     VhostUserRequest request;
 
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
+        VhostUserLog log;
     } payload;
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
     uint32_t nregions;
     VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
     VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    vhost_log_chunk_t *log_table;
     int backend_udp_sock;
     struct sockaddr_in backend_udp_dest;
+    int ready;
+    uint64_t features;
 } VubrDev;
 
 static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     rc = recvmsg(conn_fd, &msg, 0);
 
-    if (rc <= 0) {
+    if (rc == 0) {
+        vubr_die("recvmsg");
+        fprintf(stderr, "Peer disconnected.\n");
+        exit(1);
+    }
+    if (rc < 0) {
         vubr_die("recvmsg");
     }
 
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     if (vmsg->size) {
         rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc <= 0) {
+        if (rc == 0) {
+            vubr_die("recvmsg");
+            fprintf(stderr, "Peer disconnected.\n");
+            exit(1);
+        }
+        if (rc < 0) {
             vubr_die("recvmsg");
         }
 
@@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
     }
 }
 
+
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    log_table[page / 8] |= 1 << (page % 8);
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vubr_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+}
+
 static void
 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 {
     struct vring_desc *desc   = vq->desc;
     struct vring_avail *avail = vq->avail;
     struct vring_used *used   = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
 
@@ -510,6 +568,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 
     if (len <= chunk_len) {
         memcpy(chunk_start, buf, len);
+        vubr_log_write(dev, desc[i].addr, len);
     } else {
         fprintf(stderr,
                 "Received too long packet from the backend. Dropping...\n");
@@ -519,11 +578,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vq->last_avail_index++;
     vq->last_used_index++;
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 
     /* Kick the guest if necessary. */
     vubr_virtqueue_kick(vq);
@@ -535,6 +600,7 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     struct vring_desc *desc   = vq->desc;
     struct vring_avail *avail = vq->avail;
     struct vring_used *used   = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
 
@@ -552,6 +618,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
         void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
         uint32_t chunk_len = desc[i].len;
 
+        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
         if (len + chunk_len < buf_size) {
             memcpy(buf + len, chunk_start, chunk_len);
             DPRINT("%d ", chunk_len);
@@ -577,6 +645,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vubr_consume_raw_packet(dev, buf, len);
 
@@ -588,6 +659,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
 {
     struct vring_avail *avail = vq->avail;
     struct vring_used *used = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
         vubr_process_desc(dev, vq);
@@ -596,6 +668,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
     }
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 }
 
 static void
@@ -609,6 +684,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
     int buflen = sizeof(buf);
     int len;
 
+    if (!dev->ready) {
+        return;
+    }
+
     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
 
     uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,9 +735,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     vmsg->payload.u64 =
             ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-             (1ULL << VIRTIO_NET_F_CTRL_RX) |
-             (1ULL << VHOST_F_LOG_ALL));
+             (1ULL << VHOST_F_LOG_ALL) |
+             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
     vmsg->size = sizeof(vmsg->payload.u64);
 
     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -671,6 +750,7 @@ static int
 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    dev->features = vmsg->payload.u64;
     return 0;
 }
 
@@ -680,10 +760,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
     return 0;
 }
 
+static void
+vubr_close_log(VubrDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            vubr_die("munmap()");
+        }
+
+        dev->log_table = 0;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
 static int
 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    vubr_close_log(dev);
+    dev->ready = 0;
+    dev->features = 0;
     return 0;
 }
 
@@ -736,8 +834,30 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    int fd;
+    uint64_t log_mmap_size, log_mmap_offset;
+    void *rc;
+
+    assert(vmsg->fd_num == 1);
+    fd = vmsg->fds[0];
+
+    assert(vmsg->size == sizeof(vmsg->payload.log));
+    log_mmap_offset = vmsg->payload.log.mmap_offset;
+    log_mmap_size   = vmsg->payload.log.mmap_size;
+    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
+
+    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+              log_mmap_offset);
+    if (rc == MAP_FAILED) {
+        vubr_die("mmap");
+    }
+    dev->log_table = (vhost_log_chunk_t *) rc;
+    dev->log_size = log_mmap_size;
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -777,6 +897,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
     vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
     vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
     vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+    vq->log_guest_addr = vra->log_guest_addr;
 
     DPRINT("Setting virtq addresses:\n");
     DPRINT("    vring_desc  at %p\n", vq->desc);
@@ -803,8 +924,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_index;
+    vmsg->size = sizeof(vmsg->payload.state);
+
+    /* reply */
+    return 1;
 }
 
 static int
@@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
                dev->vq[index].kick_fd, index);
     }
+    if (dev->vq[0].kick_fd != -1 &&
+        dev->vq[1].kick_fd != -1) {
+        dev->ready = 1;
+    }
     return 0;
 }
 
@@ -858,9 +989,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    /* FIXME: unimplented */
+    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -1012,6 +1146,13 @@ vubr_new(const char *path)
         };
     }
 
+    /* Init log */
+    dev->log_call_fd = -1;
+    dev->log_size    = 0;
+    dev->log_table   = 0;
+    dev->ready = 0;
+    dev->features = 0;
+
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
     if (dev->sock == -1) {
-- 
--Victor

             reply	other threads:[~2015-11-12 12:35 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-12 12:34 Victor Kaplansky [this message]
2015-11-12 14:38 ` [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages Michael S. Tsirkin
2015-11-12 17:39   ` Victor Kaplansky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1447331679-6270-1-git-send-email-victork@redhat.com \
    --to=victork@redhat.com \
    --cc=mlureau@redhat.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.