All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
@ 2015-11-12 12:34 Victor Kaplansky
  2015-11-12 14:38 ` Michael S. Tsirkin
  0 siblings, 1 reply; 3+ messages in thread
From: Victor Kaplansky @ 2015-11-12 12:34 UTC (permalink / raw)
  To: qemu-devel
  Cc: Victor Kaplansky, Marc-Andre Lureau, peterx, Michael S. Tsirkin

During migration devices continue writing to the guest's memory.
The writes have to be reported to QEMU. This change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.

Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
v2:
   - use log_guest_addr for used ring reported by qemu instead of
     translating.
   - use mmap_size and mmap_offset defined in new
     VHOST_USER_SET_LOG_BASE interface. See the patch
     "vhost-user: modify SET_LOG_BASE to pass mmap size and
     offset".
   - start logging dirty pages only after the appropriate feature
     is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
   - updated TODO list.

 tests/vhost-user-bridge.c | 169 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 155 insertions(+), 14 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..8c1c997 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
 /*
  * TODO:
  *     - main should get parameters from the command line.
- *     - implement all request handlers.
+ *     - implement all request handlers. Still not implemented:
+ *          vubr_set_protocol_features_exec()
+ *          vubr_get_queue_num_exec()
+ *          vubr_set_vring_enable_exec()
+ *          vubr_send_rarp_exec()
+ *          vubr_set_log_fd_exec()
  *     - test for broken requests and virtqueue.
  *     - implement features defined by Virtio 1.0 spec.
  *     - support mergeable buffers and indirect descriptors.
- *     - implement RESET_DEVICE request.
  *     - implement clean shutdown.
  *     - implement non-blocking writes to UDP backend.
  *     - implement polling strategy.
  */
 
+#define _FILE_OFFSET_BITS 64
+
 #include <stddef.h>
 #include <assert.h>
 #include <stdio.h>
@@ -166,6 +172,7 @@ typedef struct VubrVirtq {
     struct vring_desc *desc;
     struct vring_avail *avail;
     struct vring_used *used;
+    uint64_t log_guest_addr;
 } VubrVirtq;
 
 /* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +180,9 @@ typedef struct VubrVirtq {
 #define VHOST_MEMORY_MAX_NREGIONS    8
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+typedef uint8_t vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 4096
+
 enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_MQ = 0,
     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
 typedef struct VhostUserMsg {
     VhostUserRequest request;
 
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
+        VhostUserLog log;
     } payload;
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
     uint32_t nregions;
     VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
     VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    vhost_log_chunk_t *log_table;
     int backend_udp_sock;
     struct sockaddr_in backend_udp_dest;
+    int ready;
+    uint64_t features;
 } VubrDev;
 
 static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     rc = recvmsg(conn_fd, &msg, 0);
 
-    if (rc <= 0) {
+    if (rc == 0) {
+        vubr_die("recvmsg");
+        fprintf(stderr, "Peer disconnected.\n");
+        exit(1);
+    }
+    if (rc < 0) {
         vubr_die("recvmsg");
     }
 
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     if (vmsg->size) {
         rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc <= 0) {
+        if (rc == 0) {
+            vubr_die("recvmsg");
+            fprintf(stderr, "Peer disconnected.\n");
+            exit(1);
+        }
+        if (rc < 0) {
             vubr_die("recvmsg");
         }
 
@@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
     }
 }
 
+
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    log_table[page / 8] |= 1 << (page % 8);
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vubr_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+}
+
 static void
 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 {
     struct vring_desc *desc   = vq->desc;
     struct vring_avail *avail = vq->avail;
     struct vring_used *used   = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
 
@@ -510,6 +568,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 
     if (len <= chunk_len) {
         memcpy(chunk_start, buf, len);
+        vubr_log_write(dev, desc[i].addr, len);
     } else {
         fprintf(stderr,
                 "Received too long packet from the backend. Dropping...\n");
@@ -519,11 +578,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vq->last_avail_index++;
     vq->last_used_index++;
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 
     /* Kick the guest if necessary. */
     vubr_virtqueue_kick(vq);
@@ -535,6 +600,7 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     struct vring_desc *desc   = vq->desc;
     struct vring_avail *avail = vq->avail;
     struct vring_used *used   = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
 
@@ -552,6 +618,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
         void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
         uint32_t chunk_len = desc[i].len;
 
+        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
         if (len + chunk_len < buf_size) {
             memcpy(buf + len, chunk_start, chunk_len);
             DPRINT("%d ", chunk_len);
@@ -577,6 +645,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vubr_consume_raw_packet(dev, buf, len);
 
@@ -588,6 +659,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
 {
     struct vring_avail *avail = vq->avail;
     struct vring_used *used = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
         vubr_process_desc(dev, vq);
@@ -596,6 +668,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
     }
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 }
 
 static void
@@ -609,6 +684,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
     int buflen = sizeof(buf);
     int len;
 
+    if (!dev->ready) {
+        return;
+    }
+
     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
 
     uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,9 +735,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     vmsg->payload.u64 =
             ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-             (1ULL << VIRTIO_NET_F_CTRL_RX) |
-             (1ULL << VHOST_F_LOG_ALL));
+             (1ULL << VHOST_F_LOG_ALL) |
+             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
     vmsg->size = sizeof(vmsg->payload.u64);
 
     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -671,6 +750,7 @@ static int
 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    dev->features = vmsg->payload.u64;
     return 0;
 }
 
@@ -680,10 +760,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
     return 0;
 }
 
+static void
+vubr_close_log(VubrDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            vubr_die("munmap()");
+        }
+
+        dev->log_table = 0;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
 static int
 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    vubr_close_log(dev);
+    dev->ready = 0;
+    dev->features = 0;
     return 0;
 }
 
@@ -736,8 +834,30 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    int fd;
+    uint64_t log_mmap_size, log_mmap_offset;
+    void *rc;
+
+    assert(vmsg->fd_num == 1);
+    fd = vmsg->fds[0];
+
+    assert(vmsg->size == sizeof(vmsg->payload.log));
+    log_mmap_offset = vmsg->payload.log.mmap_offset;
+    log_mmap_size   = vmsg->payload.log.mmap_size;
+    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
+
+    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+              log_mmap_offset);
+    if (rc == MAP_FAILED) {
+        vubr_die("mmap");
+    }
+    dev->log_table = (vhost_log_chunk_t *) rc;
+    dev->log_size = log_mmap_size;
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -777,6 +897,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
     vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
     vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
     vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+    vq->log_guest_addr = vra->log_guest_addr;
 
     DPRINT("Setting virtq addresses:\n");
     DPRINT("    vring_desc  at %p\n", vq->desc);
@@ -803,8 +924,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_index;
+    vmsg->size = sizeof(vmsg->payload.state);
+
+    /* reply */
+    return 1;
 }
 
 static int
@@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
                dev->vq[index].kick_fd, index);
     }
+    if (dev->vq[0].kick_fd != -1 &&
+        dev->vq[1].kick_fd != -1) {
+        dev->ready = 1;
+    }
     return 0;
 }
 
@@ -858,9 +989,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    /* FIXME: unimplented */
+    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -1012,6 +1146,13 @@ vubr_new(const char *path)
         };
     }
 
+    /* Init log */
+    dev->log_call_fd = -1;
+    dev->log_size    = 0;
+    dev->log_table   = 0;
+    dev->ready = 0;
+    dev->features = 0;
+
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
     if (dev->sock == -1) {
-- 
--Victor

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
  2015-11-12 12:34 [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages Victor Kaplansky
@ 2015-11-12 14:38 ` Michael S. Tsirkin
  2015-11-12 17:39   ` Victor Kaplansky
  0 siblings, 1 reply; 3+ messages in thread
From: Michael S. Tsirkin @ 2015-11-12 14:38 UTC (permalink / raw)
  To: Victor Kaplansky; +Cc: Marc-Andre Lureau, qemu-devel, peterx

On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote:
> During migration devices continue writing to the guest's memory.
> The writes has to be reported to QEMU. This change implements
> minimal support in vhost-user-bridge required for successful
> migration of a guest with virtio-net device.
> 
> Signed-off-by: Victor Kaplansky <victork@redhat.com>
> ---
> v2:
>    - use log_guest_addr for used ring reported by qemu instead of
>      translating.
>    - use mmap_size and mmap_offset defined in new
>      VHOST_USER_SET_LOG_BASE interface. See the patch
>      "vhost-user: modify SET_LOG_BASE to pass mmap size and
>      offset".
>    - start logging dirty pages only after the appropriate feature
>      is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
>    - updated TODO list.
> 
>  tests/vhost-user-bridge.c | 169 ++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 155 insertions(+), 14 deletions(-)
> 
> diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
> index fa18ad5..8c1c997 100644
> --- a/tests/vhost-user-bridge.c
> +++ b/tests/vhost-user-bridge.c
> @@ -13,16 +13,22 @@
>  /*
>   * TODO:
>   *     - main should get parameters from the command line.
> - *     - implement all request handlers.
> + *     - implement all request handlers. Still not implemented:
> + *          vubr_set_protocol_features_exec()
> + *          vubr_get_queue_num_exec()
> + *          vubr_set_vring_enable_exec()
> + *          vubr_send_rarp_exec()
> + *          vubr_set_log_fd_exec()
>   *     - test for broken requests and virtqueue.
>   *     - implement features defined by Virtio 1.0 spec.
>   *     - support mergeable buffers and indirect descriptors.
> - *     - implement RESET_DEVICE request.
>   *     - implement clean shutdown.
>   *     - implement non-blocking writes to UDP backend.
>   *     - implement polling strategy.
>   */
>  
> +#define _FILE_OFFSET_BITS 64
> +
>  #include <stddef.h>
>  #include <assert.h>
>  #include <stdio.h>
> @@ -166,6 +172,7 @@ typedef struct VubrVirtq {
>      struct vring_desc *desc;
>      struct vring_avail *avail;
>      struct vring_used *used;
> +    uint64_t log_guest_addr;
>  } VubrVirtq;
>  
>  /* Based on qemu/hw/virtio/vhost-user.c */
> @@ -173,6 +180,9 @@ typedef struct VubrVirtq {
>  #define VHOST_MEMORY_MAX_NREGIONS    8
>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
>  
> +typedef uint8_t vhost_log_chunk_t;

Most code just uses uint8_t directly - I think you should
just drop this typedef.

> +#define VHOST_LOG_PAGE 4096
> +
>  enum VhostUserProtocolFeature {
>      VHOST_USER_PROTOCOL_F_MQ = 0,
>      VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
> @@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
>      VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>  } VhostUserMemory;
>  
> +typedef struct VhostUserLog {
> +    uint64_t mmap_size;
> +    uint64_t mmap_offset;
> +} VhostUserLog;
> +
>  typedef struct VhostUserMsg {
>      VhostUserRequest request;
>  
> @@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
>          struct vhost_vring_state state;
>          struct vhost_vring_addr addr;
>          VhostUserMemory memory;
> +        VhostUserLog log;
>      } payload;
>      int fds[VHOST_MEMORY_MAX_NREGIONS];
>      int fd_num;
> @@ -265,8 +281,13 @@ typedef struct VubrDev {
>      uint32_t nregions;
>      VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>      VubrVirtq vq[MAX_NR_VIRTQUEUE];
> +    int log_call_fd;

This doesn't seem to be used. Pls add code to signal this
after logging.

> +    uint64_t log_size;
> +    vhost_log_chunk_t *log_table;
>      int backend_udp_sock;
>      struct sockaddr_in backend_udp_dest;
> +    int ready;
> +    uint64_t features;
>  } VubrDev;
>  
>  static const char *vubr_request_str[] = {
> @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>  
>      rc = recvmsg(conn_fd, &msg, 0);
>  
> -    if (rc <= 0) {
> +    if (rc == 0) {
> +        vubr_die("recvmsg");
> +        fprintf(stderr, "Peer disconnected.\n");
> +        exit(1);
> +    }
> +    if (rc < 0) {
>          vubr_die("recvmsg");
>      }
>  
> @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>  
>      if (vmsg->size) {
>          rc = read(conn_fd, &vmsg->payload, vmsg->size);
> -        if (rc <= 0) {
> +        if (rc == 0) {
> +            vubr_die("recvmsg");
> +            fprintf(stderr, "Peer disconnected.\n");
> +            exit(1);
> +        }
> +        if (rc < 0) {
>              vubr_die("recvmsg");
>          }
>  
> @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
>      }
>  }
>  
> +
> +static void
> +vubr_log_page(uint8_t *log_table, uint64_t page)
> +{
> +    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> +    log_table[page / 8] |= 1 << (page % 8);
> +}
> +

This is only safe if there's a single writer.
Please add a comment that says as much.
Or set this atomically, that's also not hard to do.

> +static void
> +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
> +{
> +    uint64_t page;
> +
> +    if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
> +        return;
> +    }
> +
> +    assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));

I think there's an off by 1 here.
Imagine size == 0, test should always fail.
But imagine address == 0 and length == 1.
You get >= and test passes, seems wrong.

> +
> +    page = address / VHOST_LOG_PAGE;
> +    while (page * VHOST_LOG_PAGE < address + length) {
> +        vubr_log_page(dev->log_table, page);
> +        page += VHOST_LOG_PAGE;
> +    }
> +}
> +
>  static void
>  vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
>  {
>      struct vring_desc *desc   = vq->desc;
>      struct vring_avail *avail = vq->avail;
>      struct vring_used *used   = vq->used;
> +    uint64_t log_guest_addr = vq->log_guest_addr;
>  
>      unsigned int size = vq->size;
>  
> @@ -510,6 +568,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
>  
>      if (len <= chunk_len) {
>          memcpy(chunk_start, buf, len);
> +        vubr_log_write(dev, desc[i].addr, len);
>      } else {
>          fprintf(stderr,
>                  "Received too long packet from the backend. Dropping...\n");
> @@ -519,11 +578,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
>      /* Add descriptor to the used ring. */
>      used->ring[u_index].id = d_index;
>      used->ring[u_index].len = len;
> +    vubr_log_write(dev,
> +                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
> +                   sizeof(used->ring[u_index]));
>  
>      vq->last_avail_index++;
>      vq->last_used_index++;
>  
>      atomic_mb_set(&used->idx, vq->last_used_index);
> +    vubr_log_write(dev,
> +                   log_guest_addr + offsetof(struct vring_used, idx),
> +                   sizeof(used->idx));
>  
>      /* Kick the guest if necessary. */
>      vubr_virtqueue_kick(vq);
> @@ -535,6 +600,7 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
>      struct vring_desc *desc   = vq->desc;
>      struct vring_avail *avail = vq->avail;
>      struct vring_used *used   = vq->used;
> +    uint64_t log_guest_addr = vq->log_guest_addr;
>  
>      unsigned int size = vq->size;
>  
> @@ -552,6 +618,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
>          void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
>          uint32_t chunk_len = desc[i].len;
>  
> +        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
> +
>          if (len + chunk_len < buf_size) {
>              memcpy(buf + len, chunk_start, chunk_len);
>              DPRINT("%d ", chunk_len);
> @@ -577,6 +645,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
>      /* Add descriptor to the used ring. */
>      used->ring[u_index].id = d_index;
>      used->ring[u_index].len = len;
> +    vubr_log_write(dev,
> +                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
> +                   sizeof(used->ring[u_index]));
>  
>      vubr_consume_raw_packet(dev, buf, len);
>  
> @@ -588,6 +659,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
>  {
>      struct vring_avail *avail = vq->avail;
>      struct vring_used *used = vq->used;
> +    uint64_t log_guest_addr = vq->log_guest_addr;
>  
>      while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
>          vubr_process_desc(dev, vq);
> @@ -596,6 +668,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
>      }
>  
>      atomic_mb_set(&used->idx, vq->last_used_index);
> +    vubr_log_write(dev,
> +                   log_guest_addr + offsetof(struct vring_used, idx),
> +                   sizeof(used->idx));
>  }
>  
>  static void
> @@ -609,6 +684,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
>      int buflen = sizeof(buf);
>      int len;
>  
> +    if (!dev->ready) {
> +        return;
> +    }
> +
>      DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
>  
>      uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
> @@ -656,9 +735,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  {
>      vmsg->payload.u64 =
>              ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
> -             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
> -             (1ULL << VIRTIO_NET_F_CTRL_RX) |
> -             (1ULL << VHOST_F_LOG_ALL));
> +             (1ULL << VHOST_F_LOG_ALL) |
> +             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
> +
>      vmsg->size = sizeof(vmsg->payload.u64);
>  
>      DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> @@ -671,6 +750,7 @@ static int
>  vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  {
>      DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> +    dev->features = vmsg->payload.u64;
>      return 0;
>  }
>  
> @@ -680,10 +760,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
>      return 0;
>  }
>  
> +static void
> +vubr_close_log(VubrDev *dev)
> +{
> +    if (dev->log_table) {
> +        if (munmap(dev->log_table, dev->log_size) != 0) {
> +            vubr_die("munmap()");
> +        }
> +
> +        dev->log_table = 0;
> +    }
> +    if (dev->log_call_fd != -1) {
> +        close(dev->log_call_fd);
> +        dev->log_call_fd = -1;
> +    }
> +}
> +
>  static int
>  vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  {
> -    DPRINT("Function %s() not implemented yet.\n", __func__);
> +    vubr_close_log(dev);
> +    dev->ready = 0;
> +    dev->features = 0;
>      return 0;
>  }
>  
> @@ -736,8 +834,30 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  static int
>  vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  {
> -    DPRINT("Function %s() not implemented yet.\n", __func__);
> -    return 0;
> +    int fd;
> +    uint64_t log_mmap_size, log_mmap_offset;
> +    void *rc;
> +
> +    assert(vmsg->fd_num == 1);
> +    fd = vmsg->fds[0];
> +
> +    assert(vmsg->size == sizeof(vmsg->payload.log));
> +    log_mmap_offset = vmsg->payload.log.mmap_offset;
> +    log_mmap_size   = vmsg->payload.log.mmap_size;
> +    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
> +    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
> +
> +    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> +              log_mmap_offset);
> +    if (rc == MAP_FAILED) {
> +        vubr_die("mmap");
> +    }
> +    dev->log_table = (vhost_log_chunk_t *) rc;

Cast is not needed here.

> +    dev->log_size = log_mmap_size;
> +
> +    vmsg->size = sizeof(vmsg->payload.u64);
> +    /* Reply */
> +    return 1;
>  }
>  
>  static int
> @@ -777,6 +897,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
>      vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
>      vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
>      vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
> +    vq->log_guest_addr = vra->log_guest_addr;
>  
>      DPRINT("Setting virtq addresses:\n");
>      DPRINT("    vring_desc  at %p\n", vq->desc);
> @@ -803,8 +924,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  static int
>  vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  {
> -    DPRINT("Function %s() not implemented yet.\n", __func__);
> -    return 0;
> +    unsigned int index = vmsg->payload.state.index;
> +
> +    DPRINT("State.index: %d\n", index);
> +    vmsg->payload.state.num = dev->vq[index].last_avail_index;
> +    vmsg->size = sizeof(vmsg->payload.state);
> +
> +    /* reply */

But it is spelled "Reply" above. Pls keep it consistent.

> +    return 1;
>  }
>  
>  static int
> @@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
>          DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
>                 dev->vq[index].kick_fd, index);
>      }
> +    if (dev->vq[0].kick_fd != -1 &&
> +        dev->vq[1].kick_fd != -1) {
> +        dev->ready = 1;

I'm not sure what this does. Related to logging somehow?
Anyway, processing a VQ should happen after either
- you received a kick
or
- you received VRING_ENABLE

> +    }
>      return 0;
>  }
>  
> @@ -858,9 +989,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  static int
>  vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
>  {
> -    /* FIXME: unimplented */
> +    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
>      DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> -    return 0;
> +    vmsg->size = sizeof(vmsg->payload.u64);
> +
> +    /* Reply */
> +    return 1;
>  }
>  
>  static int
> @@ -1012,6 +1146,13 @@ vubr_new(const char *path)
>          };
>      }
>  
> +    /* Init log */
> +    dev->log_call_fd = -1;
> +    dev->log_size    = 0;
> +    dev->log_table   = 0;

Pls just put a single space there. Don't align things,
it's hard to keep consistency when one does.

> +    dev->ready = 0;
> +    dev->features = 0;
> +
>      /* Get a UNIX socket. */
>      dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
>      if (dev->sock == -1) {
> -- 
> --Victor

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
  2015-11-12 14:38 ` Michael S. Tsirkin
@ 2015-11-12 17:39   ` Victor Kaplansky
  0 siblings, 0 replies; 3+ messages in thread
From: Victor Kaplansky @ 2015-11-12 17:39 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Marc-Andre Lureau, qemu-devel, peterx

On Thu, Nov 12, 2015 at 04:38:51PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote:
> >  /* Based on qemu/hw/virtio/vhost-user.c */
> > @@ -173,6 +180,9 @@ typedef struct VubrVirtq {
> >  #define VHOST_MEMORY_MAX_NREGIONS    8
> >  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> >  
> > +typedef uint8_t vhost_log_chunk_t;
> 
> Most code just uses uint8_t directly - I think you should
> just drop this typedef.

Oh, right. I'll clean this.

> > @@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
> >          struct vhost_vring_state state;
> >          struct vhost_vring_addr addr;
> >          VhostUserMemory memory;
> > +        VhostUserLog log;
> >      } payload;
> >      int fds[VHOST_MEMORY_MAX_NREGIONS];
> >      int fd_num;
> > @@ -265,8 +281,13 @@ typedef struct VubrDev {
> >      uint32_t nregions;
> >      VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >      VubrVirtq vq[MAX_NR_VIRTQUEUE];
> > +    int log_call_fd;
> 
> This doesn't seem to be used. Pls add code to signal this
> after logging.
> 

Right, implementing the SET_LOG_FD request handler was on my TODO
list. I'll include it in the next version of the patch (and will
also use it for the signaling).

> > @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
> >      }
> >  }
> >  
> > +
> > +static void
> > +vubr_log_page(uint8_t *log_table, uint64_t page)
> > +{
> > +    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> > +    log_table[page / 8] |= 1 << (page % 8);
> > +}
> > +
> 
> This is only safe if there's a single writer.
> Please add a comment that says as much.
> Or set this atomically, that's also not hard to do.
> 

I'll set it atomically.

> > +static void
> > +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
> > +{
> > +    uint64_t page;
> > +
> > +    if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
> > +        return;
> > +    }
> > +
> > +    assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
> 
> I think there's an off by 1 here.
> Imagine size == 0, test should always fail.
> But imagine address == 0 and length == 1.
> You get >= and test passes, seems wrong.
> 

Oh, good catch. Will fix it. Hopefully, the right way this time. ;-)

> > +    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> > +              log_mmap_offset);
> > +    if (rc == MAP_FAILED) {
> > +        vubr_die("mmap");
> > +    }
> > +    dev->log_table = (vhost_log_chunk_t *) rc;
> 
> Cast is not needed here.
> 

Indeed.

> > @@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
> >          DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
> >                 dev->vq[index].kick_fd, index);
> >      }
> > +    if (dev->vq[0].kick_fd != -1 &&
> > +        dev->vq[1].kick_fd != -1) {
> > +        dev->ready = 1;
> 
> I'm not sure what this does. Related to logging somehow?
> Anyway, processing a VQ should happen after either
> - you received a kick
> or
> - you received VRING_ENABLE

It is a temporary hack to determine whether the vrings are ready for
processing. AFAIR, DPDK code uses the same heuristic. I'll add an
explanation in the comments.

--Victor

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-11-12 17:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-12 12:34 [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages Victor Kaplansky
2015-11-12 14:38 ` Michael S. Tsirkin
2015-11-12 17:39   ` Victor Kaplansky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.