* [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
@ 2015-11-12 12:34 Victor Kaplansky
2015-11-12 14:38 ` Michael S. Tsirkin
0 siblings, 1 reply; 3+ messages in thread
From: Victor Kaplansky @ 2015-11-12 12:34 UTC (permalink / raw)
To: qemu-devel
Cc: Victor Kaplansky, Marc-Andre Lureau, peterx, Michael S. Tsirkin
During migration devices continue writing to the guest's memory.
The writes has to be reported to QEMU. This change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.
Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
v2:
- use log_guest_addr for used ring reported by qemu instead of
translating.
- use mmap_size and mmap_offset defined in new
VHOST_USER_SET_LOG_BASE interface. See the patch
"vhost-user: modify SET_LOG_BASE to pass mmap size and
offset".
- start logging dirty pages only after the appropriate feature
is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
- updated TODO list.
tests/vhost-user-bridge.c | 169 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 155 insertions(+), 14 deletions(-)
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..8c1c997 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
/*
* TODO:
* - main should get parameters from the command line.
- * - implement all request handlers.
+ * - implement all request handlers. Still not implemented:
+ * vubr_set_protocol_features_exec()
+ * vubr_get_queue_num_exec()
+ * vubr_set_vring_enable_exec()
+ * vubr_send_rarp_exec()
+ * vubr_set_log_fd_exec()
* - test for broken requests and virtqueue.
* - implement features defined by Virtio 1.0 spec.
* - support mergeable buffers and indirect descriptors.
- * - implement RESET_DEVICE request.
* - implement clean shutdown.
* - implement non-blocking writes to UDP backend.
* - implement polling strategy.
*/
+#define _FILE_OFFSET_BITS 64
+
#include <stddef.h>
#include <assert.h>
#include <stdio.h>
@@ -166,6 +172,7 @@ typedef struct VubrVirtq {
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
+ uint64_t log_guest_addr;
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +180,9 @@ typedef struct VubrVirtq {
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+typedef uint8_t vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 4096
+
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
typedef struct VhostUserMsg {
VhostUserRequest request;
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
+ VhostUserLog log;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
uint32_t nregions;
VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VubrVirtq vq[MAX_NR_VIRTQUEUE];
+ int log_call_fd;
+ uint64_t log_size;
+ vhost_log_chunk_t *log_table;
int backend_udp_sock;
struct sockaddr_in backend_udp_dest;
+ int ready;
+ uint64_t features;
} VubrDev;
static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
rc = recvmsg(conn_fd, &msg, 0);
- if (rc <= 0) {
+ if (rc == 0) {
+ vubr_die("recvmsg");
+ fprintf(stderr, "Peer disconnected.\n");
+ exit(1);
+ }
+ if (rc < 0) {
vubr_die("recvmsg");
}
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
if (vmsg->size) {
rc = read(conn_fd, &vmsg->payload, vmsg->size);
- if (rc <= 0) {
+ if (rc == 0) {
+ vubr_die("recvmsg");
+ fprintf(stderr, "Peer disconnected.\n");
+ exit(1);
+ }
+ if (rc < 0) {
vubr_die("recvmsg");
}
@@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
}
}
+
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+ DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+ log_table[page / 8] |= 1 << (page % 8);
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+ uint64_t page;
+
+ if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
+ return;
+ }
+
+ assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
+
+ page = address / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < address + length) {
+ vubr_log_page(dev->log_table, page);
+ page += VHOST_LOG_PAGE;
+ }
+}
+
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
@@ -510,6 +568,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
if (len <= chunk_len) {
memcpy(chunk_start, buf, len);
+ vubr_log_write(dev, desc[i].addr, len);
} else {
fprintf(stderr,
"Received too long packet from the backend. Dropping...\n");
@@ -519,11 +578,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+ sizeof(used->ring[u_index]));
vq->last_avail_index++;
vq->last_used_index++;
atomic_mb_set(&used->idx, vq->last_used_index);
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(used->idx));
/* Kick the guest if necessary. */
vubr_virtqueue_kick(vq);
@@ -535,6 +600,7 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
@@ -552,6 +618,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
uint32_t chunk_len = desc[i].len;
+ assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
if (len + chunk_len < buf_size) {
memcpy(buf + len, chunk_start, chunk_len);
DPRINT("%d ", chunk_len);
@@ -577,6 +645,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+ sizeof(used->ring[u_index]));
vubr_consume_raw_packet(dev, buf, len);
@@ -588,6 +659,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
vubr_process_desc(dev, vq);
@@ -596,6 +668,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
}
atomic_mb_set(&used->idx, vq->last_used_index);
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(used->idx));
}
static void
@@ -609,6 +684,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
int buflen = sizeof(buf);
int len;
+ if (!dev->ready) {
+ return;
+ }
+
DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,9 +735,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 =
((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_NET_F_CTRL_VQ) |
- (1ULL << VIRTIO_NET_F_CTRL_RX) |
- (1ULL << VHOST_F_LOG_ALL));
+ (1ULL << VHOST_F_LOG_ALL) |
+ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
vmsg->size = sizeof(vmsg->payload.u64);
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -671,6 +750,7 @@ static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ dev->features = vmsg->payload.u64;
return 0;
}
@@ -680,10 +760,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
return 0;
}
+static void
+vubr_close_log(VubrDev *dev)
+{
+ if (dev->log_table) {
+ if (munmap(dev->log_table, dev->log_size) != 0) {
+ vubr_die("munmap()");
+ }
+
+ dev->log_table = 0;
+ }
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ dev->log_call_fd = -1;
+ }
+}
+
static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
+ vubr_close_log(dev);
+ dev->ready = 0;
+ dev->features = 0;
return 0;
}
@@ -736,8 +834,30 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ int fd;
+ uint64_t log_mmap_size, log_mmap_offset;
+ void *rc;
+
+ assert(vmsg->fd_num == 1);
+ fd = vmsg->fds[0];
+
+ assert(vmsg->size == sizeof(vmsg->payload.log));
+ log_mmap_offset = vmsg->payload.log.mmap_offset;
+ log_mmap_size = vmsg->payload.log.mmap_size;
+ DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+ DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
+
+ rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ log_mmap_offset);
+ if (rc == MAP_FAILED) {
+ vubr_die("mmap");
+ }
+ dev->log_table = (vhost_log_chunk_t *) rc;
+ dev->log_size = log_mmap_size;
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+ /* Reply */
+ return 1;
}
static int
@@ -777,6 +897,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+ vq->log_guest_addr = vra->log_guest_addr;
DPRINT("Setting virtq addresses:\n");
DPRINT(" vring_desc at %p\n", vq->desc);
@@ -803,8 +924,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ unsigned int index = vmsg->payload.state.index;
+
+ DPRINT("State.index: %d\n", index);
+ vmsg->payload.state.num = dev->vq[index].last_avail_index;
+ vmsg->size = sizeof(vmsg->payload.state);
+
+ /* reply */
+ return 1;
}
static int
@@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
dev->vq[index].kick_fd, index);
}
+ if (dev->vq[0].kick_fd != -1 &&
+ dev->vq[1].kick_fd != -1) {
+ dev->ready = 1;
+ }
return 0;
}
@@ -858,9 +989,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- /* FIXME: unimplented */
+ vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ /* Reply */
+ return 1;
}
static int
@@ -1012,6 +1146,13 @@ vubr_new(const char *path)
};
}
+ /* Init log */
+ dev->log_call_fd = -1;
+ dev->log_size = 0;
+ dev->log_table = 0;
+ dev->ready = 0;
+ dev->features = 0;
+
/* Get a UNIX socket. */
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
if (dev->sock == -1) {
--
--Victor
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
2015-11-12 12:34 [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages Victor Kaplansky
@ 2015-11-12 14:38 ` Michael S. Tsirkin
2015-11-12 17:39 ` Victor Kaplansky
0 siblings, 1 reply; 3+ messages in thread
From: Michael S. Tsirkin @ 2015-11-12 14:38 UTC (permalink / raw)
To: Victor Kaplansky; +Cc: Marc-Andre Lureau, qemu-devel, peterx
On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote:
> During migration devices continue writing to the guest's memory.
> The writes has to be reported to QEMU. This change implements
> minimal support in vhost-user-bridge required for successful
> migration of a guest with virtio-net device.
>
> Signed-off-by: Victor Kaplansky <victork@redhat.com>
> ---
> v2:
> - use log_guest_addr for used ring reported by qemu instead of
> translating.
> - use mmap_size and mmap_offset defined in new
> VHOST_USER_SET_LOG_BASE interface. See the patch
> "vhost-user: modify SET_LOG_BASE to pass mmap size and
> offset".
> - start logging dirty pages only after the appropriate feature
> is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
> - updated TODO list.
>
> tests/vhost-user-bridge.c | 169 ++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 155 insertions(+), 14 deletions(-)
>
> diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
> index fa18ad5..8c1c997 100644
> --- a/tests/vhost-user-bridge.c
> +++ b/tests/vhost-user-bridge.c
> @@ -13,16 +13,22 @@
> /*
> * TODO:
> * - main should get parameters from the command line.
> - * - implement all request handlers.
> + * - implement all request handlers. Still not implemented:
> + * vubr_set_protocol_features_exec()
> + * vubr_get_queue_num_exec()
> + * vubr_set_vring_enable_exec()
> + * vubr_send_rarp_exec()
> + * vubr_set_log_fd_exec()
> * - test for broken requests and virtqueue.
> * - implement features defined by Virtio 1.0 spec.
> * - support mergeable buffers and indirect descriptors.
> - * - implement RESET_DEVICE request.
> * - implement clean shutdown.
> * - implement non-blocking writes to UDP backend.
> * - implement polling strategy.
> */
>
> +#define _FILE_OFFSET_BITS 64
> +
> #include <stddef.h>
> #include <assert.h>
> #include <stdio.h>
> @@ -166,6 +172,7 @@ typedef struct VubrVirtq {
> struct vring_desc *desc;
> struct vring_avail *avail;
> struct vring_used *used;
> + uint64_t log_guest_addr;
> } VubrVirtq;
>
> /* Based on qemu/hw/virtio/vhost-user.c */
> @@ -173,6 +180,9 @@ typedef struct VubrVirtq {
> #define VHOST_MEMORY_MAX_NREGIONS 8
> #define VHOST_USER_F_PROTOCOL_FEATURES 30
>
> +typedef uint8_t vhost_log_chunk_t;
Most code just uses uint8_t directly - I think you should
just drop this typedef.
> +#define VHOST_LOG_PAGE 4096
> +
> enum VhostUserProtocolFeature {
> VHOST_USER_PROTOCOL_F_MQ = 0,
> VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
> @@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
> VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> } VhostUserMemory;
>
> +typedef struct VhostUserLog {
> + uint64_t mmap_size;
> + uint64_t mmap_offset;
> +} VhostUserLog;
> +
> typedef struct VhostUserMsg {
> VhostUserRequest request;
>
> @@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
> struct vhost_vring_state state;
> struct vhost_vring_addr addr;
> VhostUserMemory memory;
> + VhostUserLog log;
> } payload;
> int fds[VHOST_MEMORY_MAX_NREGIONS];
> int fd_num;
> @@ -265,8 +281,13 @@ typedef struct VubrDev {
> uint32_t nregions;
> VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> VubrVirtq vq[MAX_NR_VIRTQUEUE];
> + int log_call_fd;
This doesn't seem to be used. Pls add code to signal this
after logging.
> + uint64_t log_size;
> + vhost_log_chunk_t *log_table;
> int backend_udp_sock;
> struct sockaddr_in backend_udp_dest;
> + int ready;
> + uint64_t features;
> } VubrDev;
>
> static const char *vubr_request_str[] = {
> @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>
> rc = recvmsg(conn_fd, &msg, 0);
>
> - if (rc <= 0) {
> + if (rc == 0) {
> + vubr_die("recvmsg");
> + fprintf(stderr, "Peer disconnected.\n");
> + exit(1);
> + }
> + if (rc < 0) {
> vubr_die("recvmsg");
> }
>
> @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>
> if (vmsg->size) {
> rc = read(conn_fd, &vmsg->payload, vmsg->size);
> - if (rc <= 0) {
> + if (rc == 0) {
> + vubr_die("recvmsg");
> + fprintf(stderr, "Peer disconnected.\n");
> + exit(1);
> + }
> + if (rc < 0) {
> vubr_die("recvmsg");
> }
>
> @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
> }
> }
>
> +
> +static void
> +vubr_log_page(uint8_t *log_table, uint64_t page)
> +{
> + DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> + log_table[page / 8] |= 1 << (page % 8);
> +}
> +
This is only safe if there's a single writer.
Please add a comment that says as much.
Or set this atomically, that's also not hard to do.
> +static void
> +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
> +{
> + uint64_t page;
> +
> + if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
> + return;
> + }
> +
> + assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
I think there's an off by 1 here.
Imagine size == 0, test should always fail.
But imagine address == 0 and length == 1.
You get >= and test passes, seems wrong.
> +
> + page = address / VHOST_LOG_PAGE;
> + while (page * VHOST_LOG_PAGE < address + length) {
> + vubr_log_page(dev->log_table, page);
> + page += VHOST_LOG_PAGE;
> + }
> +}
> +
> static void
> vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
> {
> struct vring_desc *desc = vq->desc;
> struct vring_avail *avail = vq->avail;
> struct vring_used *used = vq->used;
> + uint64_t log_guest_addr = vq->log_guest_addr;
>
> unsigned int size = vq->size;
>
> @@ -510,6 +568,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
>
> if (len <= chunk_len) {
> memcpy(chunk_start, buf, len);
> + vubr_log_write(dev, desc[i].addr, len);
> } else {
> fprintf(stderr,
> "Received too long packet from the backend. Dropping...\n");
> @@ -519,11 +578,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
> /* Add descriptor to the used ring. */
> used->ring[u_index].id = d_index;
> used->ring[u_index].len = len;
> + vubr_log_write(dev,
> + log_guest_addr + offsetof(struct vring_used, ring[u_index]),
> + sizeof(used->ring[u_index]));
>
> vq->last_avail_index++;
> vq->last_used_index++;
>
> atomic_mb_set(&used->idx, vq->last_used_index);
> + vubr_log_write(dev,
> + log_guest_addr + offsetof(struct vring_used, idx),
> + sizeof(used->idx));
>
> /* Kick the guest if necessary. */
> vubr_virtqueue_kick(vq);
> @@ -535,6 +600,7 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
> struct vring_desc *desc = vq->desc;
> struct vring_avail *avail = vq->avail;
> struct vring_used *used = vq->used;
> + uint64_t log_guest_addr = vq->log_guest_addr;
>
> unsigned int size = vq->size;
>
> @@ -552,6 +618,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
> void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
> uint32_t chunk_len = desc[i].len;
>
> + assert(!(desc[i].flags & VRING_DESC_F_WRITE));
> +
> if (len + chunk_len < buf_size) {
> memcpy(buf + len, chunk_start, chunk_len);
> DPRINT("%d ", chunk_len);
> @@ -577,6 +645,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
> /* Add descriptor to the used ring. */
> used->ring[u_index].id = d_index;
> used->ring[u_index].len = len;
> + vubr_log_write(dev,
> + log_guest_addr + offsetof(struct vring_used, ring[u_index]),
> + sizeof(used->ring[u_index]));
>
> vubr_consume_raw_packet(dev, buf, len);
>
> @@ -588,6 +659,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
> {
> struct vring_avail *avail = vq->avail;
> struct vring_used *used = vq->used;
> + uint64_t log_guest_addr = vq->log_guest_addr;
>
> while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
> vubr_process_desc(dev, vq);
> @@ -596,6 +668,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
> }
>
> atomic_mb_set(&used->idx, vq->last_used_index);
> + vubr_log_write(dev,
> + log_guest_addr + offsetof(struct vring_used, idx),
> + sizeof(used->idx));
> }
>
> static void
> @@ -609,6 +684,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
> int buflen = sizeof(buf);
> int len;
>
> + if (!dev->ready) {
> + return;
> + }
> +
> DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
>
> uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
> @@ -656,9 +735,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> vmsg->payload.u64 =
> ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
> - (1ULL << VIRTIO_NET_F_CTRL_VQ) |
> - (1ULL << VIRTIO_NET_F_CTRL_RX) |
> - (1ULL << VHOST_F_LOG_ALL));
> + (1ULL << VHOST_F_LOG_ALL) |
> + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
> +
> vmsg->size = sizeof(vmsg->payload.u64);
>
> DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> @@ -671,6 +750,7 @@ static int
> vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> + dev->features = vmsg->payload.u64;
> return 0;
> }
>
> @@ -680,10 +760,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
> return 0;
> }
>
> +static void
> +vubr_close_log(VubrDev *dev)
> +{
> + if (dev->log_table) {
> + if (munmap(dev->log_table, dev->log_size) != 0) {
> + vubr_die("munmap()");
> + }
> +
> + dev->log_table = 0;
> + }
> + if (dev->log_call_fd != -1) {
> + close(dev->log_call_fd);
> + dev->log_call_fd = -1;
> + }
> +}
> +
> static int
> vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - DPRINT("Function %s() not implemented yet.\n", __func__);
> + vubr_close_log(dev);
> + dev->ready = 0;
> + dev->features = 0;
> return 0;
> }
>
> @@ -736,8 +834,30 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
> static int
> vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - DPRINT("Function %s() not implemented yet.\n", __func__);
> - return 0;
> + int fd;
> + uint64_t log_mmap_size, log_mmap_offset;
> + void *rc;
> +
> + assert(vmsg->fd_num == 1);
> + fd = vmsg->fds[0];
> +
> + assert(vmsg->size == sizeof(vmsg->payload.log));
> + log_mmap_offset = vmsg->payload.log.mmap_offset;
> + log_mmap_size = vmsg->payload.log.mmap_size;
> + DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
> + DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
> +
> + rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> + log_mmap_offset);
> + if (rc == MAP_FAILED) {
> + vubr_die("mmap");
> + }
> + dev->log_table = (vhost_log_chunk_t *) rc;
Cast is not needed here.
> + dev->log_size = log_mmap_size;
> +
> + vmsg->size = sizeof(vmsg->payload.u64);
> + /* Reply */
> + return 1;
> }
>
> static int
> @@ -777,6 +897,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
> vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
> vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
> vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
> + vq->log_guest_addr = vra->log_guest_addr;
>
> DPRINT("Setting virtq addresses:\n");
> DPRINT(" vring_desc at %p\n", vq->desc);
> @@ -803,8 +924,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
> static int
> vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - DPRINT("Function %s() not implemented yet.\n", __func__);
> - return 0;
> + unsigned int index = vmsg->payload.state.index;
> +
> + DPRINT("State.index: %d\n", index);
> + vmsg->payload.state.num = dev->vq[index].last_avail_index;
> + vmsg->size = sizeof(vmsg->payload.state);
> +
> + /* reply */
But Reply above. Pls keep it consistent.
> + return 1;
> }
>
> static int
> @@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
> DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
> dev->vq[index].kick_fd, index);
> }
> + if (dev->vq[0].kick_fd != -1 &&
> + dev->vq[1].kick_fd != -1) {
> + dev->ready = 1;
I'm not sure what this does. Related to logging somehow?
Anyway, processing a VQ should happen after either
- you received a kick
or
- you received VRING_ENABLE
> + }
> return 0;
> }
>
> @@ -858,9 +989,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
> static int
> vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - /* FIXME: unimplented */
> + vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
> DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> - return 0;
> + vmsg->size = sizeof(vmsg->payload.u64);
> +
> + /* Reply */
> + return 1;
> }
>
> static int
> @@ -1012,6 +1146,13 @@ vubr_new(const char *path)
> };
> }
>
> + /* Init log */
> + dev->log_call_fd = -1;
> + dev->log_size = 0;
> + dev->log_table = 0;
Pls just put a single space there. Don't align things,
it's hard to keep consistency when one does.
> + dev->ready = 0;
> + dev->features = 0;
> +
> /* Get a UNIX socket. */
> dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
> if (dev->sock == -1) {
> --
> --Victor
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
2015-11-12 14:38 ` Michael S. Tsirkin
@ 2015-11-12 17:39 ` Victor Kaplansky
0 siblings, 0 replies; 3+ messages in thread
From: Victor Kaplansky @ 2015-11-12 17:39 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: Marc-Andre Lureau, qemu-devel, peterx
On Thu, Nov 12, 2015 at 04:38:51PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote:
> > /* Based on qemu/hw/virtio/vhost-user.c */
> > @@ -173,6 +180,9 @@ typedef struct VubrVirtq {
> > #define VHOST_MEMORY_MAX_NREGIONS 8
> > #define VHOST_USER_F_PROTOCOL_FEATURES 30
> >
> > +typedef uint8_t vhost_log_chunk_t;
>
> Most code just uses uint8_t directly - I think you should
> just drop this typedef.
Oh, right. I'll clean this.
> > @@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
> > struct vhost_vring_state state;
> > struct vhost_vring_addr addr;
> > VhostUserMemory memory;
> > + VhostUserLog log;
> > } payload;
> > int fds[VHOST_MEMORY_MAX_NREGIONS];
> > int fd_num;
> > @@ -265,8 +281,13 @@ typedef struct VubrDev {
> > uint32_t nregions;
> > VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> > VubrVirtq vq[MAX_NR_VIRTQUEUE];
> > + int log_call_fd;
>
> This doesn't seem to be used. Pls add code to signal this
> after logging.
>
Right, implementation of SET_LOG_FD request handler was on my TODO
list. I'll include it in the next version of the patch (as well
as using it for the signaling).
> > @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
> > }
> > }
> >
> > +
> > +static void
> > +vubr_log_page(uint8_t *log_table, uint64_t page)
> > +{
> > + DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> > + log_table[page / 8] |= 1 << (page % 8);
> > +}
> > +
>
> This is only safe if there's a single writer.
> Please add a comment that says as much.
> Or set this atomically, that's also not hard to do.
>
I'll set it atomically.
> > +static void
> > +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
> > +{
> > + uint64_t page;
> > +
> > + if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
> > + return;
> > + }
> > +
> > + assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
>
> I think there's an off by 1 here.
> Imagine size == 0, test should always fail.
> But imagine address == 0 and length == 1.
> You get >= and test passes, seems wrong.
>
Oh, good catch. Will fix it. Hopefully, right way this time. ;-)
> > + rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> > + log_mmap_offset);
> > + if (rc == MAP_FAILED) {
> > + vubr_die("mmap");
> > + }
> > + dev->log_table = (vhost_log_chunk_t *) rc;
>
> Cast is not needed here.
>
Indeed.
> > @@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
> > DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
> > dev->vq[index].kick_fd, index);
> > }
> > + if (dev->vq[0].kick_fd != -1 &&
> > + dev->vq[1].kick_fd != -1) {
> > + dev->ready = 1;
>
> I'm not sure what this does. Related to logging somehow?
> Anyway, processing a VQ should happen after either
> - you received a kick
> or
> - you received VRING_ENABLE
It is a temporarily hack to determine if vrings are ready for
processing. AFAIR, DPDK code uses the same heuristics. I'll add an
explanation in the comments.
--Victor
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-11-12 17:39 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-12 12:34 [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages Victor Kaplansky
2015-11-12 14:38 ` Michael S. Tsirkin
2015-11-12 17:39 ` Victor Kaplansky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.