From: Victor Kaplansky <victork@redhat.com>
To: qemu-devel@nongnu.org
Cc: Victor Kaplansky <victork@redhat.com>,
Marc-Andre Lureau <mlureau@redhat.com>,
peterx@redhat.com, "Michael S. Tsirkin" <mst@redhat.com>
Subject: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
Date: Thu, 12 Nov 2015 14:34:56 +0200 [thread overview]
Message-ID: <1447331679-6270-1-git-send-email-victork@redhat.com> (raw)
During migration devices continue writing to the guest's memory.
The writes has to be reported to QEMU. This change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.
Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
v2:
- use log_guest_addr for used ring reported by qemu instead of
translating.
- use mmap_size and mmap_offset defined in new
VHOST_USER_SET_LOG_BASE interface. See the patch
"vhost-user: modify SET_LOG_BASE to pass mmap size and
offset".
- start logging dirty pages only after the appropriate feature
is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
- updated TODO list.
tests/vhost-user-bridge.c | 169 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 155 insertions(+), 14 deletions(-)
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..8c1c997 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
/*
* TODO:
* - main should get parameters from the command line.
- * - implement all request handlers.
+ * - implement all request handlers. Still not implemented:
+ * vubr_set_protocol_features_exec()
+ * vubr_get_queue_num_exec()
+ * vubr_set_vring_enable_exec()
+ * vubr_send_rarp_exec()
+ * vubr_set_log_fd_exec()
* - test for broken requests and virtqueue.
* - implement features defined by Virtio 1.0 spec.
* - support mergeable buffers and indirect descriptors.
- * - implement RESET_DEVICE request.
* - implement clean shutdown.
* - implement non-blocking writes to UDP backend.
* - implement polling strategy.
*/
+#define _FILE_OFFSET_BITS 64
+
#include <stddef.h>
#include <assert.h>
#include <stdio.h>
@@ -166,6 +172,7 @@ typedef struct VubrVirtq {
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
+ uint64_t log_guest_addr;
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +180,9 @@ typedef struct VubrVirtq {
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+typedef uint8_t vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 4096
+
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
typedef struct VhostUserMsg {
VhostUserRequest request;
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
+ VhostUserLog log;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
uint32_t nregions;
VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VubrVirtq vq[MAX_NR_VIRTQUEUE];
+ int log_call_fd;
+ uint64_t log_size;
+ vhost_log_chunk_t *log_table;
int backend_udp_sock;
struct sockaddr_in backend_udp_dest;
+ int ready;
+ uint64_t features;
} VubrDev;
static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
rc = recvmsg(conn_fd, &msg, 0);
- if (rc <= 0) {
+ if (rc == 0) {
+ vubr_die("recvmsg");
+ fprintf(stderr, "Peer disconnected.\n");
+ exit(1);
+ }
+ if (rc < 0) {
vubr_die("recvmsg");
}
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
if (vmsg->size) {
rc = read(conn_fd, &vmsg->payload, vmsg->size);
- if (rc <= 0) {
+ if (rc == 0) {
+ vubr_die("recvmsg");
+ fprintf(stderr, "Peer disconnected.\n");
+ exit(1);
+ }
+ if (rc < 0) {
vubr_die("recvmsg");
}
@@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
}
}
+
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+ DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+ log_table[page / 8] |= 1 << (page % 8);
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+ uint64_t page;
+
+ if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
+ return;
+ }
+
+ assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
+
+ page = address / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < address + length) {
+ vubr_log_page(dev->log_table, page);
+ page += VHOST_LOG_PAGE;
+ }
+}
+
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
@@ -510,6 +568,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
if (len <= chunk_len) {
memcpy(chunk_start, buf, len);
+ vubr_log_write(dev, desc[i].addr, len);
} else {
fprintf(stderr,
"Received too long packet from the backend. Dropping...\n");
@@ -519,11 +578,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+ sizeof(used->ring[u_index]));
vq->last_avail_index++;
vq->last_used_index++;
atomic_mb_set(&used->idx, vq->last_used_index);
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(used->idx));
/* Kick the guest if necessary. */
vubr_virtqueue_kick(vq);
@@ -535,6 +600,7 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
@@ -552,6 +618,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
uint32_t chunk_len = desc[i].len;
+ assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
if (len + chunk_len < buf_size) {
memcpy(buf + len, chunk_start, chunk_len);
DPRINT("%d ", chunk_len);
@@ -577,6 +645,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+ sizeof(used->ring[u_index]));
vubr_consume_raw_packet(dev, buf, len);
@@ -588,6 +659,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
+ uint64_t log_guest_addr = vq->log_guest_addr;
while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
vubr_process_desc(dev, vq);
@@ -596,6 +668,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
}
atomic_mb_set(&used->idx, vq->last_used_index);
+ vubr_log_write(dev,
+ log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(used->idx));
}
static void
@@ -609,6 +684,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
int buflen = sizeof(buf);
int len;
+ if (!dev->ready) {
+ return;
+ }
+
DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,9 +735,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 =
((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_NET_F_CTRL_VQ) |
- (1ULL << VIRTIO_NET_F_CTRL_RX) |
- (1ULL << VHOST_F_LOG_ALL));
+ (1ULL << VHOST_F_LOG_ALL) |
+ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
vmsg->size = sizeof(vmsg->payload.u64);
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -671,6 +750,7 @@ static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ dev->features = vmsg->payload.u64;
return 0;
}
@@ -680,10 +760,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
return 0;
}
+static void
+vubr_close_log(VubrDev *dev)
+{
+ if (dev->log_table) {
+ if (munmap(dev->log_table, dev->log_size) != 0) {
+ vubr_die("munmap()");
+ }
+
+ dev->log_table = 0;
+ }
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ dev->log_call_fd = -1;
+ }
+}
+
static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
+ vubr_close_log(dev);
+ dev->ready = 0;
+ dev->features = 0;
return 0;
}
@@ -736,8 +834,30 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ int fd;
+ uint64_t log_mmap_size, log_mmap_offset;
+ void *rc;
+
+ assert(vmsg->fd_num == 1);
+ fd = vmsg->fds[0];
+
+ assert(vmsg->size == sizeof(vmsg->payload.log));
+ log_mmap_offset = vmsg->payload.log.mmap_offset;
+ log_mmap_size = vmsg->payload.log.mmap_size;
+ DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+ DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
+
+ rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ log_mmap_offset);
+ if (rc == MAP_FAILED) {
+ vubr_die("mmap");
+ }
+ dev->log_table = (vhost_log_chunk_t *) rc;
+ dev->log_size = log_mmap_size;
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+ /* Reply */
+ return 1;
}
static int
@@ -777,6 +897,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+ vq->log_guest_addr = vra->log_guest_addr;
DPRINT("Setting virtq addresses:\n");
DPRINT(" vring_desc at %p\n", vq->desc);
@@ -803,8 +924,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ unsigned int index = vmsg->payload.state.index;
+
+ DPRINT("State.index: %d\n", index);
+ vmsg->payload.state.num = dev->vq[index].last_avail_index;
+ vmsg->size = sizeof(vmsg->payload.state);
+
+ /* reply */
+ return 1;
}
static int
@@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
dev->vq[index].kick_fd, index);
}
+ if (dev->vq[0].kick_fd != -1 &&
+ dev->vq[1].kick_fd != -1) {
+ dev->ready = 1;
+ }
return 0;
}
@@ -858,9 +989,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
static int
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
- /* FIXME: unimplented */
+ vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ /* Reply */
+ return 1;
}
static int
@@ -1012,6 +1146,13 @@ vubr_new(const char *path)
};
}
+ /* Init log */
+ dev->log_call_fd = -1;
+ dev->log_size = 0;
+ dev->log_table = 0;
+ dev->ready = 0;
+ dev->features = 0;
+
/* Get a UNIX socket. */
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
if (dev->sock == -1) {
--
--Victor
next reply other threads:[~2015-11-12 12:35 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-12 12:34 Victor Kaplansky [this message]
2015-11-12 14:38 ` [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages Michael S. Tsirkin
2015-11-12 17:39 ` Victor Kaplansky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1447331679-6270-1-git-send-email-victork@redhat.com \
--to=victork@redhat.com \
--cc=mlureau@redhat.com \
--cc=mst@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.