From mboxrd@z Thu Jan 1 00:00:00 1970 From: Isaku Yamahata Subject: Re: [PATCH 21/21] postcopy: implement postcopy livemigration Date: Wed, 4 Jan 2012 12:34:33 +0900 Message-ID: <20120104033433.GN19274@valinux.co.jp> References: <4EFC8C88.70701@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: t.hirofuchi@aist.go.jp, qemu-devel@nongnu.org, kvm@vger.kernel.org, satoshi.itoh@aist.go.jp To: Orit Wasserman Return-path: Content-Disposition: inline In-Reply-To: <4EFC8C88.70701@redhat.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+gceq-qemu-devel=gmane.org@nongnu.org Sender: qemu-devel-bounces+gceq-qemu-devel=gmane.org@nongnu.org List-Id: kvm.vger.kernel.org On Thu, Dec 29, 2011 at 05:51:36PM +0200, Orit Wasserman wrote: > Hi, Thank you for the review. > A general comment this patch is a bit too long,which makes it hard to review. > Can you split it please? Will do. I will probably split it into three parts: the umem.[hc] part, the incoming part, and the outgoing part. > On 12/29/2011 03:26 AM, Isaku Yamahata wrote: > > This patch implements postcopy livemigration. 
> > > > Signed-off-by: Isaku Yamahata > > --- > > Makefile.target | 4 + > > arch_init.c | 26 +- > > cpu-all.h | 7 + > > exec.c | 20 +- > > migration-exec.c | 8 + > > migration-fd.c | 30 + > > migration-postcopy-stub.c | 77 ++ > > migration-postcopy.c | 1891 +++++++++++++++++++++++++++++++++++++++++++++ > > migration-tcp.c | 37 +- > > migration-unix.c | 32 +- > > migration.c | 31 + > > migration.h | 30 + > > qemu-common.h | 1 + > > qemu-options.hx | 5 +- > > umem.c | 379 +++++++++ > > umem.h | 105 +++ > > vl.c | 14 +- > > 17 files changed, 2677 insertions(+), 20 deletions(-) > > create mode 100644 migration-postcopy-stub.c > > create mode 100644 migration-postcopy.c > > create mode 100644 umem.c > > create mode 100644 umem.h > > > > diff --git a/Makefile.target b/Makefile.target > > index 3261383..d94c53f 100644 > > --- a/Makefile.target > > +++ b/Makefile.target > > @@ -4,6 +4,7 @@ GENERATED_HEADERS = config-target.h > > CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y) > > CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y) > > CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y) > > +CONFIG_NO_POSTCOPY = $(if $(subst n,,$(CONFIG_POSTCOPY)),n,y) > > > > include ../config-host.mak > > include config-devices.mak > > @@ -199,6 +200,9 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o > > obj-y += memory.o > > LIBS+=-lz > > > > +common-obj-$(CONFIG_POSTCOPY) += migration-postcopy.o umem.o > > +common-obj-$(CONFIG_NO_POSTCOPY) += migration-postcopy-stub.o > > + > > QEMU_CFLAGS += $(VNC_TLS_CFLAGS) > > QEMU_CFLAGS += $(VNC_SASL_CFLAGS) > > QEMU_CFLAGS += $(VNC_JPEG_CFLAGS) > > diff --git a/arch_init.c b/arch_init.c > > index bc53092..8b3130d 100644 > > --- a/arch_init.c > > +++ b/arch_init.c > > @@ -102,6 +102,13 @@ static int is_dup_page(uint8_t *page, uint8_t ch) > > return 1; > > } > > > > +static bool outgoing_postcopy = false; > > + > > +void ram_save_set_params(const MigrationParams *params, void *opaque) > > +{ > > + outgoing_postcopy = params->postcopy; > > +} > > + > > 
static RAMBlock *last_block_sent = NULL; > > > > int ram_save_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) > > @@ -284,6 +291,17 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > > uint64_t expected_time = 0; > > int ret; > > > > + if (stage == 1) { > > + last_block_sent = NULL; > > + > > + bytes_transferred = 0; > > + last_block = NULL; > > + last_offset = 0; > > Changing of line order + new empty line > > > + } > > + if (outgoing_postcopy) { > > + return postcopy_outgoing_ram_save_live(mon, f, stage, opaque); > > + } > > + > > I would just do : > > unregister_savevm_live and then register_savevm_live(...,postcopy_outgoing_ram_save_live,...) > when starting outgoing postcopy migration. > > > if (stage < 0) { > > cpu_physical_memory_set_dirty_tracking(0); > > return 0; > > @@ -295,10 +313,6 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > > } > > > > if (stage == 1) { > > - bytes_transferred = 0; > > - last_block_sent = NULL; > > - last_block = NULL; > > - last_offset = 0; > > sort_ram_list(); > > > > /* Make sure all dirty bits are set */ > > @@ -436,6 +450,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > > int flags; > > int error; > > > > + if (incoming_postcopy) { > > + return postcopy_incoming_ram_load(f, opaque, version_id); > > + } > > + > why not call register_savevm_live(...,postcopy_incoming_ram_load,...) 
when starting guest with postcopy_incoming > > > if (version_id < 3 || version_id > RAM_SAVE_VERSION_ID) { > > return -EINVAL; > > } > > diff --git a/cpu-all.h b/cpu-all.h > > index 0244f7a..2e9d8a7 100644 > > --- a/cpu-all.h > > +++ b/cpu-all.h > > @@ -475,6 +475,9 @@ extern ram_addr_t ram_size; > > /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ > > #define RAM_PREALLOC_MASK (1 << 0) > > > > +/* RAM is allocated via umem for postcopy incoming mode */ > > +#define RAM_POSTCOPY_UMEM_MASK (1 << 1) > > + > > typedef struct RAMBlock { > > uint8_t *host; > > ram_addr_t offset; > > @@ -485,6 +488,10 @@ typedef struct RAMBlock { > > #if defined(__linux__) && !defined(TARGET_S390X) > > int fd; > > #endif > > + > > +#ifdef CONFIG_POSTCOPY > > + UMem *umem; /* for incoming postcopy mode */ > > +#endif > > } RAMBlock; > > > > typedef struct RAMList { > > diff --git a/exec.c b/exec.c > > index c8c6692..90b0491 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -35,6 +35,7 @@ > > #include "qemu-timer.h" > > #include "memory.h" > > #include "exec-memory.h" > > +#include "migration.h" > > #if defined(CONFIG_USER_ONLY) > > #include > > #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) > > @@ -2949,6 +2950,13 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, > > new_block->host = host; > > new_block->flags |= RAM_PREALLOC_MASK; > > } else { > > +#ifdef CONFIG_POSTCOPY > > + if (incoming_postcopy) { > > + postcopy_incoming_ram_alloc(name, size, > > + &new_block->host, &new_block->umem); > > + new_block->flags |= RAM_POSTCOPY_UMEM_MASK; > > + } else > > +#endif > > if (mem_path) { > > #if defined (__linux__) && !defined(TARGET_S390X) > > new_block->host = file_ram_alloc(new_block, size, mem_path); > > @@ -3027,7 +3035,13 @@ void qemu_ram_free(ram_addr_t addr) > > QLIST_REMOVE(block, next); > > if (block->flags & RAM_PREALLOC_MASK) { > > ; > > - } else if (mem_path) { > > + } > > +#ifdef CONFIG_POSTCOPY > > + else if (block->flags & 
RAM_POSTCOPY_UMEM_MASK) { > > + postcopy_incoming_ram_free(block->umem); > > + } > > +#endif > > + else if (mem_path) { > > #if defined (__linux__) && !defined(TARGET_S390X) > > if (block->fd) { > > munmap(block->host, block->length); > > @@ -3073,6 +3087,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) > > } else { > > flags = MAP_FIXED; > > munmap(vaddr, length); > > + if (block->flags & RAM_POSTCOPY_UMEM_MASK) { > > + postcopy_incoming_qemu_pages_unmapped(addr, length); > > + block->flags &= ~RAM_POSTCOPY_UMEM_MASK; > > + } > > if (mem_path) { > > #if defined(__linux__) && !defined(TARGET_S390X) > > if (block->fd) { > > diff --git a/migration-exec.c b/migration-exec.c > > index e14552e..2bd0c3b 100644 > > --- a/migration-exec.c > > +++ b/migration-exec.c > > @@ -62,6 +62,10 @@ int exec_start_outgoing_migration(MigrationState *s, const char *command) > > { > > FILE *f; > > > > + if (s->params.postcopy) { > > + return -ENOSYS; > > + } > > + > > f = popen(command, "w"); > > if (f == NULL) { > > DPRINTF("Unable to popen exec target\n"); > > @@ -104,6 +108,10 @@ int exec_start_incoming_migration(const char *command) > > { > > QEMUFile *f; > > > > + if (incoming_postcopy) { > > + return -ENOSYS; > > + } > > + > > DPRINTF("Attempting to start an incoming migration\n"); > > f = qemu_popen_cmd(command, "r"); > > if(f == NULL) { > > diff --git a/migration-fd.c b/migration-fd.c > > index 6211124..5a62ab9 100644 > > --- a/migration-fd.c > > +++ b/migration-fd.c > > @@ -88,6 +88,23 @@ int fd_start_outgoing_migration(MigrationState *s, const char *fdname) > > s->write = fd_write; > > s->close = fd_close; > > > > + if (s->params.postcopy) { > > + int flags = fcntl(s->fd, F_GETFL); > > + if ((flags & O_ACCMODE) != O_RDWR) { > > + goto err_after_open; > > + } > > + > > + s->fd_read = dup(s->fd); > > + if (s->fd_read == -1) { > > + goto err_after_open; > > + } > > + s->file_read = qemu_fdopen(s->fd_read, "r"); > > + if (s->file_read == NULL) { > > + 
close(s->fd_read); > > + goto err_after_open; > > + } > > + } > > + > > migrate_fd_connect(s); > > return 0; > > > > @@ -103,7 +120,14 @@ static void fd_accept_incoming_migration(void *opaque) > > > > process_incoming_migration(f); > > qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL); > > + if (incoming_postcopy) { > > + postcopy_incoming_fork_umemd(qemu_stdio_fd(f), f); > > + } > > qemu_fclose(f); > > + if (incoming_postcopy) { > > + postcopy_incoming_qemu_ready(); > > + } > > + return; > > } > > > > int fd_start_incoming_migration(const char *infd) > > @@ -114,6 +138,12 @@ int fd_start_incoming_migration(const char *infd) > > DPRINTF("Attempting to start an incoming migration via fd\n"); > > > > fd = strtol(infd, NULL, 0); > > + if (incoming_postcopy) { > > + int flags = fcntl(fd, F_GETFL); > > + if ((flags & O_ACCMODE) != O_RDWR) { > > + return -EINVAL; > > + } > > + } > > f = qemu_fdopen(fd, "rb"); > > if(f == NULL) { > > DPRINTF("Unable to apply qemu wrapper to file descriptor\n"); > > diff --git a/migration-postcopy-stub.c b/migration-postcopy-stub.c > > new file mode 100644 > > index 0000000..0b78de7 > > --- /dev/null > > +++ b/migration-postcopy-stub.c > > @@ -0,0 +1,77 @@ > > +/* > > + * migration-postcopy-stub.c: postcopy livemigration > > + * stub functions for non-supported hosts > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#include "sysemu.h" > > +#include "migration.h" > > + > > +int postcopy_outgoing_create_read_socket(MigrationState *s) > > +{ > > + return -ENOSYS; > > +} > > + > > +int postcopy_outgoing_ram_save_live(Monitor *mon, > > + QEMUFile *f, int stage, void *opaque) > > +{ > > + return -ENOSYS; > > +} > > + > > +void *postcopy_outgoing_begin(MigrationState *ms) > > +{ > > + return NULL; > > +} > > + > > +int postcopy_outgoing_ram_save_background(Monitor *mon, QEMUFile *f, > > + void *postcopy) > > +{ > > + return -ENOSYS; > > +} > > + > > +int postcopy_incoming_init(const char *incoming, bool incoming_postcopy) > > +{ > > + return -ENOSYS; > > +} > > + > > +void postcopy_incoming_prepare(void) > > +{ > > +} > > + > > +int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id) > > +{ > > + return -ENOSYS; > > +} > > + > > +void postcopy_incoming_fork_umemd(int mig_read_fd, QEMUFile *mig_read) > > +{ > > +} > > + > > +void postcopy_incoming_qemu_ready(void) > > +{ > > +} > > + > > +void postcopy_incoming_qemu_cleanup(void) > > +{ > > +} > > + > > +void postcopy_incoming_qemu_pages_unmapped(ram_addr_t addr, ram_addr_t size) > > +{ > > +} > > diff --git a/migration-postcopy.c b/migration-postcopy.c > > new file mode 100644 > > index 0000000..ed0d574 > > --- /dev/null > > +++ b/migration-postcopy.c > > @@ -0,0 +1,1891 @@ > > +/* > > + * migration-postcopy.c: postcopy livemigration > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > 
+ * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#include "bitmap.h" > > +#include "sysemu.h" > > +#include "hw/hw.h" > > +#include "arch_init.h" > > +#include "migration.h" > > +#include "umem.h" > > + > > +#include "memory.h" > > +#define WANT_EXEC_OBSOLETE > > +#include "exec-obsolete.h" > > + > > +//#define DEBUG_POSTCOPY > > +#ifdef DEBUG_POSTCOPY > > +#include > > +#define DPRINTF(fmt, ...) \ > > + do { \ > > + printf("%d:%ld %s:%d: " fmt, getpid(), syscall(SYS_gettid), \ > > + __func__, __LINE__, ## __VA_ARGS__); \ > > + } while (0) > > +#else > > +#define DPRINTF(fmt, ...) do { } while (0) > > +#endif > > + > > +#define ALIGN_UP(size, align) (((size) + (align) - 1) & ~((align) - 1)) > > + > > +static void fd_close(int *fd) > > +{ > > + if (*fd >= 0) { > > + close(*fd); > > + *fd = -1; > > + } > > +} > > + > > +/*************************************************************************** > > + * QEMUFile for non blocking pipe > > + */ > > + > > +/* read only */ > > +struct QEMUFilePipe { > > + int fd; > > + QEMUFile *file; > > +}; > > Why not use QEMUFileSocket ? Okay, will rename it to QEMUFile_FD (or whatever) and share the struct. 
> > +typedef struct QEMUFilePipe QEMUFilePipe; > > + > > +static int pipe_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) > > +{ > > + QEMUFilePipe *s = opaque; > > + ssize_t len = 0; > > + > > + while (size > 0) { > > + ssize_t ret = read(s->fd, buf, size); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } > > + if (len == 0) { > > + len = -errno; > > + } > > + break; > > + } > > + > > + if (ret == 0) { > > + /* the write end of the pipe is closed */ > > + break; > > + } > > + len += ret; > > + buf += ret; > > + size -= ret; > > + } > > + > > + return len; > > +} > > + > > +static int pipe_close(void *opaque) > > +{ > > + QEMUFilePipe *s = opaque; > > + g_free(s); > > + return 0; > > +} > > + > > +static QEMUFile *qemu_fopen_pipe(int fd) > > +{ > > + QEMUFilePipe *s = g_malloc0(sizeof(*s)); > > + > > + s->fd = fd; > > + fcntl_setfl(fd, O_NONBLOCK); > > + s->file = qemu_fopen_ops(s, NULL, pipe_get_buffer, pipe_close, > > + NULL, NULL, NULL); > > + return s->file; > > +} > > + > > +/* write only */ > > +struct QEMUFileNonblock { > > + int fd; > > + QEMUFile *file; > > + > > + /* for pipe-write nonblocking mode */ > > +#define BUF_SIZE_INC (32 * 1024) /* = IO_BUF_SIZE */ > > + uint8_t *buffer; > > + size_t buffer_size; > > + size_t buffer_capacity; > > + bool freeze_output; > > +}; > > +typedef struct QEMUFileNonblock QEMUFileNonblock; > > + > > Couldn't you use QEMUFileBuffered ? QEMUFileBuffered can be built on top of QEMUFileNonblock. 
I'll refactor buffered_file.c > > +static void nonblock_flush_buffer(QEMUFileNonblock *s) > > +{ > > + size_t offset = 0; > > + ssize_t ret; > > + > > + while (offset < s->buffer_size) { > > + ret = write(s->fd, s->buffer + offset, s->buffer_size - offset); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } else if (errno == EAGAIN) { > > + s->freeze_output = true; > > + } else { > > + qemu_file_set_error(s->file, errno); > > + } > > + break; > > + } > > + > > + if (ret == 0) { > > + DPRINTF("ret == 0\n"); > > + break; > > + } > > + > > + offset += ret; > > + } > > + > > + if (offset > 0) { > > + assert(s->buffer_size >= offset); > > + memmove(s->buffer, s->buffer + offset, s->buffer_size - offset); > > + s->buffer_size -= offset; > > + } > > + if (s->buffer_size > 0) { > > + s->freeze_output = true; > > + } > > +} > > + > > +static int nonblock_put_buffer(void *opaque, > > + const uint8_t *buf, int64_t pos, int size) > > +{ > > + QEMUFileNonblock *s = opaque; > > + int error; > > + ssize_t len = 0; > > + > > + error = qemu_file_get_error(s->file); > > + if (error) { > > + return error; > > + } > > + > > + nonblock_flush_buffer(s); > > + error = qemu_file_get_error(s->file); > > + if (error) { > > + return error; > > + } > > + > > + while (!s->freeze_output && size > 0) { > > + ssize_t ret; > > + assert(s->buffer_size == 0); > > + > > + ret = write(s->fd, buf, size); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } else if (errno == EAGAIN) { > > + s->freeze_output = true; > > + } else { > > + qemu_file_set_error(s->file, errno); > > + } > > + break; > > + } > > + > > + len += ret; > > + buf += ret; > > + size -= ret; > > + } > > + > > + if (size > 0) { > > + int inc = size - (s->buffer_capacity - s->buffer_size); > > + if (inc > 0) { > > + s->buffer_capacity += > > + DIV_ROUND_UP(inc, BUF_SIZE_INC) * BUF_SIZE_INC; > > + s->buffer = g_realloc(s->buffer, s->buffer_capacity); > > + } > > + memcpy(s->buffer + 
s->buffer_size, buf, size); > > + s->buffer_size += size; > > + > > + len += size; > > + } > > + > > + return len; > > +} > > + > > +static int nonblock_pending_size(QEMUFileNonblock *s) > > +{ > > + return qemu_pending_size(s->file) + s->buffer_size; > > +} > > + > > +static void nonblock_fflush(QEMUFileNonblock *s) > > +{ > > + s->freeze_output = false; > > + nonblock_flush_buffer(s); > > + if (!s->freeze_output) { > > + qemu_fflush(s->file); > > + } > > +} > > + > > +static void nonblock_wait_for_flush(QEMUFileNonblock *s) > > +{ > > + while (nonblock_pending_size(s) > 0) { > > + fd_set fds; > > + FD_ZERO(&fds); > > + FD_SET(s->fd, &fds); > > + select(s->fd + 1, NULL, &fds, NULL, NULL); > > + > > + nonblock_fflush(s); > > + } > > +} > > + > > +static int nonblock_close(void *opaque) > > +{ > > + QEMUFileNonblock *s = opaque; > > + nonblock_wait_for_flush(s); > > + g_free(s->buffer); > > + g_free(s); > > + return 0; > > +} > > + > > +static QEMUFileNonblock *qemu_fopen_nonblock(int fd) > > +{ > > + QEMUFileNonblock *s = g_malloc0(sizeof(*s)); > > + > > + s->fd = fd; > > + fcntl_setfl(fd, O_NONBLOCK); > > + s->file = qemu_fopen_ops(s, nonblock_put_buffer, NULL, nonblock_close, > > + NULL, NULL, NULL); > > + return s; > > +} > > + > > +/*************************************************************************** > > + * umem daemon on destination <-> qemu on source protocol > > + */ > > + > > +#define QEMU_UMEM_REQ_INIT 0x00 > > +#define QEMU_UMEM_REQ_ON_DEMAND 0x01 > > +#define QEMU_UMEM_REQ_ON_DEMAND_CONT 0x02 > > +#define QEMU_UMEM_REQ_BACKGROUND 0x03 > > +#define QEMU_UMEM_REQ_BACKGROUND_CONT 0x04 > > +#define QEMU_UMEM_REQ_REMOVE 0x05 > > +#define QEMU_UMEM_REQ_EOC 0x06 > > + > > +struct qemu_umem_req { > > + int8_t cmd; > > + uint8_t len; > > + char *idstr; /* ON_DEMAND, BACKGROUND, REMOVE */ > > + uint32_t nr; /* ON_DEMAND, ON_DEMAND_CONT, > > + BACKGROUND, BACKGROUND_CONT, REMOVE */ > > + > > + /* in target page size as qemu migration protocol */ > > + 
uint64_t *pgoffs; /* ON_DEMAND, ON_DEMAND_CONT, > > + BACKGROUND, BACKGROUND_CONT, REMOVE */ > > +}; > > + > > +static void postcopy_incoming_send_req_idstr(QEMUFile *f, const char* idstr) > > +{ > > + qemu_put_byte(f, strlen(idstr)); > > + qemu_put_buffer(f, (uint8_t *)idstr, strlen(idstr)); > > +} > > + > > +static void postcopy_incoming_send_req_pgoffs(QEMUFile *f, uint32_t nr, > > + const uint64_t *pgoffs) > > +{ > > + uint32_t i; > > + > > + qemu_put_be32(f, nr); > > + for (i = 0; i < nr; i++) { > > + qemu_put_be64(f, pgoffs[i]); > > + } > > +} > > + > > +static void postcopy_incoming_send_req_one(QEMUFile *f, > > + const struct qemu_umem_req *req) > > +{ > > + DPRINTF("cmd %d\n", req->cmd); > > + qemu_put_byte(f, req->cmd); > > + switch (req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + case QEMU_UMEM_REQ_EOC: > > + /* nothing */ > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + case QEMU_UMEM_REQ_REMOVE: > > + postcopy_incoming_send_req_idstr(f, req->idstr); > > + postcopy_incoming_send_req_pgoffs(f, req->nr, req->pgoffs); > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + postcopy_incoming_send_req_pgoffs(f, req->nr, req->pgoffs); > > + break; > > + default: > > + abort(); > > + break; > > + } > > +} > > + > > +/* QEMUFile can buffer up to IO_BUF_SIZE = 32 * 1024. 
> > + * So one message size must be <= IO_BUF_SIZE > > + * cmd: 1 > > + * id len: 1 > > + * id: 256 > > + * nr: 2 > > + */ > > +#define MAX_PAGE_NR ((32 * 1024 - 1 - 1 - 256 - 2) / sizeof(uint64_t)) > > +static void postcopy_incoming_send_req(QEMUFile *f, > > + const struct qemu_umem_req *req) > > +{ > > + uint32_t nr = req->nr; > > + struct qemu_umem_req tmp = *req; > > + > > + switch (req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + case QEMU_UMEM_REQ_EOC: > > + postcopy_incoming_send_req_one(f, &tmp); > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + tmp.nr = MIN(nr, MAX_PAGE_NR); > > + postcopy_incoming_send_req_one(f, &tmp); > > + > > + nr -= tmp.nr; > > + tmp.pgoffs += tmp.nr; > > + if (tmp.cmd == QEMU_UMEM_REQ_ON_DEMAND) { > > + tmp.cmd = QEMU_UMEM_REQ_ON_DEMAND_CONT; > > + }else { > > + tmp.cmd = QEMU_UMEM_REQ_BACKGROUND_CONT; > > + } > > + /* fall through */ > > + case QEMU_UMEM_REQ_REMOVE: > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + while (nr > 0) { > > + tmp.nr = MIN(nr, MAX_PAGE_NR); > > + postcopy_incoming_send_req_one(f, &tmp); > > + > > + nr -= tmp.nr; > > + tmp.pgoffs += tmp.nr; > > + } > > + break; > > + default: > > + abort(); > > + break; > > + } > > +} > > + > > +static int postcopy_outgoing_recv_req_idstr(QEMUFile *f, > > + struct qemu_umem_req *req, > > + size_t *offset) > > +{ > > + int ret; > > + > > + req->len = qemu_peek_byte(f, *offset); > > + *offset += 1; > > + if (req->len == 0) { > > + return -EAGAIN; > > + } > > + req->idstr = g_malloc((int)req->len + 1); > > + ret = qemu_peek_buffer(f, (uint8_t*)req->idstr, req->len, *offset); > > + *offset += ret; > > + if (ret != req->len) { > > + g_free(req->idstr); > > + req->idstr = NULL; > > + return -EAGAIN; > > + } > > + req->idstr[req->len] = 0; > > + return 0; > > +} > > + > > +static int postcopy_outgoing_recv_req_pgoffs(QEMUFile *f, > > + struct qemu_umem_req *req, > > + size_t *offset) > > +{ > > + 
int ret; > > + uint32_t be32; > > + uint32_t i; > > + > > + ret = qemu_peek_buffer(f, (uint8_t*)&be32, sizeof(be32), *offset); > > + *offset += sizeof(be32); > > + if (ret != sizeof(be32)) { > > + return -EAGAIN; > > + } > > + > > + req->nr = be32_to_cpu(be32); > > + req->pgoffs = g_new(uint64_t, req->nr); > > + for (i = 0; i < req->nr; i++) { > > + uint64_t be64; > > + ret = qemu_peek_buffer(f, (uint8_t*)&be64, sizeof(be64), *offset); > > + *offset += sizeof(be64); > > + if (ret != sizeof(be64)) { > > + g_free(req->pgoffs); > > + req->pgoffs = NULL; > > + return -EAGAIN; > > + } > > + req->pgoffs[i] = be64_to_cpu(be64); > > + } > > + return 0; > > +} > > + > > +static int postcopy_outgoing_recv_req(QEMUFile *f, struct qemu_umem_req *req) > > +{ > > + int size; > > + int ret; > > + size_t offset = 0; > > + > > + size = qemu_peek_buffer(f, (uint8_t*)&req->cmd, 1, offset); > > + if (size <= 0) { > > + return -EAGAIN; > > + } > > + offset += 1; > > + > > + switch (req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + case QEMU_UMEM_REQ_EOC: > > + /* nothing */ > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + case QEMU_UMEM_REQ_REMOVE: > > + ret = postcopy_outgoing_recv_req_idstr(f, req, &offset); > > + if (ret < 0) { > > + return ret; > > + } > > + ret = postcopy_outgoing_recv_req_pgoffs(f, req, &offset); > > + if (ret < 0) { > > + return ret; > > + } > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + ret = postcopy_outgoing_recv_req_pgoffs(f, req, &offset); > > + if (ret < 0) { > > + return ret; > > + } > > + break; > > + default: > > + abort(); > > + break; > > + } > > + qemu_file_skip(f, offset); > > + DPRINTF("cmd %d\n", req->cmd); > > + return 0; > > +} > > + > > +static void postcopy_outgoing_free_req(struct qemu_umem_req *req) > > +{ > > + g_free(req->idstr); > > + g_free(req->pgoffs); > > +} > > + > > 
+/*************************************************************************** > > + * outgoing part > > + */ > > + > > +#define QEMU_SAVE_LIVE_STAGE_START 0x01 /* = QEMU_VM_SECTION_START */ > > +#define QEMU_SAVE_LIVE_STAGE_PART 0x02 /* = QEMU_VM_SECTION_PART */ > > +#define QEMU_SAVE_LIVE_STAGE_END 0x03 /* = QEMU_VM_SECTION_END */ > > + > > +enum POState { > > + PO_STATE_ERROR_RECEIVE, > > + PO_STATE_ACTIVE, > > + PO_STATE_EOC_RECEIVED, > > + PO_STATE_ALL_PAGES_SENT, > > + PO_STATE_COMPLETED, > > +}; > > +typedef enum POState POState; > > + > > +struct PostcopyOutgoingState { > > + POState state; > > + QEMUFile *mig_read; > > + int fd_read; > > + RAMBlock *last_block_read; > > + > > + QEMUFile *mig_buffered_write; > > + MigrationState *ms; > > + > > + /* For nobg mode. Check if all pages are sent */ > > + RAMBlock *block; > > + ram_addr_t addr; > > +}; > > +typedef struct PostcopyOutgoingState PostcopyOutgoingState; > > + > > +int postcopy_outgoing_create_read_socket(MigrationState *s) > > +{ > > + if (!s->params.postcopy) { > > + return 0; > > + } > > + > > + s->fd_read = dup(s->fd); > > + if (s->fd_read == -1) { > > + int ret = -errno; > > + perror("dup"); > > + return ret; > > + } > > + s->file_read = qemu_fopen_socket(s->fd_read); > > + if (s->file_read == NULL) { > > + return -EINVAL; > > + } > > + return 0; > > +} > > + > > +int postcopy_outgoing_ram_save_live(Monitor *mon, > > + QEMUFile *f, int stage, void *opaque) > > +{ > > + int ret = 0; > > + DPRINTF("stage %d\n", stage); > > + if (stage == QEMU_SAVE_LIVE_STAGE_START) { > > + sort_ram_list(); > > + ram_save_live_mem_size(f); > > + } > > + if (stage == QEMU_SAVE_LIVE_STAGE_PART) { > > + ret = 1; > > + } > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + return ret; > > +} > > + > > +static RAMBlock *postcopy_outgoing_find_block(const char *idstr) > > +{ > > + RAMBlock *block; > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (!strncmp(idstr, block->idstr, strlen(idstr))) { > > + return block; 
> > + } > > + } > > + return NULL; > > +} > > + > > +/* > > + * return value > > + * 0: continue postcopy mode > > + * > 0: completed postcopy mode. > > + * < 0: error > > + */ > > +static int postcopy_outgoing_handle_req(PostcopyOutgoingState *s, > > + const struct qemu_umem_req *req, > > + bool *written) > > +{ > > + int i; > > + RAMBlock *block; > > + > > + DPRINTF("cmd %d state %d\n", req->cmd, s->state); > > + switch(req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + /* nothing */ > > + break; > > + case QEMU_UMEM_REQ_EOC: > > + /* tell to finish migration. */ > > + if (s->state == PO_STATE_ALL_PAGES_SENT) { > > + s->state = PO_STATE_COMPLETED; > > + DPRINTF("-> PO_STATE_COMPLETED\n"); > > + } else { > > + s->state = PO_STATE_EOC_RECEIVED; > > + DPRINTF("-> PO_STATE_EOC_RECEIVED\n"); > > + } > > + return 1; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + DPRINTF("idstr: %s\n", req->idstr); > > + block = postcopy_outgoing_find_block(req->idstr); > > + if (block == NULL) { > > + return -EINVAL; > > + } > > + s->last_block_read = block; > > + /* fall through */ > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + DPRINTF("nr %d\n", req->nr); > > + for (i = 0; i < req->nr; i++) { > > + DPRINTF("offs[%d] 0x%"PRIx64"\n", i, req->pgoffs[i]); > > + int ret = ram_save_page(s->mig_buffered_write, s->last_block_read, > > + req->pgoffs[i] << TARGET_PAGE_BITS); > > + if (ret > 0) { > > + *written = true; > > + } > > + } > > + break; > > + case QEMU_UMEM_REQ_REMOVE: > > + block = postcopy_outgoing_find_block(req->idstr); > > + if (block == NULL) { > > + return -EINVAL; > > + } > > + for (i = 0; i < req->nr; i++) { > > + ram_addr_t addr = block->offset + > > + (req->pgoffs[i] << TARGET_PAGE_BITS); > > + cpu_physical_memory_reset_dirty(addr, > > + addr + TARGET_PAGE_SIZE, > > + MIGRATION_DIRTY_FLAG); > > + } > > + break; > > + default: > > + return -EINVAL; > > + } > > + return 0; > > +} > > + > > +static void 
postcopy_outgoing_close_mig_read(PostcopyOutgoingState *s) > > +{ > > + if (s->mig_read != NULL) { > > + qemu_set_fd_handler(s->fd_read, NULL, NULL, NULL); > > + qemu_fclose(s->mig_read); > > + s->mig_read = NULL; > > + fd_close(&s->fd_read); > > + > > + s->ms->file_read = NULL; > > + s->ms->fd_read = -1; > > + } > > +} > > + > > +static void postcopy_outgoing_completed(PostcopyOutgoingState *s) > > +{ > > + postcopy_outgoing_close_mig_read(s); > > + s->ms->postcopy = NULL; > > + g_free(s); > > +} > > + > > +static void postcopy_outgoing_recv_handler(void *opaque) > > +{ > > + PostcopyOutgoingState *s = opaque; > > + bool written = false; > > + int ret = 0; > > + > > + assert(s->state == PO_STATE_ACTIVE || > > + s->state == PO_STATE_ALL_PAGES_SENT); > > + > > + do { > > + struct qemu_umem_req req = {.idstr = NULL, > > + .pgoffs = NULL}; > > + > > + ret = postcopy_outgoing_recv_req(s->mig_read, &req); > > + if (ret < 0) { > > + if (ret == -EAGAIN) { > > + ret = 0; > > + } > > + break; > > + } > > + if (s->state == PO_STATE_ACTIVE) { > > + ret = postcopy_outgoing_handle_req(s, &req, &written); > > + } > > + postcopy_outgoing_free_req(&req); > > + } while (ret == 0); > > + > > + /* > > + * flush buffered_file. > > + * Although mig_write is rate-limited buffered file, those written pages > > + * are requested on demand by the destination. 
So forcibly push > > + * those pages ignoring rate limiting > > + */ > > + if (written) { > > + qemu_fflush(s->mig_buffered_write); > > + /* qemu_buffered_file_drain(s->mig_buffered_write); */ > > + } > > + > > + if (ret < 0) { > > + switch (s->state) { > > + case PO_STATE_ACTIVE: > > + s->state = PO_STATE_ERROR_RECEIVE; > > + DPRINTF("-> PO_STATE_ERROR_RECEIVE\n"); > > + break; > > + case PO_STATE_ALL_PAGES_SENT: > > + s->state = PO_STATE_COMPLETED; > > + DPRINTF("-> PO_STATE_ALL_PAGES_SENT\n"); > > + break; > > + default: > > + abort(); > > + } > > + } > > + if (s->state == PO_STATE_ERROR_RECEIVE || s->state == PO_STATE_COMPLETED) { > > + postcopy_outgoing_close_mig_read(s); > > + } > > + if (s->state == PO_STATE_COMPLETED) { > > + DPRINTF("PO_STATE_COMPLETED\n"); > > + MigrationState *ms = s->ms; > > + postcopy_outgoing_completed(s); > > + migrate_fd_completed(ms); > > + } > > +} > > + > > +void *postcopy_outgoing_begin(MigrationState *ms) > > +{ > > + PostcopyOutgoingState *s = g_new(PostcopyOutgoingState, 1); > > + DPRINTF("outgoing begin\n"); > > + qemu_fflush(ms->file); > > + > > + s->ms = ms; > > + s->state = PO_STATE_ACTIVE; > > + s->fd_read = ms->fd_read; > > + s->mig_read = ms->file_read; > > + s->mig_buffered_write = ms->file; > > + s->block = NULL; > > + s->addr = 0; > > + > > + /* Make sure all dirty bits are set */ > > + ram_save_memory_set_dirty(); > > + > > + qemu_set_fd_handler(s->fd_read, > > + &postcopy_outgoing_recv_handler, NULL, s); > > + return s; > > +} > > + > > +static void postcopy_outgoing_ram_all_sent(QEMUFile *f, > > + PostcopyOutgoingState *s) > > +{ > > + assert(s->state == PO_STATE_ACTIVE); > > + > > + s->state = PO_STATE_ALL_PAGES_SENT; > > + /* tell incoming side that all pages are sent */ > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + qemu_fflush(f); > > + qemu_buffered_file_drain(f); > > + DPRINTF("sent RAM_SAVE_FLAG_EOS\n"); > > + migrate_fd_cleanup(s->ms); > > + > > + /* Later migrate_fd_complete() will be called which 
calls > > + * migrate_fd_cleanup() again. So dummy file is created > > + * for qemu monitor to keep working. > > + */ > > + s->ms->file = qemu_fopen_ops(NULL, NULL, NULL, NULL, NULL, > > + NULL, NULL); > > +} > > + > > +static int postcopy_outgoing_check_all_ram_sent(PostcopyOutgoingState *s, > > + RAMBlock *block, > > + ram_addr_t addr) > > +{ > > + if (block == NULL) { > > + block = QLIST_FIRST(&ram_list.blocks); > > + addr = block->offset; > > + } > > + > > + for (; block != NULL; > > + s->block = QLIST_NEXT(s->block, next), addr = block->offset) { > > + for (; addr < block->offset + block->length; > > + addr += TARGET_PAGE_SIZE) { > > + if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) { > > + s->block = block; > > + s->addr = addr; > > + return 0; > > + } > > + } > > + } > > + > > + return 1; > > +} > > + > > +int postcopy_outgoing_ram_save_background(Monitor *mon, QEMUFile *f, > > + void *postcopy) > > +{ > > + PostcopyOutgoingState *s = postcopy; > > + > > + assert(s->state == PO_STATE_ACTIVE || > > + s->state == PO_STATE_EOC_RECEIVED || > > + s->state == PO_STATE_ERROR_RECEIVE); > > + > > + switch (s->state) { > > + case PO_STATE_ACTIVE: > > + /* nothing. processed below */ > > + break; > > + case PO_STATE_EOC_RECEIVED: > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + s->state = PO_STATE_COMPLETED; > > + postcopy_outgoing_completed(s); > > + DPRINTF("PO_STATE_COMPLETED\n"); > > + return 1; > > + case PO_STATE_ERROR_RECEIVE: > > + postcopy_outgoing_completed(s); > > + DPRINTF("PO_STATE_ERROR_RECEIVE\n"); > > + return -1; > > + default: > > + abort(); > > + } > > + > > + if (s->ms->params.nobg) { > > + /* See if all pages are sent. */ > > + if (postcopy_outgoing_check_all_ram_sent(s, s->block, s->addr) == 0) { > > + return 0; > > + } > > + /* ram_list can be reordered. 
(it doesn't seem so during migration, > > + though) So the whole list needs to be checked again */ > > + if (postcopy_outgoing_check_all_ram_sent(s, NULL, 0) == 0) { > > + return 0; > > + } > > + > > + postcopy_outgoing_ram_all_sent(f, s); > > + return 0; > > + } > > + > > + DPRINTF("outgoing background state: %d\n", s->state); > > + > > + while (qemu_file_rate_limit(f) == 0) { > > + if (ram_save_block(f) == 0) { /* no more blocks */ > > + assert(s->state == PO_STATE_ACTIVE); > > + postcopy_outgoing_ram_all_sent(f, s); > > + return 0; > > + } > > + } > > + > > + return 0; > > +} > > + > > +/*************************************************************************** > > + * incoming part > > + */ > > + > > +/* flags for incoming mode to modify the behavior. > > + This is for benchmark/debug purpose */ > > +#define INCOMING_FLAGS_FAULT_REQUEST 0x01 > > + > > + > > +static void postcopy_incoming_umemd(void); > > + > > +#define PIS_STATE_QUIT_RECEIVED 0x01 > > +#define PIS_STATE_QUIT_QUEUED 0x02 > > +#define PIS_STATE_QUIT_SENT 0x04 > > + > > +#define PIS_STATE_QUIT_MASK (PIS_STATE_QUIT_RECEIVED | \ > > + PIS_STATE_QUIT_QUEUED | \ > > + PIS_STATE_QUIT_SENT) > > + > > +struct PostcopyIncomingState { > > + /* dest qemu state */ > > + uint32_t state; > > + > > + UMemDev *dev; > > + int host_page_size; > > + int host_page_shift; > > + > > + /* qemu side */ > > + int to_umemd_fd; > > + QEMUFileNonblock *to_umemd; > > +#define MAX_FAULTED_PAGES 256 > > + struct umem_pages *faulted_pages; > > + > > + int from_umemd_fd; > > + QEMUFile *from_umemd; > > + int version_id; /* save/load format version id */ > > +}; > > +typedef struct PostcopyIncomingState PostcopyIncomingState; > > + > > + > > +#define UMEM_STATE_EOS_RECEIVED 0x01 /* umem daemon <-> src qemu */ > > +#define UMEM_STATE_EOC_SENT 0x02 /* umem daemon <-> src qemu */ > > +#define UMEM_STATE_QUIT_RECEIVED 0x04 /* umem daemon <-> dst qemu */ > > +#define UMEM_STATE_QUIT_QUEUED 0x08 /* umem daemon <-> dst qemu */ > > 
+#define UMEM_STATE_QUIT_SENT 0x10 /* umem daemon <-> dst qemu */ > > + > > +#define UMEM_STATE_QUIT_MASK (UMEM_STATE_QUIT_QUEUED | \ > > + UMEM_STATE_QUIT_SENT | \ > > + UMEM_STATE_QUIT_RECEIVED) > > +#define UMEM_STATE_END_MASK (UMEM_STATE_EOS_RECEIVED | \ > > + UMEM_STATE_EOC_SENT | \ > > + UMEM_STATE_QUIT_MASK) > > + > > +struct PostcopyIncomingUMemDaemon { > > + /* umem daemon side */ > > + uint32_t state; > > + > > + int host_page_size; > > + int host_page_shift; > > + int nr_host_pages_per_target_page; > > + int host_to_target_page_shift; > > + int nr_target_pages_per_host_page; > > + int target_to_host_page_shift; > > + int version_id; /* save/load format version id */ > > + > > + int to_qemu_fd; > > + QEMUFileNonblock *to_qemu; > > + int from_qemu_fd; > > + QEMUFile *from_qemu; > > + > > + int mig_read_fd; > > + QEMUFile *mig_read; /* qemu on source -> umem daemon */ > > + > > + int mig_write_fd; > > + QEMUFileNonblock *mig_write; /* umem daemon -> qemu on source */ > > + > > + /* = KVM_MAX_VCPUS * (ASYNC_PF_PER_VCPUS + 1) */ > > +#define MAX_REQUESTS (512 * (64 + 1)) > > + > > + struct umem_page_request page_request; > > + struct umem_page_cached page_cached; > > + > > +#define MAX_PRESENT_REQUESTS MAX_FAULTED_PAGES > > + struct umem_pages *present_request; > > + > > + uint64_t *target_pgoffs; > > + > > + /* bitmap indexed by target page offset */ > > + unsigned long *phys_requested; > > + > > + /* bitmap indexed by target page offset */ > > + unsigned long *phys_received; > > + > > + RAMBlock *last_block_read; /* qemu on source -> umem daemon */ > > + RAMBlock *last_block_write; /* umem daemon -> qemu on source */ > > +}; > > +typedef struct PostcopyIncomingUMemDaemon PostcopyIncomingUMemDaemon; > > + > > +static PostcopyIncomingState state = { > > + .state = 0, > > + .dev = NULL, > > + .to_umemd_fd = -1, > > + .to_umemd = NULL, > > + .from_umemd_fd = -1, > > + .from_umemd = NULL, > > +}; > > + > > +static PostcopyIncomingUMemDaemon umemd = { > > + 
.state = 0, > > + .to_qemu_fd = -1, > > + .to_qemu = NULL, > > + .from_qemu_fd = -1, > > + .from_qemu = NULL, > > + .mig_read_fd = -1, > > + .mig_read = NULL, > > + .mig_write_fd = -1, > > + .mig_write = NULL, > > +}; > > + > > +int postcopy_incoming_init(const char *incoming, bool incoming_postcopy) > > +{ > > + /* incoming_postcopy makes sense only when incoming migration mode */ > > + if (!incoming && incoming_postcopy) { > > + return -EINVAL; > > + } > > + > > + if (!incoming_postcopy) { > > + return 0; > > + } > > + > > + state.state = 0; > > + state.dev = umem_dev_new(); > > + state.host_page_size = getpagesize(); > > + state.host_page_shift = ffs(state.host_page_size) - 1; > > + state.version_id = RAM_SAVE_VERSION_ID; /* = save version of > > + ram_save_live() */ > > + return 0; > > +} > > + > > +void postcopy_incoming_ram_alloc(const char *name, > > + size_t size, uint8_t **hostp, UMem **umemp) > > +{ > > + UMem *umem; > > + size = ALIGN_UP(size, state.host_page_size); > > + umem = umem_dev_create(state.dev, size, name); > > + > > + *umemp = umem; > > + *hostp = umem->umem; > > +} > > + > > +void postcopy_incoming_ram_free(UMem *umem) > > +{ > > + umem_unmap(umem); > > + umem_close(umem); > > + umem_destroy(umem); > > +} > > + > > +void postcopy_incoming_prepare(void) > > +{ > > + RAMBlock *block; > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL) { > > + umem_mmap(block->umem); > > + } > > + } > > +} > > + > > +static int postcopy_incoming_ram_load_get64(QEMUFile *f, > > + ram_addr_t *addr, int *flags) > > +{ > > + *addr = qemu_get_be64(f); > > + *flags = *addr & ~TARGET_PAGE_MASK; > > + *addr &= TARGET_PAGE_MASK; > > + return qemu_file_get_error(f); > > +} > > + > > +int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id) > > +{ > > + ram_addr_t addr; > > + int flags; > > + int error; > > + > > + DPRINTF("incoming ram load\n"); > > + /* > > + * RAM_SAVE_FLAGS_EOS or > > + * RAM_SAVE_FLAGS_MEM_SIZE + 
mem size + RAM_SAVE_FLAGS_EOS > > + * see postcopy_outgoing_ram_save_live() > > + */ > > + > > + if (version_id != RAM_SAVE_VERSION_ID) { > > + DPRINTF("RAM_SAVE_VERSION_ID %d != %d\n", > > + version_id, RAM_SAVE_VERSION_ID); > > + return -EINVAL; > > + } > > + error = postcopy_incoming_ram_load_get64(f, &addr, &flags); > > + DPRINTF("addr 0x%lx flags 0x%x\n", addr, flags); > > + if (error) { > > + DPRINTF("error %d\n", error); > > + return error; > > + } > > + if (flags == RAM_SAVE_FLAG_EOS && addr == 0) { > > + DPRINTF("EOS\n"); > > + return 0; > > + } > > + > > + if (flags != RAM_SAVE_FLAG_MEM_SIZE) { > > + DPRINTF("-EINVAL flags 0x%x\n", flags); > > + return -EINVAL; > > + } > > + error = ram_load_mem_size(f, addr); > > + if (error) { > > + DPRINTF("addr 0x%lx error %d\n", addr, error); > > + return error; > > + } > > + > > + error = postcopy_incoming_ram_load_get64(f, &addr, &flags); > > + if (error) { > > + DPRINTF("addr 0x%lx flags 0x%x error %d\n", addr, flags, error); > > + return error; > > + } > > + if (flags == RAM_SAVE_FLAG_EOS && addr == 0) { > > + DPRINTF("done\n"); > > + return 0; > > + } > > + DPRINTF("-EINVAL\n"); > > + return -EINVAL; > > +} > > + > > +void postcopy_incoming_fork_umemd(int mig_read_fd, QEMUFile *mig_read) > > +{ > > + int fds[2]; > > + RAMBlock *block; > > + > > + DPRINTF("fork\n"); > > + > > + /* socketpair(AF_UNIX)? 
*/ > > + > > + if (qemu_pipe(fds) == -1) { > > + perror("qemu_pipe"); > > + abort(); > > + } > > + state.from_umemd_fd = fds[0]; > > + umemd.to_qemu_fd = fds[1]; > > + > > + if (qemu_pipe(fds) == -1) { > > + perror("qemu_pipe"); > > + abort(); > > + } > > + umemd.from_qemu_fd = fds[0]; > > + state.to_umemd_fd = fds[1]; > > + > > + pid_t child = fork(); > > + if (child < 0) { > > + perror("fork"); > > + abort(); > > + } > > + > > + if (child == 0) { > > + int mig_write_fd; > > + > > + fd_close(&state.to_umemd_fd); > > + fd_close(&state.from_umemd_fd); > > + umemd.host_page_size = state.host_page_size; > > + umemd.host_page_shift = state.host_page_shift; > > + > > + umemd.nr_host_pages_per_target_page = > > + TARGET_PAGE_SIZE / umemd.host_page_size; > > + umemd.nr_target_pages_per_host_page = > > + umemd.host_page_size / TARGET_PAGE_SIZE; > > + > > + umemd.target_to_host_page_shift = > > + ffs(umemd.nr_host_pages_per_target_page) - 1; > > + umemd.host_to_target_page_shift = > > + ffs(umemd.nr_target_pages_per_host_page) - 1; > > + > > + umemd.state = 0; > > + umemd.version_id = state.version_id; > > + umemd.mig_read_fd = mig_read_fd; > > + umemd.mig_read = mig_read; > > + > > + mig_write_fd = dup(mig_read_fd); > > + if (mig_write_fd < 0) { > > + perror("could not dup for writable socket \n"); > > + abort(); > > + } > > + umemd.mig_write_fd = mig_write_fd; > > + umemd.mig_write = qemu_fopen_nonblock(mig_write_fd); > > + > > + postcopy_incoming_umemd(); /* noreturn */ > > + } > > + > > + DPRINTF("qemu pid: %d daemon pid: %d\n", getpid(), child); > > + fd_close(&umemd.to_qemu_fd); > > + fd_close(&umemd.from_qemu_fd); > > + state.faulted_pages = g_malloc(umem_pages_size(MAX_FAULTED_PAGES)); > > + state.faulted_pages->nr = 0; > > + > > + /* close all UMem.shmem_fd */ > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + umem_close_shmem(block->umem); > > + } > > + umem_qemu_wait_for_daemon(state.from_umemd_fd); > > +} > > + > > +static void 
postcopy_incoming_qemu_recv_quit(void) > > +{ > > + RAMBlock *block; > > + if (state.state & PIS_STATE_QUIT_RECEIVED) { > > + return; > > + } > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL) { > > + umem_destroy(block->umem); > > + block->umem = NULL; > > + block->flags &= ~RAM_POSTCOPY_UMEM_MASK; > > + } > > + } > > + > > + DPRINTF("|= PIS_STATE_QUIT_RECEIVED\n"); > > + state.state |= PIS_STATE_QUIT_RECEIVED; > > + qemu_set_fd_handler(state.from_umemd_fd, NULL, NULL, NULL); > > + qemu_fclose(state.from_umemd); > > + state.from_umemd = NULL; > > + fd_close(&state.from_umemd_fd); > > +} > > + > > +static void postcopy_incoming_qemu_fflush_to_umemd_handler(void *opaque) > > +{ > > + assert(state.to_umemd != NULL); > > + > > + nonblock_fflush(state.to_umemd); > > + if (nonblock_pending_size(state.to_umemd) > 0) { > > + return; > > + } > > + > > + qemu_set_fd_handler(state.to_umemd->fd, NULL, NULL, NULL); > > + if (state.state & PIS_STATE_QUIT_QUEUED) { > > + DPRINTF("|= PIS_STATE_QUIT_SENT\n"); > > + state.state |= PIS_STATE_QUIT_SENT; > > + qemu_fclose(state.to_umemd->file); > > + state.to_umemd = NULL; > > + fd_close(&state.to_umemd_fd); > > + g_free(state.faulted_pages); > > + state.faulted_pages = NULL; > > + } > > +} > > + > > +static void postcopy_incoming_qemu_fflush_to_umemd(void) > > +{ > > + qemu_set_fd_handler(state.to_umemd->fd, NULL, > > + postcopy_incoming_qemu_fflush_to_umemd_handler, NULL); > > + postcopy_incoming_qemu_fflush_to_umemd_handler(NULL); > > +} > > + > > +static void postcopy_incoming_qemu_queue_quit(void) > > +{ > > + if (state.state & PIS_STATE_QUIT_QUEUED) { > > + return; > > + } > > + > > + DPRINTF("|= PIS_STATE_QUIT_QUEUED\n"); > > + umem_qemu_quit(state.to_umemd->file); > > + state.state |= PIS_STATE_QUIT_QUEUED; > > +} > > + > > +static void postcopy_incoming_qemu_send_pages_present(void) > > +{ > > + if (state.faulted_pages->nr > 0) { > > + umem_qemu_send_pages_present(state.to_umemd->file, > 
> + state.faulted_pages); > > + state.faulted_pages->nr = 0; > > + } > > +} > > + > > +static void postcopy_incoming_qemu_faulted_pages( > > + const struct umem_pages *pages) > > +{ > > + assert(pages->nr <= MAX_FAULTED_PAGES); > > + assert(state.faulted_pages != NULL); > > + > > + if (state.faulted_pages->nr + pages->nr > MAX_FAULTED_PAGES) { > > + postcopy_incoming_qemu_send_pages_present(); > > + } > > + memcpy(&state.faulted_pages->pgoffs[state.faulted_pages->nr], > > + &pages->pgoffs[0], sizeof(pages->pgoffs[0]) * pages->nr); > > + state.faulted_pages->nr += pages->nr; > > +} > > + > > +static void postcopy_incoming_qemu_cleanup_umem(void); > > + > > +static int postcopy_incoming_qemu_handle_req_one(void) > > +{ > > + int offset = 0; > > + int ret; > > + uint8_t cmd; > > + > > + ret = qemu_peek_buffer(state.from_umemd, &cmd, sizeof(cmd), offset); > > + offset += sizeof(cmd); > > + if (ret != sizeof(cmd)) { > > + return -EAGAIN; > > + } > > + DPRINTF("cmd %c\n", cmd); > > + > > + switch (cmd) { > > + case UMEM_DAEMON_QUIT: > > + postcopy_incoming_qemu_recv_quit(); > > + postcopy_incoming_qemu_queue_quit(); > > + postcopy_incoming_qemu_cleanup_umem(); > > + break; > > + case UMEM_DAEMON_TRIGGER_PAGE_FAULT: { > > + struct umem_pages *pages = > > + umem_qemu_trigger_page_fault(state.from_umemd, &offset); > > + if (pages == NULL) { > > + return -EAGAIN; > > + } > > + if (state.to_umemd_fd >= 0 && !(state.state & PIS_STATE_QUIT_QUEUED)) { > > + postcopy_incoming_qemu_faulted_pages(pages); > > + g_free(pages); > > + } > > + break; > > + } > > + case UMEM_DAEMON_ERROR: > > + /* umem daemon hit troubles, so it warned us to stop vm execution */ > > + vm_stop(RUN_STATE_IO_ERROR); /* or RUN_STATE_INTERNAL_ERROR */ > > + break; > > + default: > > + abort(); > > + break; > > + } > > + > > + if (state.from_umemd != NULL) { > > + qemu_file_skip(state.from_umemd, offset); > > + } > > + return 0; > > +} > > + > > +static void postcopy_incoming_qemu_handle_req(void *opaque) > > 
+{ > > + do { > > + int ret = postcopy_incoming_qemu_handle_req_one(); > > + if (ret == -EAGAIN) { > > + break; > > + } > > + } while (state.from_umemd != NULL && > > + qemu_pending_size(state.from_umemd) > 0); > > + > > + if (state.to_umemd != NULL) { > > + if (state.faulted_pages->nr > 0) { > > + postcopy_incoming_qemu_send_pages_present(); > > + } > > + postcopy_incoming_qemu_fflush_to_umemd(); > > + } > > +} > > + > > +void postcopy_incoming_qemu_ready(void) > > +{ > > + umem_qemu_ready(state.to_umemd_fd); > > + > > + state.from_umemd = qemu_fopen_pipe(state.from_umemd_fd); > > + state.to_umemd = qemu_fopen_nonblock(state.to_umemd_fd); > > + qemu_set_fd_handler(state.from_umemd_fd, > > + postcopy_incoming_qemu_handle_req, NULL, NULL); > > +} > > + > > +static void postcopy_incoming_qemu_cleanup_umem(void) > > +{ > > + /* when qemu will quit before completing postcopy, tell umem daemon > > + to tear down umem device and exit. */ > > + if (state.to_umemd_fd >= 0) { > > + postcopy_incoming_qemu_queue_quit(); > > + postcopy_incoming_qemu_fflush_to_umemd(); > > + } > > + > > + if (state.dev) { > > + umem_dev_destroy(state.dev); > > + state.dev = NULL; > > + } > > +} > > + > > +void postcopy_incoming_qemu_cleanup(void) > > +{ > > + postcopy_incoming_qemu_cleanup_umem(); > > + if (state.to_umemd != NULL) { > > + nonblock_wait_for_flush(state.to_umemd); > > + } > > +} > > + > > +void postcopy_incoming_qemu_pages_unmapped(ram_addr_t addr, ram_addr_t size) > > +{ > > + uint64_t nr = DIV_ROUND_UP(size, state.host_page_size); > > + size_t len = umem_pages_size(nr); > > + ram_addr_t end = addr + size; > > + struct umem_pages *pages; > > + int i; > > + > > + if (state.to_umemd_fd < 0 || state.state & PIS_STATE_QUIT_QUEUED) { > > + return; > > + } > > + pages = g_malloc(len); > > + pages->nr = nr; > > + for (i = 0; addr < end; addr += state.host_page_size, i++) { > > + pages->pgoffs[i] = addr >> state.host_page_shift; > > + } > > + 
umem_qemu_send_pages_unmapped(state.to_umemd->file, pages); > > + g_free(pages); > > + assert(state.to_umemd != NULL); > > + postcopy_incoming_qemu_fflush_to_umemd(); > > +} > > + > > +/************************************************************************** > > + * incoming umem daemon > > + */ > > + > > +static void postcopy_incoming_umem_recv_quit(void) > > +{ > > + if (umemd.state & UMEM_STATE_QUIT_RECEIVED) { > > + return; > > + } > > + DPRINTF("|= UMEM_STATE_QUIT_RECEIVED\n"); > > + umemd.state |= UMEM_STATE_QUIT_RECEIVED; > > + qemu_fclose(umemd.from_qemu); > > + umemd.from_qemu = NULL; > > + fd_close(&umemd.from_qemu_fd); > > +} > > + > > +static void postcopy_incoming_umem_queue_quit(void) > > +{ > > + if (umemd.state & UMEM_STATE_QUIT_QUEUED) { > > + return; > > + } > > + DPRINTF("|= UMEM_STATE_QUIT_QUEUED\n"); > > + umem_daemon_quit(umemd.to_qemu->file); > > + umemd.state |= UMEM_STATE_QUIT_QUEUED; > > +} > > + > > +static void postcopy_incoming_umem_send_eoc_req(void) > > +{ > > + struct qemu_umem_req req; > > + > > + if (umemd.state & UMEM_STATE_EOC_SENT) { > > + return; > > + } > > + > > + DPRINTF("|= UMEM_STATE_EOC_SENT\n"); > > + req.cmd = QEMU_UMEM_REQ_EOC; > > + postcopy_incoming_send_req(umemd.mig_write->file, &req); > > + umemd.state |= UMEM_STATE_EOC_SENT; > > + qemu_fclose(umemd.mig_write->file); > > + umemd.mig_write = NULL; > > + fd_close(&umemd.mig_write_fd); > > +} > > + > > +static void postcopy_incoming_umem_send_page_req(RAMBlock *block) > > +{ > > + struct qemu_umem_req req; > > + int bit; > > + uint64_t target_pgoff; > > + int i; > > + > > + umemd.page_request.nr = MAX_REQUESTS; > > + umem_get_page_request(block->umem, &umemd.page_request); > > + DPRINTF("id %s nr %d offs 0x%"PRIx64" 0x%"PRIx64"\n", > > + block->idstr, umemd.page_request.nr, > > + (uint64_t)umemd.page_request.pgoffs[0], > > + (uint64_t)umemd.page_request.pgoffs[1]); > > + > > + if (umemd.last_block_write != block) { > > + req.cmd = QEMU_UMEM_REQ_ON_DEMAND; > > + 
req.idstr = block->idstr; > > + } else { > > + req.cmd = QEMU_UMEM_REQ_ON_DEMAND_CONT; > > + } > > + > > + req.nr = 0; > > + req.pgoffs = umemd.target_pgoffs; > > + if (TARGET_PAGE_SIZE >= umemd.host_page_size) { > > + for (i = 0; i < umemd.page_request.nr; i++) { > > + target_pgoff = > > + umemd.page_request.pgoffs[i] >> umemd.host_to_target_page_shift; > > + bit = (block->offset >> TARGET_PAGE_BITS) + target_pgoff; > > + > > + if (!test_and_set_bit(bit, umemd.phys_requested)) { > > + req.pgoffs[req.nr] = target_pgoff; > > + req.nr++; > > + } > > + } > > + } else { > > + for (i = 0; i < umemd.page_request.nr; i++) { > > + int j; > > + target_pgoff = > > + umemd.page_request.pgoffs[i] << umemd.host_to_target_page_shift; > > + bit = (block->offset >> TARGET_PAGE_BITS) + target_pgoff; > > + > > + for (j = 0; j < umemd.nr_target_pages_per_host_page; j++) { > > + if (!test_and_set_bit(bit + j, umemd.phys_requested)) { > > + req.pgoffs[req.nr] = target_pgoff + j; > > + req.nr++; > > + } > > + } > > + } > > + } > > + > > + DPRINTF("id %s nr %d offs 0x%"PRIx64" 0x%"PRIx64"\n", > > + block->idstr, req.nr, req.pgoffs[0], req.pgoffs[1]); > > + if (req.nr > 0 && umemd.mig_write != NULL) { > > + postcopy_incoming_send_req(umemd.mig_write->file, &req); > > + umemd.last_block_write = block; > > + } > > +} > > + > > +static void postcopy_incoming_umem_send_pages_present(void) > > +{ > > + if (umemd.present_request->nr > 0) { > > + umem_daemon_send_pages_present(umemd.to_qemu->file, > > + umemd.present_request); > > + umemd.present_request->nr = 0; > > + } > > +} > > + > > +static void postcopy_incoming_umem_pages_present_one( > > + uint32_t nr, const __u64 *pgoffs, uint64_t ramblock_pgoffset) > > +{ > > + uint32_t i; > > + assert(nr <= MAX_PRESENT_REQUESTS); > > + > > + if (umemd.present_request->nr + nr > MAX_PRESENT_REQUESTS) { > > + postcopy_incoming_umem_send_pages_present(); > > + } > > + > > + for (i = 0; i < nr; i++) { > > + 
umemd.present_request->pgoffs[umemd.present_request->nr + i] = > > + pgoffs[i] + ramblock_pgoffset; > > + } > > + umemd.present_request->nr += nr; > > +} > > + > > +static void postcopy_incoming_umem_pages_present( > > + const struct umem_page_cached *page_cached, uint64_t ramblock_pgoffset) > > +{ > > + uint32_t left = page_cached->nr; > > + uint32_t offset = 0; > > + > > + while (left > 0) { > > + uint32_t nr = MIN(left, MAX_PRESENT_REQUESTS); > > + postcopy_incoming_umem_pages_present_one( > > + nr, &page_cached->pgoffs[offset], ramblock_pgoffset); > > + > > + left -= nr; > > + offset += nr; > > + } > > +} > > + > > +static int postcopy_incoming_umem_ram_load(void) > > +{ > > + ram_addr_t offset; > > + int flags; > > + int error; > > + void *shmem; > > + int i; > > + int bit; > > + > > + if (umemd.version_id != RAM_SAVE_VERSION_ID) { > > + return -EINVAL; > > + } > > + > > + offset = qemu_get_be64(umemd.mig_read); > > + > > + flags = offset & ~TARGET_PAGE_MASK; > > + offset &= TARGET_PAGE_MASK; > > + > > + assert(!(flags & RAM_SAVE_FLAG_MEM_SIZE)); > > + > > + if (flags & RAM_SAVE_FLAG_EOS) { > > + DPRINTF("RAM_SAVE_FLAG_EOS\n"); > > + postcopy_incoming_umem_send_eoc_req(); > > + > > + qemu_fclose(umemd.mig_read); > > + umemd.mig_read = NULL; > > + fd_close(&umemd.mig_read_fd); > > + umemd.state |= UMEM_STATE_EOS_RECEIVED; > > + > > + postcopy_incoming_umem_queue_quit(); > > + DPRINTF("|= UMEM_STATE_EOS_RECEIVED\n"); > > + return 0; > > + } > > + > > + shmem = ram_load_host_from_stream_offset(umemd.mig_read, offset, flags, > > + &umemd.last_block_read); > > + if (!shmem) { > > + DPRINTF("shmem == NULL\n"); > > + return -EINVAL; > > + } > > + > > + if (flags & RAM_SAVE_FLAG_COMPRESS) { > > + uint8_t ch = qemu_get_byte(umemd.mig_read); > > + memset(shmem, ch, TARGET_PAGE_SIZE); > > + } else if (flags & RAM_SAVE_FLAG_PAGE) { > > + qemu_get_buffer(umemd.mig_read, shmem, TARGET_PAGE_SIZE); > > + } > > + > > + error = qemu_file_get_error(umemd.mig_read); > > + if 
(error) { > > + DPRINTF("error %d\n", error); > > + return error; > > + } > > + > > + umemd.page_cached.nr = 0; > > + bit = (umemd.last_block_read->offset + offset) >> TARGET_PAGE_BITS; > > + if (!test_and_set_bit(bit, umemd.phys_received)) { > > + if (TARGET_PAGE_SIZE >= umemd.host_page_size) { > > + __u64 pgoff = offset >> umemd.host_page_shift; > > + for (i = 0; i < umemd.nr_host_pages_per_target_page; i++) { > > + umemd.page_cached.pgoffs[umemd.page_cached.nr] = pgoff + i; > > + umemd.page_cached.nr++; > > + } > > + } else { > > + bool mark_cache = true; > > + for (i = 0; i < umemd.nr_target_pages_per_host_page; i++) { > > + if (!test_bit(bit + i, umemd.phys_received)) { > > + mark_cache = false; > > + break; > > + } > > + } > > + if (mark_cache) { > > + umemd.page_cached.pgoffs[0] = offset >> umemd.host_page_shift; > > + umemd.page_cached.nr = 1; > > + } > > + } > > + } > > + > > + if (umemd.page_cached.nr > 0) { > > + umem_mark_page_cached(umemd.last_block_read->umem, &umemd.page_cached); > > + > > + if (!(umemd.state & UMEM_STATE_QUIT_QUEUED) && umemd.to_qemu_fd >=0 && > > + (incoming_postcopy_flags & INCOMING_FLAGS_FAULT_REQUEST)) { > > + uint64_t ramblock_pgoffset; > > + > > + ramblock_pgoffset = > > + umemd.last_block_read->offset >> umemd.host_page_shift; > > + postcopy_incoming_umem_pages_present(&umemd.page_cached, > > + ramblock_pgoffset); > > + } > > + } > > + > > + return 0; > > +} > > + > > +static bool postcopy_incoming_umem_check_umem_done(void) > > +{ > > + bool all_done = true; > > + RAMBlock *block; > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + UMem *umem = block->umem; > > + if (umem != NULL && umem->nsets == umem->nbits) { > > + umem_unmap_shmem(umem); > > + umem_destroy(umem); > > + block->umem = NULL; > > + } > > + if (block->umem != NULL) { > > + all_done = false; > > + } > > + } > > + return all_done; > > +} > > + > > +static bool postcopy_incoming_umem_page_faulted(const struct umem_pages *pages) > > +{ > > + int i; > 
> + > > + for (i = 0; i < pages->nr; i++) { > > + ram_addr_t addr = pages->pgoffs[i] << umemd.host_page_shift; > > + RAMBlock *block = qemu_get_ram_block(addr); > > + addr -= block->offset; > > + umem_remove_shmem(block->umem, addr, umemd.host_page_size); > > + } > > + return postcopy_incoming_umem_check_umem_done(); > > +} > > + > > +static bool > > +postcopy_incoming_umem_page_unmapped(const struct umem_pages *pages) > > +{ > > + RAMBlock *block; > > + ram_addr_t addr; > > + int i; > > + > > + struct qemu_umem_req req = { > > + .cmd = QEMU_UMEM_REQ_REMOVE, > > + .nr = 0, > > + .pgoffs = (uint64_t*)pages->pgoffs, > > + }; > > + > > + addr = pages->pgoffs[0] << umemd.host_page_shift; > > + block = qemu_get_ram_block(addr); > > + > > + for (i = 0; i < pages->nr; i++) { > > + int pgoff; > > + > > + addr = pages->pgoffs[i] << umemd.host_page_shift; > > + pgoff = addr >> TARGET_PAGE_BITS; > > + if (!test_bit(pgoff, umemd.phys_received) && > > + !test_bit(pgoff, umemd.phys_requested)) { > > + req.pgoffs[req.nr] = pgoff; > > + req.nr++; > > + } > > + set_bit(pgoff, umemd.phys_received); > > + set_bit(pgoff, umemd.phys_requested); > > + > > + umem_remove_shmem(block->umem, > > + addr - block->offset, umemd.host_page_size); > > + } > > + if (req.nr > 0 && umemd.mig_write != NULL) { > > + req.idstr = block->idstr; > > + postcopy_incoming_send_req(umemd.mig_write->file, &req); > > + } > > + > > + return postcopy_incoming_umem_check_umem_done(); > > +} > > + > > +static void postcopy_incoming_umem_done(void) > > +{ > > + postcopy_incoming_umem_send_eoc_req(); > > + postcopy_incoming_umem_queue_quit(); > > +} > > + > > +static int postcopy_incoming_umem_handle_qemu(void) > > +{ > > + int ret; > > + int offset = 0; > > + uint8_t cmd; > > + > > + ret = qemu_peek_buffer(umemd.from_qemu, &cmd, sizeof(cmd), offset); > > + offset += sizeof(cmd); > > + if (ret != sizeof(cmd)) { > > + return -EAGAIN; > > + } > > + DPRINTF("cmd %c\n", cmd); > > + switch (cmd) { > > + case 
UMEM_QEMU_QUIT: > > + postcopy_incoming_umem_recv_quit(); > > + postcopy_incoming_umem_done(); > > + break; > > + case UMEM_QEMU_PAGE_FAULTED: { > > + struct umem_pages *pages = umem_recv_pages(umemd.from_qemu, > > + &offset); > > + if (pages == NULL) { > > + return -EAGAIN; > > + } > > + if (postcopy_incoming_umem_page_faulted(pages)){ > > + postcopy_incoming_umem_done(); > > + } > > + g_free(pages); > > + break; > > + } > > + case UMEM_QEMU_PAGE_UNMAPPED: { > > + struct umem_pages *pages = umem_recv_pages(umemd.from_qemu, > > + &offset); > > + if (pages == NULL) { > > + return -EAGAIN; > > + } > > + if (postcopy_incoming_umem_page_unmapped(pages)){ > > + postcopy_incoming_umem_done(); > > + } > > + g_free(pages); > > + break; > > + } > > + default: > > + abort(); > > + break; > > + } > > + if (umemd.from_qemu != NULL) { > > + qemu_file_skip(umemd.from_qemu, offset); > > + } > > + return 0; > > +} > > + > > +static void set_fd(int fd, fd_set *fds, int *nfds) > > +{ > > + FD_SET(fd, fds); > > + if (fd > *nfds) { > > + *nfds = fd; > > + } > > +} > > + > > +static int postcopy_incoming_umemd_main_loop(void) > > +{ > > + fd_set writefds; > > + fd_set readfds; > > + int nfds; > > + RAMBlock *block; > > + int ret; > > + > > + int pending_size; > > + bool get_page_request; > > + > > + nfds = -1; > > + FD_ZERO(&writefds); > > + FD_ZERO(&readfds); > > + > > + if (umemd.mig_write != NULL) { > > + pending_size = nonblock_pending_size(umemd.mig_write); > > + if (pending_size > 0) { > > + set_fd(umemd.mig_write_fd, &writefds, &nfds); > > + } > > + } else { > > + pending_size = 0; > > + } > > + > > +#define PENDING_SIZE_MAX (MAX_REQUESTS * sizeof(uint64_t) * 2) > > + /* If page request to the migration source is accumulated, > > + suspend getting page fault request. 
*/ > > + get_page_request = (pending_size <= PENDING_SIZE_MAX); > > + > > + if (get_page_request) { > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL) { > > + set_fd(block->umem->fd, &readfds, &nfds); > > + } > > + } > > + } > > + > > + if (umemd.mig_read_fd >= 0) { > > + set_fd(umemd.mig_read_fd, &readfds, &nfds); > > + } > > + > > + if (umemd.to_qemu != NULL && > > + nonblock_pending_size(umemd.to_qemu) > 0) { > > + set_fd(umemd.to_qemu_fd, &writefds, &nfds); > > + } > > + if (umemd.from_qemu_fd >= 0) { > > + set_fd(umemd.from_qemu_fd, &readfds, &nfds); > > + } > > + > > + ret = select(nfds + 1, &readfds, &writefds, NULL, NULL); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + return 0; > > + } > > + return ret; > > + } > > + > > + if (umemd.mig_write_fd >= 0 && FD_ISSET(umemd.mig_write_fd, &writefds)) { > > + nonblock_fflush(umemd.mig_write); > > + } > > + if (umemd.to_qemu_fd >= 0 && FD_ISSET(umemd.to_qemu_fd, &writefds)) { > > + nonblock_fflush(umemd.to_qemu); > > + } > > + if (get_page_request) { > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL && FD_ISSET(block->umem->fd, &readfds)) { > > + postcopy_incoming_umem_send_page_req(block); > > + } > > + } > > + } > > + if (umemd.mig_read_fd >= 0 && FD_ISSET(umemd.mig_read_fd, &readfds)) { > > + do { > > + ret = postcopy_incoming_umem_ram_load(); > > + if (ret < 0) { > > + return ret; > > + } > > + } while (umemd.mig_read != NULL && > > + qemu_pending_size(umemd.mig_read) > 0); > > + } > > + if (umemd.from_qemu_fd >= 0 && FD_ISSET(umemd.from_qemu_fd, &readfds)) { > > + do { > > + ret = postcopy_incoming_umem_handle_qemu(); > > + if (ret == -EAGAIN) { > > + break; > > + } > > + } while (umemd.from_qemu != NULL && > > + qemu_pending_size(umemd.from_qemu) > 0); > > + } > > + > > + if (umemd.mig_write != NULL) { > > + nonblock_fflush(umemd.mig_write); > > + } > > + if (umemd.to_qemu != NULL) { > > + if (!(umemd.state & UMEM_STATE_QUIT_QUEUED)) 
{ > > + postcopy_incoming_umem_send_pages_present(); > > + } > > + nonblock_fflush(umemd.to_qemu); > > + if ((umemd.state & UMEM_STATE_QUIT_QUEUED) && > > + nonblock_pending_size(umemd.to_qemu) == 0) { > > + DPRINTF("|= UMEM_STATE_QUIT_SENT\n"); > > + qemu_fclose(umemd.to_qemu->file); > > + umemd.to_qemu = NULL; > > + fd_close(&umemd.to_qemu_fd); > > + umemd.state |= UMEM_STATE_QUIT_SENT; > > + } > > + } > > + > > + return (umemd.state & UMEM_STATE_END_MASK) == UMEM_STATE_END_MASK; > > +} > > + > > +static void postcopy_incoming_umemd(void) > > +{ > > + ram_addr_t last_ram_offset; > > + int nbits; > > + RAMBlock *block; > > + int ret; > > + > > + qemu_daemon(1, 1); > > + signal(SIGPIPE, SIG_IGN); > > + DPRINTF("daemon pid: %d\n", getpid()); > > + > > + umemd.page_request.pgoffs = g_new(__u64, MAX_REQUESTS); > > + umemd.page_cached.pgoffs = > > + g_new(__u64, MAX_REQUESTS * > > + (TARGET_PAGE_SIZE >= umemd.host_page_size ? > > + 1: umemd.nr_host_pages_per_target_page)); > > + umemd.target_pgoffs = > > + g_new(uint64_t, MAX_REQUESTS * > > + MAX(umemd.nr_host_pages_per_target_page, > > + umemd.nr_target_pages_per_host_page)); > > + umemd.present_request = g_malloc(umem_pages_size(MAX_PRESENT_REQUESTS)); > > + umemd.present_request->nr = 0; > > + > > + last_ram_offset = qemu_last_ram_offset(); > > + nbits = last_ram_offset >> TARGET_PAGE_BITS; > > + umemd.phys_requested = g_new0(unsigned long, BITS_TO_LONGS(nbits)); > > + umemd.phys_received = g_new0(unsigned long, BITS_TO_LONGS(nbits)); > > + umemd.last_block_read = NULL; > > + umemd.last_block_write = NULL; > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + UMem *umem = block->umem; > > + umem->umem = NULL; /* umem mapping area has VM_DONT_COPY flag, > > + so we lost those mappings by fork */ > > + block->host = umem_map_shmem(umem); > > + umem_close_shmem(umem); > > + } > > + umem_daemon_ready(umemd.to_qemu_fd); > > + umemd.to_qemu = qemu_fopen_nonblock(umemd.to_qemu_fd); > > + > > + /* wait for qemu 
to disown migration_fd */ > > + umem_daemon_wait_for_qemu(umemd.from_qemu_fd); > > + umemd.from_qemu = qemu_fopen_pipe(umemd.from_qemu_fd); > > + > > + DPRINTF("entering umemd main loop\n"); > > + for (;;) { > > + ret = postcopy_incoming_umemd_main_loop(); > > + if (ret != 0) { > > + break; > > + } > > + } > > + DPRINTF("exiting umemd main loop\n"); > > + > > + /* This daemon forked from qemu and the parent qemu is still running. > > + * Cleanups of linked libraries like SDL should not be triggered, > > + * otherwise the parent qemu may use resources which was already freed. > > + */ > > + fflush(stdout); > > + fflush(stderr); > > + _exit(ret < 0? EXIT_FAILURE: 0); > > +} > > diff --git a/migration-tcp.c b/migration-tcp.c > > index cf6a9b8..aa35050 100644 > > --- a/migration-tcp.c > > +++ b/migration-tcp.c > > @@ -63,18 +63,25 @@ static void tcp_wait_for_connect(void *opaque) > > } while (ret == -1 && (socket_error()) == EINTR); > > > > if (ret < 0) { > > - migrate_fd_error(s); > > - return; > > + goto error_out; > > } > > > > qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > > > > - if (val == 0) > > + if (val == 0) { > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > + } > > migrate_fd_connect(s); > > - else { > > + } else { > > DPRINTF("error connecting %d\n", val); > > - migrate_fd_error(s); > > + goto error_out; > > } > > + return; > > + > > +error_out: > > + migrate_fd_error(s); > > } > > > > int tcp_start_outgoing_migration(MigrationState *s, const char *host_port) > > @@ -112,11 +119,19 @@ int tcp_start_outgoing_migration(MigrationState *s, const char *host_port) > > > > if (ret < 0) { > > DPRINTF("connect failed\n"); > > - migrate_fd_error(s); > > - return ret; > > + goto error_out; > > + } > > + > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > } > > migrate_fd_connect(s); > > return 0; > > + > > +error_out: > > + migrate_fd_error(s); > > + return ret; 
> > } > > > > static void tcp_accept_incoming_migration(void *opaque) > > @@ -145,7 +160,15 @@ static void tcp_accept_incoming_migration(void *opaque) > > } > > > > process_incoming_migration(f); > > + if (incoming_postcopy) { > > + postcopy_incoming_fork_umemd(c, f); > > + } > > qemu_fclose(f); > > + if (incoming_postcopy) { > > + /* now socket is disowned. > > + So tell umem server that it's safe to use it */ > > + postcopy_incoming_qemu_ready(); > > + } > > out: > > close(c); > > out2: > > diff --git a/migration-unix.c b/migration-unix.c > > index dfcf203..3707505 100644 > > --- a/migration-unix.c > > +++ b/migration-unix.c > > @@ -69,12 +69,20 @@ static void unix_wait_for_connect(void *opaque) > > > > qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > > > > - if (val == 0) > > + if (val == 0) { > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > + } > > migrate_fd_connect(s); > > - else { > > + } else { > > DPRINTF("error connecting %d\n", val); > > - migrate_fd_error(s); > > + goto error_out; > > } > > + return; > > + > > +error_out: > > + migrate_fd_error(s); > > } > > > > int unix_start_outgoing_migration(MigrationState *s, const char *path) > > @@ -109,11 +117,19 @@ int unix_start_outgoing_migration(MigrationState *s, const char *path) > > > > if (ret < 0) { > > DPRINTF("connect failed\n"); > > - migrate_fd_error(s); > > - return ret; > > + goto error_out; > > + } > > + > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > } > > migrate_fd_connect(s); > > return 0; > > + > > +error_out: > > + migrate_fd_error(s); > > + return ret; > > } > > > > static void unix_accept_incoming_migration(void *opaque) > > @@ -142,7 +158,13 @@ static void unix_accept_incoming_migration(void *opaque) > > } > > > > process_incoming_migration(f); > > + if (incoming_postcopy) { > > + postcopy_incoming_fork_umemd(c, f); > > + } > > qemu_fclose(f); > > + if (incoming_postcopy) { > > + 
postcopy_incoming_qemu_ready(); > > + } > > out: > > close(c); > > out2: > > diff --git a/migration.c b/migration.c > > index 0149ab3..51efe44 100644 > > --- a/migration.c > > +++ b/migration.c > > @@ -39,6 +39,11 @@ enum { > > MIG_STATE_COMPLETED, > > }; > > > > +enum { > > + MIG_SUBSTATE_PRECOPY, > > + MIG_SUBSTATE_POSTCOPY, > > +}; > > + > > #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ > > > > static NotifierList migration_state_notifiers = > > @@ -255,6 +260,18 @@ static void migrate_fd_put_ready(void *opaque) > > return; > > } > > > > + if (s->substate == MIG_SUBSTATE_POSTCOPY) { > > + /* PRINTF("postcopy background\n"); */ > > + ret = postcopy_outgoing_ram_save_background(s->mon, s->file, > > + s->postcopy); > > + if (ret > 0) { > > + migrate_fd_completed(s); > > + } else if (ret < 0) { > > + migrate_fd_error(s); > > + } > > + return; > > + } > > + > > DPRINTF("iterate\n"); > > ret = qemu_savevm_state_iterate(s->mon, s->file); > > if (ret < 0) { > > @@ -265,6 +282,19 @@ static void migrate_fd_put_ready(void *opaque) > > DPRINTF("done iterating\n"); > > vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); > > > > + if (s->params.postcopy) { > > + if (qemu_savevm_state_complete(s->mon, s->file) < 0) { > > + migrate_fd_error(s); > > + if (old_vm_running) { > > + vm_start(); > > + } > > + return; > > + } > > + s->substate = MIG_SUBSTATE_POSTCOPY; > > + s->postcopy = postcopy_outgoing_begin(s); > > + return; > > + } > > + > > if (qemu_savevm_state_complete(s->mon, s->file) < 0) { > > migrate_fd_error(s); > > } else { > > @@ -357,6 +387,7 @@ void migrate_fd_connect(MigrationState *s) > > int ret; > > > > s->state = MIG_STATE_ACTIVE; > > + s->substate = MIG_SUBSTATE_PRECOPY; > > s->file = qemu_fopen_ops_buffered(s, > > s->bandwidth_limit, > > migrate_fd_put_buffer, > > diff --git a/migration.h b/migration.h > > index 90ae362..2809e99 100644 > > --- a/migration.h > > +++ b/migration.h > > @@ -40,6 +40,12 @@ struct MigrationState > > int 
(*write)(MigrationState *s, const void *buff, size_t size); > > void *opaque; > > MigrationParams params; > > + > > + /* for postcopy */ > > + int substate; /* precopy or postcopy */ > > + int fd_read; > > + QEMUFile *file_read; /* connection from the detination */ > > + void *postcopy; > > }; > > > > void process_incoming_migration(QEMUFile *f); > > @@ -86,6 +92,7 @@ uint64_t ram_bytes_remaining(void); > > uint64_t ram_bytes_transferred(void); > > uint64_t ram_bytes_total(void); > > > > +void ram_save_set_params(const MigrationParams *params, void *opaque); > > void sort_ram_list(void); > > int ram_save_block(QEMUFile *f); > > void ram_save_memory_set_dirty(void); > > @@ -107,7 +114,30 @@ void migrate_add_blocker(Error *reason); > > */ > > void migrate_del_blocker(Error *reason); > > > > +/* For outgoing postcopy */ > > +int postcopy_outgoing_create_read_socket(MigrationState *s); > > +int postcopy_outgoing_ram_save_live(Monitor *mon, > > + QEMUFile *f, int stage, void *opaque); > > +void *postcopy_outgoing_begin(MigrationState *s); > > +int postcopy_outgoing_ram_save_background(Monitor *mon, QEMUFile *f, > > + void *postcopy); > > + > > +/* For incoming postcopy */ > > extern bool incoming_postcopy; > > extern unsigned long incoming_postcopy_flags; > > > > +int postcopy_incoming_init(const char *incoming, bool incoming_postcopy); > > +void postcopy_incoming_ram_alloc(const char *name, > > + size_t size, uint8_t **hostp, UMem **umemp); > > +void postcopy_incoming_ram_free(UMem *umem); > > +void postcopy_incoming_prepare(void); > > + > > +int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id); > > +void postcopy_incoming_fork_umemd(int mig_read_fd, QEMUFile *mig_read); > > +void postcopy_incoming_qemu_ready(void); > > +void postcopy_incoming_qemu_cleanup(void); > > +#ifdef NEED_CPU_H > > +void postcopy_incoming_qemu_pages_unmapped(ram_addr_t addr, ram_addr_t size); > > +#endif > > + > > #endif > > diff --git a/qemu-common.h b/qemu-common.h > > 
index 725922b..d74a8c9 100644 > > --- a/qemu-common.h > > +++ b/qemu-common.h > > @@ -17,6 +17,7 @@ typedef struct DeviceState DeviceState; > > > > struct Monitor; > > typedef struct Monitor Monitor; > > +typedef struct UMem UMem; > > > > /* we put basic includes here to avoid repeating them in device drivers */ > > #include > > diff --git a/qemu-options.hx b/qemu-options.hx > > index 5c5b8f3..19e20f9 100644 > > --- a/qemu-options.hx > > +++ b/qemu-options.hx > > @@ -2510,7 +2510,10 @@ DEF("postcopy-flags", HAS_ARG, QEMU_OPTION_postcopy_flags, > > "-postcopy-flags unsigned-int(flags)\n" > > " flags for postcopy incoming migration\n" > > " when -incoming and -postcopy are specified.\n" > > - " This is for benchmark/debug purpose (default: 0)\n", > > + " This is for benchmark/debug purpose (default: 0)\n" > > + " Currently supprted flags are\n" > > + " 1: enable fault request from umemd to qemu\n" > > + " (default: disabled)\n", > > QEMU_ARCH_ALL) > > STEXI > > @item -postcopy-flags int > > Can you move umem.c and umem.h to a separate patch please, > this patch > > diff --git a/umem.c b/umem.c > > new file mode 100644 > > index 0000000..b7be006 > > --- /dev/null > > +++ b/umem.c > > @@ -0,0 +1,379 @@ > > +/* > > + * umem.c: user process backed memory module for postcopy livemigration > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#include > > +#include > > + > > +#include > > + > > +#include "bitops.h" > > +#include "sysemu.h" > > +#include "hw/hw.h" > > +#include "umem.h" > > + > > +//#define DEBUG_UMEM > > +#ifdef DEBUG_UMEM > > +#include > > +#define DPRINTF(format, ...) \ > > + do { \ > > + printf("%d:%ld %s:%d "format, getpid(), syscall(SYS_gettid), \ > > + __func__, __LINE__, ## __VA_ARGS__); \ > > + } while (0) > > +#else > > +#define DPRINTF(format, ...) do { } while (0) > > +#endif > > + > > +#define DEV_UMEM "/dev/umem" > > + > > +struct UMemDev { > > + int fd; > > + int page_shift; > > +}; > > + > > +UMemDev *umem_dev_new(void) > > +{ > > + UMemDev *umem_dev; > > + int umem_dev_fd = open(DEV_UMEM, O_RDWR); > > + if (umem_dev_fd < 0) { > > + perror("can't open "DEV_UMEM); > > + abort(); > > + } > > + > > + umem_dev = g_new(UMemDev, 1); > > + umem_dev->fd = umem_dev_fd; > > + umem_dev->page_shift = ffs(getpagesize()) - 1; > > + return umem_dev; > > +} > > + > > +void umem_dev_destroy(UMemDev *dev) > > +{ > > + close(dev->fd); > > + g_free(dev); > > +} > > + > > +UMem *umem_dev_create(UMemDev *dev, size_t size, const char *name) > > +{ > > + struct umem_create create = { > > + .size = size, > > + .async_req_max = 0, > > + .sync_req_max = 0, > > + }; > > + UMem *umem; > > + > > + snprintf(create.name.id, sizeof(create.name.id), > > + "pid-%"PRId64, (uint64_t)getpid()); > > + create.name.id[UMEM_ID_MAX - 1] = 0; > > + strncpy(create.name.name, name, sizeof(create.name.name)); > > + create.name.name[UMEM_NAME_MAX - 1] = 0; > > + > > + assert((size % getpagesize()) == 0); > > + if (ioctl(dev->fd, UMEM_DEV_CREATE_UMEM, &create) < 0) { > > + perror("UMEM_DEV_CREATE_UMEM"); > > + abort(); > > + } > > + if (ftruncate(create.shmem_fd, create.size) < 0) { > > + 
perror("truncate(\"shmem_fd\")"); > > + abort(); > > + } > > + > > + umem = g_new(UMem, 1); > > + umem->nbits = 0; > > + umem->nsets = 0; > > + umem->faulted = NULL; > > + umem->page_shift = dev->page_shift; > > + umem->fd = create.umem_fd; > > + umem->shmem_fd = create.shmem_fd; > > + umem->size = create.size; > > + umem->umem = mmap(NULL, size, PROT_EXEC | PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > + if (umem->umem == MAP_FAILED) { > > + perror("mmap(UMem) failed"); > > + abort(); > > + } > > + return umem; > > +} > > + > > +void umem_mmap(UMem *umem) > > +{ > > + void *ret = mmap(umem->umem, umem->size, > > + PROT_EXEC | PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_FIXED, umem->fd, 0); > > + if (ret == MAP_FAILED) { > > + perror("umem_mmap(UMem) failed"); > > + abort(); > > + } > > +} > > + > > +void umem_destroy(UMem *umem) > > +{ > > + if (umem->fd != -1) { > > + close(umem->fd); > > + } > > + if (umem->shmem_fd != -1) { > > + close(umem->shmem_fd); > > + } > > + g_free(umem->faulted); > > + g_free(umem); > > +} > > + > > +void umem_get_page_request(UMem *umem, struct umem_page_request *page_request) > > +{ > > + if (ioctl(umem->fd, UMEM_GET_PAGE_REQUEST, page_request)) { > > + perror("daemon: UMEM_GET_PAGE_REQUEST"); > > + abort(); > > + } > > +} > > + > > +void umem_mark_page_cached(UMem *umem, struct umem_page_cached *page_cached) > > +{ > > + if (ioctl(umem->fd, UMEM_MARK_PAGE_CACHED, page_cached)) { > > + perror("daemon: UMEM_MARK_PAGE_CACHED"); > > + abort(); > > + } > > +} > > + > > +void umem_unmap(UMem *umem) > > +{ > > + munmap(umem->umem, umem->size); > > + umem->umem = NULL; > > +} > > + > > +void umem_close(UMem *umem) > > +{ > > + close(umem->fd); > > + umem->fd = -1; > > +} > > + > > +void *umem_map_shmem(UMem *umem) > > +{ > > + umem->nbits = umem->size >> umem->page_shift; > > + umem->nsets = 0; > > + umem->faulted = g_new0(unsigned long, BITS_TO_LONGS(umem->nbits)); > > + > > + umem->shmem = mmap(NULL, 
umem->size, PROT_READ | PROT_WRITE, MAP_SHARED, > > + umem->shmem_fd, 0); > > + if (umem->shmem == MAP_FAILED) { > > + perror("daemon: mmap(\"shmem\")"); > > + abort(); > > + } > > + return umem->shmem; > > +} > > + > > +void umem_unmap_shmem(UMem *umem) > > +{ > > + munmap(umem->shmem, umem->size); > > + umem->shmem = NULL; > > +} > > + > > +void umem_remove_shmem(UMem *umem, size_t offset, size_t size) > > +{ > > + int s = offset >> umem->page_shift; > > + int e = (offset + size) >> umem->page_shift; > > + int i; > > + > > + for (i = s; i < e; i++) { > > + if (!test_and_set_bit(i, umem->faulted)) { > > + umem->nsets++; > > +#if defined(CONFIG_MADVISE) && defined(MADV_REMOVE) > > + madvise(umem->shmem + offset, size, MADV_REMOVE); > > +#endif > > + } > > + } > > +} > > + > > +void umem_close_shmem(UMem *umem) > > +{ > > + close(umem->shmem_fd); > > + umem->shmem_fd = -1; > > +} > > + > > +/***************************************************************************/ > > +/* qemu <-> umem daemon communication */ > > + > > +size_t umem_pages_size(uint64_t nr) > > +{ > > + return sizeof(struct umem_pages) + nr * sizeof(uint64_t); > > +} > > + > > +static void umem_write_cmd(int fd, uint8_t cmd) > > +{ > > + DPRINTF("write cmd %c\n", cmd); > > + > > + for (;;) { > > + ssize_t ret = write(fd, &cmd, 1); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } else if (errno == EPIPE) { > > + perror("pipe"); > > + DPRINTF("write cmd %c %zd %d: pipe is closed\n", > > + cmd, ret, errno); > > + break; > > + } > > + > > + perror("pipe"); > > + DPRINTF("write cmd %c %zd %d\n", cmd, ret, errno); > > + abort(); > > + } > > + > > + break; > > + } > > +} > > + > > +static void umem_read_cmd(int fd, uint8_t expect) > > +{ > > + uint8_t cmd; > > + for (;;) { > > + ssize_t ret = read(fd, &cmd, 1); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } > > + perror("pipe"); > > + DPRINTF("read error cmd %c %zd %d\n", cmd, ret, errno); > > + 
abort(); > > + } > > + > > + if (ret == 0) { > > + DPRINTF("read cmd %c %zd: pipe is closed\n", cmd, ret); > > + abort(); > > + } > > + > > + break; > > + } > > + > > + DPRINTF("read cmd %c\n", cmd); > > + if (cmd != expect) { > > + DPRINTF("cmd %c expect %d\n", cmd, expect); > > + abort(); > > + } > > +} > > + > > +struct umem_pages *umem_recv_pages(QEMUFile *f, int *offset) > > +{ > > + int ret; > > + uint64_t nr; > > + size_t size; > > + struct umem_pages *pages; > > + > > + ret = qemu_peek_buffer(f, (uint8_t*)&nr, sizeof(nr), *offset); > > + *offset += sizeof(nr); > > + DPRINTF("ret %d nr %ld\n", ret, nr); > > + if (ret != sizeof(nr) || nr == 0) { > > + return NULL; > > + } > > + > > + size = umem_pages_size(nr); > > + pages = g_malloc(size); > > + pages->nr = nr; > > + size -= sizeof(pages->nr); > > + > > + ret = qemu_peek_buffer(f, (uint8_t*)pages->pgoffs, size, *offset); > > + *offset += size; > > + if (ret != size) { > > + g_free(pages); > > + return NULL; > > + } > > + return pages; > > +} > > + > > +static void umem_send_pages(QEMUFile *f, const struct umem_pages *pages) > > +{ > > + size_t len = umem_pages_size(pages->nr); > > + qemu_put_buffer(f, (const uint8_t*)pages, len); > > +} > > + > > +/* umem daemon -> qemu */ > > +void umem_daemon_ready(int to_qemu_fd) > > +{ > > + umem_write_cmd(to_qemu_fd, UMEM_DAEMON_READY); > > +} > > + > > +void umem_daemon_quit(QEMUFile *to_qemu) > > +{ > > + qemu_put_byte(to_qemu, UMEM_DAEMON_QUIT); > > +} > > + > > +void umem_daemon_send_pages_present(QEMUFile *to_qemu, > > + struct umem_pages *pages) > > +{ > > + qemu_put_byte(to_qemu, UMEM_DAEMON_TRIGGER_PAGE_FAULT); > > + umem_send_pages(to_qemu, pages); > > +} > > + > > +void umem_daemon_wait_for_qemu(int from_qemu_fd) > > +{ > > + umem_read_cmd(from_qemu_fd, UMEM_QEMU_READY); > > +} > > + > > +/* qemu -> umem daemon */ > > +void umem_qemu_wait_for_daemon(int from_umemd_fd) > > +{ > > + umem_read_cmd(from_umemd_fd, UMEM_DAEMON_READY); > > +} > > + > > +void 
umem_qemu_ready(int to_umemd_fd) > > +{ > > + umem_write_cmd(to_umemd_fd, UMEM_QEMU_READY); > > +} > > + > > +void umem_qemu_quit(QEMUFile *to_umemd) > > +{ > > + qemu_put_byte(to_umemd, UMEM_QEMU_QUIT); > > +} > > + > > +/* qemu side handler */ > > +struct umem_pages *umem_qemu_trigger_page_fault(QEMUFile *from_umemd, > > + int *offset) > > +{ > > + uint64_t i; > > + int page_shift = ffs(getpagesize()) - 1; > > + struct umem_pages *pages = umem_recv_pages(from_umemd, offset); > > + if (pages == NULL) { > > + return NULL; > > + } > > + > > + for (i = 0; i < pages->nr; i++) { > > + ram_addr_t addr = pages->pgoffs[i] << page_shift; > > + > > + /* make pages present by forcibly triggering page fault. */ > > + volatile uint8_t *ram = qemu_get_ram_ptr(addr); > > + uint8_t dummy_read = ram[0]; > > + (void)dummy_read; /* suppress unused variable warning */ > > + } > > + > > + return pages; > > +} > > + > > +void umem_qemu_send_pages_present(QEMUFile *to_umemd, > > + const struct umem_pages *pages) > > +{ > > + qemu_put_byte(to_umemd, UMEM_QEMU_PAGE_FAULTED); > > + umem_send_pages(to_umemd, pages); > > +} > > + > > +void umem_qemu_send_pages_unmapped(QEMUFile *to_umemd, > > + const struct umem_pages *pages) > > +{ > > + qemu_put_byte(to_umemd, UMEM_QEMU_PAGE_UNMAPPED); > > + umem_send_pages(to_umemd, pages); > > +} > > diff --git a/umem.h b/umem.h > > new file mode 100644 > > index 0000000..5ca19ef > > --- /dev/null > > +++ b/umem.h > > @@ -0,0 +1,105 @@ > > +/* > > + * umem.h: user process backed memory module for postcopy livemigration > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. 
> > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#ifndef QEMU_UMEM_H > > +#define QEMU_UMEM_H > > + > > +#include > > + > > +#include "qemu-common.h" > > + > > +typedef struct UMemDev UMemDev; > > + > > +struct UMem { > > + void *umem; > > + int fd; > > + void *shmem; > > + int shmem_fd; > > + uint64_t size; > > + > > + /* indexed by host page size */ > > + int page_shift; > > + int nbits; > > + int nsets; > > + unsigned long *faulted; > > +}; > > + > > +UMemDev *umem_dev_new(void); > > +void umem_dev_destroy(UMemDev *dev); > > +UMem *umem_dev_create(UMemDev *dev, size_t size, const char *name); > > +void umem_mmap(UMem *umem); > > + > > +void umem_destroy(UMem *umem); > > + > > +/* umem device operations */ > > +void umem_get_page_request(UMem *umem, struct umem_page_request *page_request); > > +void umem_mark_page_cached(UMem *umem, struct umem_page_cached *page_cached); > > +void umem_unmap(UMem *umem); > > +void umem_close(UMem *umem); > > + > > +/* umem shmem operations */ > > +void *umem_map_shmem(UMem *umem); > > +void umem_unmap_shmem(UMem *umem); > > +void umem_remove_shmem(UMem *umem, size_t offset, size_t size); > > +void umem_close_shmem(UMem *umem); > > + > > +/* qemu on source <-> umem daemon communication */ > > + > > +struct umem_pages { > > + uint64_t nr; /* nr = 0 means completed */ > > + uint64_t pgoffs[0]; > > +}; > > + > > +/* daemon -> qemu */ > > +#define UMEM_DAEMON_READY 'R' > > +#define UMEM_DAEMON_QUIT 'Q' > > +#define UMEM_DAEMON_TRIGGER_PAGE_FAULT 'T' > > +#define UMEM_DAEMON_ERROR 'E' > > + > > +/* qemu -> daemon */ > > +#define UMEM_QEMU_READY 'r' > > +#define 
UMEM_QEMU_QUIT 'q' > > +#define UMEM_QEMU_PAGE_FAULTED 't' > > +#define UMEM_QEMU_PAGE_UNMAPPED 'u' > > + > > +struct umem_pages *umem_recv_pages(QEMUFile *f, int *offset); > > +size_t umem_pages_size(uint64_t nr); > > + > > +/* for umem daemon */ > > +void umem_daemon_ready(int to_qemu_fd); > > +void umem_daemon_wait_for_qemu(int from_qemu_fd); > > +void umem_daemon_quit(QEMUFile *to_qemu); > > +void umem_daemon_send_pages_present(QEMUFile *to_qemu, > > + struct umem_pages *pages); > > + > > +/* for qemu */ > > +void umem_qemu_wait_for_daemon(int from_umemd_fd); > > +void umem_qemu_ready(int to_umemd_fd); > > +void umem_qemu_quit(QEMUFile *to_umemd); > > +struct umem_pages *umem_qemu_trigger_page_fault(QEMUFile *from_umemd, > > + int *offset); > > +void umem_qemu_send_pages_present(QEMUFile *to_umemd, > > + const struct umem_pages *pages); > > +void umem_qemu_send_pages_unmapped(QEMUFile *to_umemd, > > + const struct umem_pages *pages); > > + > > +#endif /* QEMU_UMEM_H */ > > diff --git a/vl.c b/vl.c > > index 5430b8c..17427a0 100644 > > --- a/vl.c > > +++ b/vl.c > > @@ -3274,8 +3274,12 @@ int main(int argc, char **argv, char **envp) > > default_drive(default_sdcard, snapshot, machine->use_scsi, > > IF_SD, 0, SD_OPTS); > > > > - register_savevm_live(NULL, "ram", 0, RAM_SAVE_VERSION_ID, NULL, > > - ram_save_live, NULL, ram_load, NULL); > > + if (postcopy_incoming_init(incoming, incoming_postcopy) < 0) { > > + exit(1); > > + } > > + register_savevm_live(NULL, "ram", 0, RAM_SAVE_VERSION_ID, > > + ram_save_set_params, ram_save_live, NULL, > > + ram_load, NULL); > > > > if (nb_numa_nodes > 0) { > > int i; > > @@ -3471,6 +3475,9 @@ int main(int argc, char **argv, char **envp) > > > > if (incoming) { > > runstate_set(RUN_STATE_INMIGRATE); > > + if (incoming_postcopy) { > > + postcopy_incoming_prepare(); > >+ } > > how about moving postcopy_incoming_prepare into qemu_start_incoming_migration ? 
> > > int ret = qemu_start_incoming_migration(incoming); > > if (ret < 0) { > > fprintf(stderr, "Migration failed. Exit code %s(%d), exiting.\n", > > @@ -3488,6 +3495,9 @@ int main(int argc, char **argv, char **envp) > > bdrv_close_all(); > > pause_all_vcpus(); > > net_cleanup(); > > + if (incoming_postcopy) { > > + postcopy_incoming_qemu_cleanup(); > > + } > > res_free(); > > > > return 0; > > Orit > -- yamahata From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([140.186.70.92]:58017) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RiHcg-0005gP-Kh for qemu-devel@nongnu.org; Tue, 03 Jan 2012 22:34:48 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1RiHca-0008MM-7I for qemu-devel@nongnu.org; Tue, 03 Jan 2012 22:34:42 -0500 Received: from mail.valinux.co.jp ([210.128.90.3]:49967) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RiHcY-0008M8-GF for qemu-devel@nongnu.org; Tue, 03 Jan 2012 22:34:36 -0500 Date: Wed, 4 Jan 2012 12:34:33 +0900 From: Isaku Yamahata Message-ID: <20120104033433.GN19274@valinux.co.jp> References: <4EFC8C88.70701@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <4EFC8C88.70701@redhat.com> Subject: Re: [Qemu-devel] [PATCH 21/21] postcopy: implement postcopy livemigration List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Orit Wasserman Cc: t.hirofuchi@aist.go.jp, qemu-devel@nongnu.org, kvm@vger.kernel.org, satoshi.itoh@aist.go.jp On Thu, Dec 29, 2011 at 05:51:36PM +0200, Orit Wasserman wrote: > Hi, Thank you for review. > A general comment this patch is a bit too long,which makes it hard to review. > Can you split it please? Will do. Maybe split into umem.[hc] part, incoming part and outgoing part. > On 12/29/2011 03:26 AM, Isaku Yamahata wrote: > > This patch implements postcopy livemigration. 
> > > > Signed-off-by: Isaku Yamahata > > --- > > Makefile.target | 4 + > > arch_init.c | 26 +- > > cpu-all.h | 7 + > > exec.c | 20 +- > > migration-exec.c | 8 + > > migration-fd.c | 30 + > > migration-postcopy-stub.c | 77 ++ > > migration-postcopy.c | 1891 +++++++++++++++++++++++++++++++++++++++++++++ > > migration-tcp.c | 37 +- > > migration-unix.c | 32 +- > > migration.c | 31 + > > migration.h | 30 + > > qemu-common.h | 1 + > > qemu-options.hx | 5 +- > > umem.c | 379 +++++++++ > > umem.h | 105 +++ > > vl.c | 14 +- > > 17 files changed, 2677 insertions(+), 20 deletions(-) > > create mode 100644 migration-postcopy-stub.c > > create mode 100644 migration-postcopy.c > > create mode 100644 umem.c > > create mode 100644 umem.h > > > > diff --git a/Makefile.target b/Makefile.target > > index 3261383..d94c53f 100644 > > --- a/Makefile.target > > +++ b/Makefile.target > > @@ -4,6 +4,7 @@ GENERATED_HEADERS = config-target.h > > CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y) > > CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y) > > CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y) > > +CONFIG_NO_POSTCOPY = $(if $(subst n,,$(CONFIG_POSTCOPY)),n,y) > > > > include ../config-host.mak > > include config-devices.mak > > @@ -199,6 +200,9 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o > > obj-y += memory.o > > LIBS+=-lz > > > > +common-obj-$(CONFIG_POSTCOPY) += migration-postcopy.o umem.o > > +common-obj-$(CONFIG_NO_POSTCOPY) += migration-postcopy-stub.o > > + > > QEMU_CFLAGS += $(VNC_TLS_CFLAGS) > > QEMU_CFLAGS += $(VNC_SASL_CFLAGS) > > QEMU_CFLAGS += $(VNC_JPEG_CFLAGS) > > diff --git a/arch_init.c b/arch_init.c > > index bc53092..8b3130d 100644 > > --- a/arch_init.c > > +++ b/arch_init.c > > @@ -102,6 +102,13 @@ static int is_dup_page(uint8_t *page, uint8_t ch) > > return 1; > > } > > > > +static bool outgoing_postcopy = false; > > + > > +void ram_save_set_params(const MigrationParams *params, void *opaque) > > +{ > > + outgoing_postcopy = params->postcopy; > > +} > > + > > 
static RAMBlock *last_block_sent = NULL; > > > > int ram_save_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) > > @@ -284,6 +291,17 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > > uint64_t expected_time = 0; > > int ret; > > > > + if (stage == 1) { > > + last_block_sent = NULL; > > + > > + bytes_transferred = 0; > > + last_block = NULL; > > + last_offset = 0; > > This changes the line order and adds a new empty line. > > > + } > > + if (outgoing_postcopy) { > > + return postcopy_outgoing_ram_save_live(mon, f, stage, opaque); > > + } > > + > > I would just do: > > unregister_savevm_live and then register_savevm_live(...,postcopy_outgoing_ram_save_live,...) > when starting outgoing postcopy migration. > > > if (stage < 0) { > > cpu_physical_memory_set_dirty_tracking(0); > > return 0; > > @@ -295,10 +313,6 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > > } > > > > if (stage == 1) { > > - bytes_transferred = 0; > > - last_block_sent = NULL; > > - last_block = NULL; > > - last_offset = 0; > > sort_ram_list(); > > > > /* Make sure all dirty bits are set */ > > @@ -436,6 +450,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > > int flags; > > int error; > > > > + if (incoming_postcopy) { > > + return postcopy_incoming_ram_load(f, opaque, version_id); > > + } > > + > Why not call register_savevm_live(...,postcopy_incoming_ram_load,...)
when starting guest with postcopy_incoming > > > if (version_id < 3 || version_id > RAM_SAVE_VERSION_ID) { > > return -EINVAL; > > } > > diff --git a/cpu-all.h b/cpu-all.h > > index 0244f7a..2e9d8a7 100644 > > --- a/cpu-all.h > > +++ b/cpu-all.h > > @@ -475,6 +475,9 @@ extern ram_addr_t ram_size; > > /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ > > #define RAM_PREALLOC_MASK (1 << 0) > > > > +/* RAM is allocated via umem for postcopy incoming mode */ > > +#define RAM_POSTCOPY_UMEM_MASK (1 << 1) > > + > > typedef struct RAMBlock { > > uint8_t *host; > > ram_addr_t offset; > > @@ -485,6 +488,10 @@ typedef struct RAMBlock { > > #if defined(__linux__) && !defined(TARGET_S390X) > > int fd; > > #endif > > + > > +#ifdef CONFIG_POSTCOPY > > + UMem *umem; /* for incoming postcopy mode */ > > +#endif > > } RAMBlock; > > > > typedef struct RAMList { > > diff --git a/exec.c b/exec.c > > index c8c6692..90b0491 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -35,6 +35,7 @@ > > #include "qemu-timer.h" > > #include "memory.h" > > #include "exec-memory.h" > > +#include "migration.h" > > #if defined(CONFIG_USER_ONLY) > > #include > > #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) > > @@ -2949,6 +2950,13 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, > > new_block->host = host; > > new_block->flags |= RAM_PREALLOC_MASK; > > } else { > > +#ifdef CONFIG_POSTCOPY > > + if (incoming_postcopy) { > > + postcopy_incoming_ram_alloc(name, size, > > + &new_block->host, &new_block->umem); > > + new_block->flags |= RAM_POSTCOPY_UMEM_MASK; > > + } else > > +#endif > > if (mem_path) { > > #if defined (__linux__) && !defined(TARGET_S390X) > > new_block->host = file_ram_alloc(new_block, size, mem_path); > > @@ -3027,7 +3035,13 @@ void qemu_ram_free(ram_addr_t addr) > > QLIST_REMOVE(block, next); > > if (block->flags & RAM_PREALLOC_MASK) { > > ; > > - } else if (mem_path) { > > + } > > +#ifdef CONFIG_POSTCOPY > > + else if (block->flags & 
RAM_POSTCOPY_UMEM_MASK) { > > + postcopy_incoming_ram_free(block->umem); > > + } > > +#endif > > + else if (mem_path) { > > #if defined (__linux__) && !defined(TARGET_S390X) > > if (block->fd) { > > munmap(block->host, block->length); > > @@ -3073,6 +3087,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) > > } else { > > flags = MAP_FIXED; > > munmap(vaddr, length); > > + if (block->flags & RAM_POSTCOPY_UMEM_MASK) { > > + postcopy_incoming_qemu_pages_unmapped(addr, length); > > + block->flags &= ~RAM_POSTCOPY_UMEM_MASK; > > + } > > if (mem_path) { > > #if defined(__linux__) && !defined(TARGET_S390X) > > if (block->fd) { > > diff --git a/migration-exec.c b/migration-exec.c > > index e14552e..2bd0c3b 100644 > > --- a/migration-exec.c > > +++ b/migration-exec.c > > @@ -62,6 +62,10 @@ int exec_start_outgoing_migration(MigrationState *s, const char *command) > > { > > FILE *f; > > > > + if (s->params.postcopy) { > > + return -ENOSYS; > > + } > > + > > f = popen(command, "w"); > > if (f == NULL) { > > DPRINTF("Unable to popen exec target\n"); > > @@ -104,6 +108,10 @@ int exec_start_incoming_migration(const char *command) > > { > > QEMUFile *f; > > > > + if (incoming_postcopy) { > > + return -ENOSYS; > > + } > > + > > DPRINTF("Attempting to start an incoming migration\n"); > > f = qemu_popen_cmd(command, "r"); > > if(f == NULL) { > > diff --git a/migration-fd.c b/migration-fd.c > > index 6211124..5a62ab9 100644 > > --- a/migration-fd.c > > +++ b/migration-fd.c > > @@ -88,6 +88,23 @@ int fd_start_outgoing_migration(MigrationState *s, const char *fdname) > > s->write = fd_write; > > s->close = fd_close; > > > > + if (s->params.postcopy) { > > + int flags = fcntl(s->fd, F_GETFL); > > + if ((flags & O_ACCMODE) != O_RDWR) { > > + goto err_after_open; > > + } > > + > > + s->fd_read = dup(s->fd); > > + if (s->fd_read == -1) { > > + goto err_after_open; > > + } > > + s->file_read = qemu_fdopen(s->fd_read, "r"); > > + if (s->file_read == NULL) { > > + 
close(s->fd_read); > > + goto err_after_open; > > + } > > + } > > + > > migrate_fd_connect(s); > > return 0; > > > > @@ -103,7 +120,14 @@ static void fd_accept_incoming_migration(void *opaque) > > > > process_incoming_migration(f); > > qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL); > > + if (incoming_postcopy) { > > + postcopy_incoming_fork_umemd(qemu_stdio_fd(f), f); > > + } > > qemu_fclose(f); > > + if (incoming_postcopy) { > > + postcopy_incoming_qemu_ready(); > > + } > > + return; > > } > > > > int fd_start_incoming_migration(const char *infd) > > @@ -114,6 +138,12 @@ int fd_start_incoming_migration(const char *infd) > > DPRINTF("Attempting to start an incoming migration via fd\n"); > > > > fd = strtol(infd, NULL, 0); > > + if (incoming_postcopy) { > > + int flags = fcntl(fd, F_GETFL); > > + if ((flags & O_ACCMODE) != O_RDWR) { > > + return -EINVAL; > > + } > > + } > > f = qemu_fdopen(fd, "rb"); > > if(f == NULL) { > > DPRINTF("Unable to apply qemu wrapper to file descriptor\n"); > > diff --git a/migration-postcopy-stub.c b/migration-postcopy-stub.c > > new file mode 100644 > > index 0000000..0b78de7 > > --- /dev/null > > +++ b/migration-postcopy-stub.c > > @@ -0,0 +1,77 @@ > > +/* > > + * migration-postcopy-stub.c: postcopy livemigration > > + * stub functions for non-supported hosts > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#include "sysemu.h" > > +#include "migration.h" > > + > > +int postcopy_outgoing_create_read_socket(MigrationState *s) > > +{ > > + return -ENOSYS; > > +} > > + > > +int postcopy_outgoing_ram_save_live(Monitor *mon, > > + QEMUFile *f, int stage, void *opaque) > > +{ > > + return -ENOSYS; > > +} > > + > > +void *postcopy_outgoing_begin(MigrationState *ms) > > +{ > > + return NULL; > > +} > > + > > +int postcopy_outgoing_ram_save_background(Monitor *mon, QEMUFile *f, > > + void *postcopy) > > +{ > > + return -ENOSYS; > > +} > > + > > +int postcopy_incoming_init(const char *incoming, bool incoming_postcopy) > > +{ > > + return -ENOSYS; > > +} > > + > > +void postcopy_incoming_prepare(void) > > +{ > > +} > > + > > +int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id) > > +{ > > + return -ENOSYS; > > +} > > + > > +void postcopy_incoming_fork_umemd(int mig_read_fd, QEMUFile *mig_read) > > +{ > > +} > > + > > +void postcopy_incoming_qemu_ready(void) > > +{ > > +} > > + > > +void postcopy_incoming_qemu_cleanup(void) > > +{ > > +} > > + > > +void postcopy_incoming_qemu_pages_unmapped(ram_addr_t addr, ram_addr_t size) > > +{ > > +} > > diff --git a/migration-postcopy.c b/migration-postcopy.c > > new file mode 100644 > > index 0000000..ed0d574 > > --- /dev/null > > +++ b/migration-postcopy.c > > @@ -0,0 +1,1891 @@ > > +/* > > + * migration-postcopy.c: postcopy livemigration > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > 
+ * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#include "bitmap.h" > > +#include "sysemu.h" > > +#include "hw/hw.h" > > +#include "arch_init.h" > > +#include "migration.h" > > +#include "umem.h" > > + > > +#include "memory.h" > > +#define WANT_EXEC_OBSOLETE > > +#include "exec-obsolete.h" > > + > > +//#define DEBUG_POSTCOPY > > +#ifdef DEBUG_POSTCOPY > > +#include > > +#define DPRINTF(fmt, ...) \ > > + do { \ > > + printf("%d:%ld %s:%d: " fmt, getpid(), syscall(SYS_gettid), \ > > + __func__, __LINE__, ## __VA_ARGS__); \ > > + } while (0) > > +#else > > +#define DPRINTF(fmt, ...) do { } while (0) > > +#endif > > + > > +#define ALIGN_UP(size, align) (((size) + (align) - 1) & ~((align) - 1)) > > + > > +static void fd_close(int *fd) > > +{ > > + if (*fd >= 0) { > > + close(*fd); > > + *fd = -1; > > + } > > +} > > + > > +/*************************************************************************** > > + * QEMUFile for non blocking pipe > > + */ > > + > > +/* read only */ > > +struct QEMUFilePipe { > > + int fd; > > + QEMUFile *file; > > +}; > > Why not use QEMUFileSocket ? Okay, will rename it to QEMUFile_FD (or whatever) and share the struct. 
> > +typedef struct QEMUFilePipe QEMUFilePipe; > > + > > +static int pipe_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) > > +{ > > + QEMUFilePipe *s = opaque; > > + ssize_t len = 0; > > + > > + while (size > 0) { > > + ssize_t ret = read(s->fd, buf, size); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } > > + if (len == 0) { > > + len = -errno; > > + } > > + break; > > + } > > + > > + if (ret == 0) { > > + /* the write end of the pipe is closed */ > > + break; > > + } > > + len += ret; > > + buf += ret; > > + size -= ret; > > + } > > + > > + return len; > > +} > > + > > +static int pipe_close(void *opaque) > > +{ > > + QEMUFilePipe *s = opaque; > > + g_free(s); > > + return 0; > > +} > > + > > +static QEMUFile *qemu_fopen_pipe(int fd) > > +{ > > + QEMUFilePipe *s = g_malloc0(sizeof(*s)); > > + > > + s->fd = fd; > > + fcntl_setfl(fd, O_NONBLOCK); > > + s->file = qemu_fopen_ops(s, NULL, pipe_get_buffer, pipe_close, > > + NULL, NULL, NULL); > > + return s->file; > > +} > > + > > +/* write only */ > > +struct QEMUFileNonblock { > > + int fd; > > + QEMUFile *file; > > + > > + /* for pipe-write nonblocking mode */ > > +#define BUF_SIZE_INC (32 * 1024) /* = IO_BUF_SIZE */ > > + uint8_t *buffer; > > + size_t buffer_size; > > + size_t buffer_capacity; > > + bool freeze_output; > > +}; > > +typedef struct QEMUFileNonblock QEMUFileNonblock; > > + > > Couldn't you use QEMUFileBuffered ? QEMUFileBuffered can be built on top of QEMUFileNonblock. 
I'll refactor buffered_file.c > > +static void nonblock_flush_buffer(QEMUFileNonblock *s) > > +{ > > + size_t offset = 0; > > + ssize_t ret; > > + > > + while (offset < s->buffer_size) { > > + ret = write(s->fd, s->buffer + offset, s->buffer_size - offset); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } else if (errno == EAGAIN) { > > + s->freeze_output = true; > > + } else { > > + qemu_file_set_error(s->file, errno); > > + } > > + break; > > + } > > + > > + if (ret == 0) { > > + DPRINTF("ret == 0\n"); > > + break; > > + } > > + > > + offset += ret; > > + } > > + > > + if (offset > 0) { > > + assert(s->buffer_size >= offset); > > + memmove(s->buffer, s->buffer + offset, s->buffer_size - offset); > > + s->buffer_size -= offset; > > + } > > + if (s->buffer_size > 0) { > > + s->freeze_output = true; > > + } > > +} > > + > > +static int nonblock_put_buffer(void *opaque, > > + const uint8_t *buf, int64_t pos, int size) > > +{ > > + QEMUFileNonblock *s = opaque; > > + int error; > > + ssize_t len = 0; > > + > > + error = qemu_file_get_error(s->file); > > + if (error) { > > + return error; > > + } > > + > > + nonblock_flush_buffer(s); > > + error = qemu_file_get_error(s->file); > > + if (error) { > > + return error; > > + } > > + > > + while (!s->freeze_output && size > 0) { > > + ssize_t ret; > > + assert(s->buffer_size == 0); > > + > > + ret = write(s->fd, buf, size); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } else if (errno == EAGAIN) { > > + s->freeze_output = true; > > + } else { > > + qemu_file_set_error(s->file, errno); > > + } > > + break; > > + } > > + > > + len += ret; > > + buf += ret; > > + size -= ret; > > + } > > + > > + if (size > 0) { > > + int inc = size - (s->buffer_capacity - s->buffer_size); > > + if (inc > 0) { > > + s->buffer_capacity += > > + DIV_ROUND_UP(inc, BUF_SIZE_INC) * BUF_SIZE_INC; > > + s->buffer = g_realloc(s->buffer, s->buffer_capacity); > > + } > > + memcpy(s->buffer + 
s->buffer_size, buf, size); > > + s->buffer_size += size; > > + > > + len += size; > > + } > > + > > + return len; > > +} > > + > > +static int nonblock_pending_size(QEMUFileNonblock *s) > > +{ > > + return qemu_pending_size(s->file) + s->buffer_size; > > +} > > + > > +static void nonblock_fflush(QEMUFileNonblock *s) > > +{ > > + s->freeze_output = false; > > + nonblock_flush_buffer(s); > > + if (!s->freeze_output) { > > + qemu_fflush(s->file); > > + } > > +} > > + > > +static void nonblock_wait_for_flush(QEMUFileNonblock *s) > > +{ > > + while (nonblock_pending_size(s) > 0) { > > + fd_set fds; > > + FD_ZERO(&fds); > > + FD_SET(s->fd, &fds); > > + select(s->fd + 1, NULL, &fds, NULL, NULL); > > + > > + nonblock_fflush(s); > > + } > > +} > > + > > +static int nonblock_close(void *opaque) > > +{ > > + QEMUFileNonblock *s = opaque; > > + nonblock_wait_for_flush(s); > > + g_free(s->buffer); > > + g_free(s); > > + return 0; > > +} > > + > > +static QEMUFileNonblock *qemu_fopen_nonblock(int fd) > > +{ > > + QEMUFileNonblock *s = g_malloc0(sizeof(*s)); > > + > > + s->fd = fd; > > + fcntl_setfl(fd, O_NONBLOCK); > > + s->file = qemu_fopen_ops(s, nonblock_put_buffer, NULL, nonblock_close, > > + NULL, NULL, NULL); > > + return s; > > +} > > + > > +/*************************************************************************** > > + * umem daemon on destination <-> qemu on source protocol > > + */ > > + > > +#define QEMU_UMEM_REQ_INIT 0x00 > > +#define QEMU_UMEM_REQ_ON_DEMAND 0x01 > > +#define QEMU_UMEM_REQ_ON_DEMAND_CONT 0x02 > > +#define QEMU_UMEM_REQ_BACKGROUND 0x03 > > +#define QEMU_UMEM_REQ_BACKGROUND_CONT 0x04 > > +#define QEMU_UMEM_REQ_REMOVE 0x05 > > +#define QEMU_UMEM_REQ_EOC 0x06 > > + > > +struct qemu_umem_req { > > + int8_t cmd; > > + uint8_t len; > > + char *idstr; /* ON_DEMAND, BACKGROUND, REMOVE */ > > + uint32_t nr; /* ON_DEMAND, ON_DEMAND_CONT, > > + BACKGROUND, BACKGROUND_CONT, REMOVE */ > > + > > + /* in target page size as qemu migration protocol */ > > + 
uint64_t *pgoffs; /* ON_DEMAND, ON_DEMAND_CONT, > > + BACKGROUND, BACKGROUND_CONT, REMOVE */ > > +}; > > + > > +static void postcopy_incoming_send_req_idstr(QEMUFile *f, const char* idstr) > > +{ > > + qemu_put_byte(f, strlen(idstr)); > > + qemu_put_buffer(f, (uint8_t *)idstr, strlen(idstr)); > > +} > > + > > +static void postcopy_incoming_send_req_pgoffs(QEMUFile *f, uint32_t nr, > > + const uint64_t *pgoffs) > > +{ > > + uint32_t i; > > + > > + qemu_put_be32(f, nr); > > + for (i = 0; i < nr; i++) { > > + qemu_put_be64(f, pgoffs[i]); > > + } > > +} > > + > > +static void postcopy_incoming_send_req_one(QEMUFile *f, > > + const struct qemu_umem_req *req) > > +{ > > + DPRINTF("cmd %d\n", req->cmd); > > + qemu_put_byte(f, req->cmd); > > + switch (req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + case QEMU_UMEM_REQ_EOC: > > + /* nothing */ > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + case QEMU_UMEM_REQ_REMOVE: > > + postcopy_incoming_send_req_idstr(f, req->idstr); > > + postcopy_incoming_send_req_pgoffs(f, req->nr, req->pgoffs); > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + postcopy_incoming_send_req_pgoffs(f, req->nr, req->pgoffs); > > + break; > > + default: > > + abort(); > > + break; > > + } > > +} > > + > > +/* QEMUFile can buffer up to IO_BUF_SIZE = 32 * 1024. 
> > + * So one message size must be <= IO_BUF_SIZE > > + * cmd: 1 > > + * id len: 1 > > + * id: 256 > > + * nr: 2 > > + */ > > +#define MAX_PAGE_NR ((32 * 1024 - 1 - 1 - 256 - 2) / sizeof(uint64_t)) > > +static void postcopy_incoming_send_req(QEMUFile *f, > > + const struct qemu_umem_req *req) > > +{ > > + uint32_t nr = req->nr; > > + struct qemu_umem_req tmp = *req; > > + > > + switch (req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + case QEMU_UMEM_REQ_EOC: > > + postcopy_incoming_send_req_one(f, &tmp); > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + tmp.nr = MIN(nr, MAX_PAGE_NR); > > + postcopy_incoming_send_req_one(f, &tmp); > > + > > + nr -= tmp.nr; > > + tmp.pgoffs += tmp.nr; > > + if (tmp.cmd == QEMU_UMEM_REQ_ON_DEMAND) { > > + tmp.cmd = QEMU_UMEM_REQ_ON_DEMAND_CONT; > > + }else { > > + tmp.cmd = QEMU_UMEM_REQ_BACKGROUND_CONT; > > + } > > + /* fall through */ > > + case QEMU_UMEM_REQ_REMOVE: > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + while (nr > 0) { > > + tmp.nr = MIN(nr, MAX_PAGE_NR); > > + postcopy_incoming_send_req_one(f, &tmp); > > + > > + nr -= tmp.nr; > > + tmp.pgoffs += tmp.nr; > > + } > > + break; > > + default: > > + abort(); > > + break; > > + } > > +} > > + > > +static int postcopy_outgoing_recv_req_idstr(QEMUFile *f, > > + struct qemu_umem_req *req, > > + size_t *offset) > > +{ > > + int ret; > > + > > + req->len = qemu_peek_byte(f, *offset); > > + *offset += 1; > > + if (req->len == 0) { > > + return -EAGAIN; > > + } > > + req->idstr = g_malloc((int)req->len + 1); > > + ret = qemu_peek_buffer(f, (uint8_t*)req->idstr, req->len, *offset); > > + *offset += ret; > > + if (ret != req->len) { > > + g_free(req->idstr); > > + req->idstr = NULL; > > + return -EAGAIN; > > + } > > + req->idstr[req->len] = 0; > > + return 0; > > +} > > + > > +static int postcopy_outgoing_recv_req_pgoffs(QEMUFile *f, > > + struct qemu_umem_req *req, > > + size_t *offset) > > +{ > > + 
int ret; > > + uint32_t be32; > > + uint32_t i; > > + > > + ret = qemu_peek_buffer(f, (uint8_t*)&be32, sizeof(be32), *offset); > > + *offset += sizeof(be32); > > + if (ret != sizeof(be32)) { > > + return -EAGAIN; > > + } > > + > > + req->nr = be32_to_cpu(be32); > > + req->pgoffs = g_new(uint64_t, req->nr); > > + for (i = 0; i < req->nr; i++) { > > + uint64_t be64; > > + ret = qemu_peek_buffer(f, (uint8_t*)&be64, sizeof(be64), *offset); > > + *offset += sizeof(be64); > > + if (ret != sizeof(be64)) { > > + g_free(req->pgoffs); > > + req->pgoffs = NULL; > > + return -EAGAIN; > > + } > > + req->pgoffs[i] = be64_to_cpu(be64); > > + } > > + return 0; > > +} > > + > > +static int postcopy_outgoing_recv_req(QEMUFile *f, struct qemu_umem_req *req) > > +{ > > + int size; > > + int ret; > > + size_t offset = 0; > > + > > + size = qemu_peek_buffer(f, (uint8_t*)&req->cmd, 1, offset); > > + if (size <= 0) { > > + return -EAGAIN; > > + } > > + offset += 1; > > + > > + switch (req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + case QEMU_UMEM_REQ_EOC: > > + /* nothing */ > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + case QEMU_UMEM_REQ_REMOVE: > > + ret = postcopy_outgoing_recv_req_idstr(f, req, &offset); > > + if (ret < 0) { > > + return ret; > > + } > > + ret = postcopy_outgoing_recv_req_pgoffs(f, req, &offset); > > + if (ret < 0) { > > + return ret; > > + } > > + break; > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + ret = postcopy_outgoing_recv_req_pgoffs(f, req, &offset); > > + if (ret < 0) { > > + return ret; > > + } > > + break; > > + default: > > + abort(); > > + break; > > + } > > + qemu_file_skip(f, offset); > > + DPRINTF("cmd %d\n", req->cmd); > > + return 0; > > +} > > + > > +static void postcopy_outgoing_free_req(struct qemu_umem_req *req) > > +{ > > + g_free(req->idstr); > > + g_free(req->pgoffs); > > +} > > + > > 
+/*************************************************************************** > > + * outgoing part > > + */ > > + > > +#define QEMU_SAVE_LIVE_STAGE_START 0x01 /* = QEMU_VM_SECTION_START */ > > +#define QEMU_SAVE_LIVE_STAGE_PART 0x02 /* = QEMU_VM_SECTION_PART */ > > +#define QEMU_SAVE_LIVE_STAGE_END 0x03 /* = QEMU_VM_SECTION_END */ > > + > > +enum POState { > > + PO_STATE_ERROR_RECEIVE, > > + PO_STATE_ACTIVE, > > + PO_STATE_EOC_RECEIVED, > > + PO_STATE_ALL_PAGES_SENT, > > + PO_STATE_COMPLETED, > > +}; > > +typedef enum POState POState; > > + > > +struct PostcopyOutgoingState { > > + POState state; > > + QEMUFile *mig_read; > > + int fd_read; > > + RAMBlock *last_block_read; > > + > > + QEMUFile *mig_buffered_write; > > + MigrationState *ms; > > + > > + /* For nobg mode. Check if all pages are sent */ > > + RAMBlock *block; > > + ram_addr_t addr; > > +}; > > +typedef struct PostcopyOutgoingState PostcopyOutgoingState; > > + > > +int postcopy_outgoing_create_read_socket(MigrationState *s) > > +{ > > + if (!s->params.postcopy) { > > + return 0; > > + } > > + > > + s->fd_read = dup(s->fd); > > + if (s->fd_read == -1) { > > + int ret = -errno; > > + perror("dup"); > > + return ret; > > + } > > + s->file_read = qemu_fopen_socket(s->fd_read); > > + if (s->file_read == NULL) { > > + return -EINVAL; > > + } > > + return 0; > > +} > > + > > +int postcopy_outgoing_ram_save_live(Monitor *mon, > > + QEMUFile *f, int stage, void *opaque) > > +{ > > + int ret = 0; > > + DPRINTF("stage %d\n", stage); > > + if (stage == QEMU_SAVE_LIVE_STAGE_START) { > > + sort_ram_list(); > > + ram_save_live_mem_size(f); > > + } > > + if (stage == QEMU_SAVE_LIVE_STAGE_PART) { > > + ret = 1; > > + } > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + return ret; > > +} > > + > > +static RAMBlock *postcopy_outgoing_find_block(const char *idstr) > > +{ > > + RAMBlock *block; > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (!strncmp(idstr, block->idstr, strlen(idstr))) { > > + return block; 
> > + } > > + } > > + return NULL; > > +} > > + > > +/* > > + * return value > > + * 0: continue postcopy mode > > + * > 0: completed postcopy mode. > > + * < 0: error > > + */ > > +static int postcopy_outgoing_handle_req(PostcopyOutgoingState *s, > > + const struct qemu_umem_req *req, > > + bool *written) > > +{ > > + int i; > > + RAMBlock *block; > > + > > + DPRINTF("cmd %d state %d\n", req->cmd, s->state); > > + switch(req->cmd) { > > + case QEMU_UMEM_REQ_INIT: > > + /* nothing */ > > + break; > > + case QEMU_UMEM_REQ_EOC: > > + /* tell to finish migration. */ > > + if (s->state == PO_STATE_ALL_PAGES_SENT) { > > + s->state = PO_STATE_COMPLETED; > > + DPRINTF("-> PO_STATE_COMPLETED\n"); > > + } else { > > + s->state = PO_STATE_EOC_RECEIVED; > > + DPRINTF("-> PO_STATE_EOC_RECEIVED\n"); > > + } > > + return 1; > > + case QEMU_UMEM_REQ_ON_DEMAND: > > + case QEMU_UMEM_REQ_BACKGROUND: > > + DPRINTF("idstr: %s\n", req->idstr); > > + block = postcopy_outgoing_find_block(req->idstr); > > + if (block == NULL) { > > + return -EINVAL; > > + } > > + s->last_block_read = block; > > + /* fall through */ > > + case QEMU_UMEM_REQ_ON_DEMAND_CONT: > > + case QEMU_UMEM_REQ_BACKGROUND_CONT: > > + DPRINTF("nr %d\n", req->nr); > > + for (i = 0; i < req->nr; i++) { > > + DPRINTF("offs[%d] 0x%"PRIx64"\n", i, req->pgoffs[i]); > > + int ret = ram_save_page(s->mig_buffered_write, s->last_block_read, > > + req->pgoffs[i] << TARGET_PAGE_BITS); > > + if (ret > 0) { > > + *written = true; > > + } > > + } > > + break; > > + case QEMU_UMEM_REQ_REMOVE: > > + block = postcopy_outgoing_find_block(req->idstr); > > + if (block == NULL) { > > + return -EINVAL; > > + } > > + for (i = 0; i < req->nr; i++) { > > + ram_addr_t addr = block->offset + > > + (req->pgoffs[i] << TARGET_PAGE_BITS); > > + cpu_physical_memory_reset_dirty(addr, > > + addr + TARGET_PAGE_SIZE, > > + MIGRATION_DIRTY_FLAG); > > + } > > + break; > > + default: > > + return -EINVAL; > > + } > > + return 0; > > +} > > + > > +static void 
postcopy_outgoing_close_mig_read(PostcopyOutgoingState *s) > > +{ > > + if (s->mig_read != NULL) { > > + qemu_set_fd_handler(s->fd_read, NULL, NULL, NULL); > > + qemu_fclose(s->mig_read); > > + s->mig_read = NULL; > > + fd_close(&s->fd_read); > > + > > + s->ms->file_read = NULL; > > + s->ms->fd_read = -1; > > + } > > +} > > + > > +static void postcopy_outgoing_completed(PostcopyOutgoingState *s) > > +{ > > + postcopy_outgoing_close_mig_read(s); > > + s->ms->postcopy = NULL; > > + g_free(s); > > +} > > + > > +static void postcopy_outgoing_recv_handler(void *opaque) > > +{ > > + PostcopyOutgoingState *s = opaque; > > + bool written = false; > > + int ret = 0; > > + > > + assert(s->state == PO_STATE_ACTIVE || > > + s->state == PO_STATE_ALL_PAGES_SENT); > > + > > + do { > > + struct qemu_umem_req req = {.idstr = NULL, > > + .pgoffs = NULL}; > > + > > + ret = postcopy_outgoing_recv_req(s->mig_read, &req); > > + if (ret < 0) { > > + if (ret == -EAGAIN) { > > + ret = 0; > > + } > > + break; > > + } > > + if (s->state == PO_STATE_ACTIVE) { > > + ret = postcopy_outgoing_handle_req(s, &req, &written); > > + } > > + postcopy_outgoing_free_req(&req); > > + } while (ret == 0); > > + > > + /* > > + * flush buffered_file. > > + * Although mig_write is rate-limited buffered file, those written pages > > + * are requested on demand by the destination. 
So forcibly push > > + * those pages ignoring rate limiting > > + */ > > + if (written) { > > + qemu_fflush(s->mig_buffered_write); > > + /* qemu_buffered_file_drain(s->mig_buffered_write); */ > > + } > > + > > + if (ret < 0) { > > + switch (s->state) { > > + case PO_STATE_ACTIVE: > > + s->state = PO_STATE_ERROR_RECEIVE; > > + DPRINTF("-> PO_STATE_ERROR_RECEIVE\n"); > > + break; > > + case PO_STATE_ALL_PAGES_SENT: > > + s->state = PO_STATE_COMPLETED; > > + DPRINTF("-> PO_STATE_ALL_PAGES_SENT\n"); > > + break; > > + default: > > + abort(); > > + } > > + } > > + if (s->state == PO_STATE_ERROR_RECEIVE || s->state == PO_STATE_COMPLETED) { > > + postcopy_outgoing_close_mig_read(s); > > + } > > + if (s->state == PO_STATE_COMPLETED) { > > + DPRINTF("PO_STATE_COMPLETED\n"); > > + MigrationState *ms = s->ms; > > + postcopy_outgoing_completed(s); > > + migrate_fd_completed(ms); > > + } > > +} > > + > > +void *postcopy_outgoing_begin(MigrationState *ms) > > +{ > > + PostcopyOutgoingState *s = g_new(PostcopyOutgoingState, 1); > > + DPRINTF("outgoing begin\n"); > > + qemu_fflush(ms->file); > > + > > + s->ms = ms; > > + s->state = PO_STATE_ACTIVE; > > + s->fd_read = ms->fd_read; > > + s->mig_read = ms->file_read; > > + s->mig_buffered_write = ms->file; > > + s->block = NULL; > > + s->addr = 0; > > + > > + /* Make sure all dirty bits are set */ > > + ram_save_memory_set_dirty(); > > + > > + qemu_set_fd_handler(s->fd_read, > > + &postcopy_outgoing_recv_handler, NULL, s); > > + return s; > > +} > > + > > +static void postcopy_outgoing_ram_all_sent(QEMUFile *f, > > + PostcopyOutgoingState *s) > > +{ > > + assert(s->state == PO_STATE_ACTIVE); > > + > > + s->state = PO_STATE_ALL_PAGES_SENT; > > + /* tell incoming side that all pages are sent */ > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + qemu_fflush(f); > > + qemu_buffered_file_drain(f); > > + DPRINTF("sent RAM_SAVE_FLAG_EOS\n"); > > + migrate_fd_cleanup(s->ms); > > + > > + /* Later migrate_fd_complete() will be called which 
calls > > + * migrate_fd_cleanup() again. So dummy file is created > > + * for qemu monitor to keep working. > > + */ > > + s->ms->file = qemu_fopen_ops(NULL, NULL, NULL, NULL, NULL, > > + NULL, NULL); > > +} > > + > > +static int postcopy_outgoing_check_all_ram_sent(PostcopyOutgoingState *s, > > + RAMBlock *block, > > + ram_addr_t addr) > > +{ > > + if (block == NULL) { > > + block = QLIST_FIRST(&ram_list.blocks); > > + addr = block->offset; > > + } > > + > > + for (; block != NULL; > > + s->block = QLIST_NEXT(s->block, next), addr = block->offset) { > > + for (; addr < block->offset + block->length; > > + addr += TARGET_PAGE_SIZE) { > > + if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) { > > + s->block = block; > > + s->addr = addr; > > + return 0; > > + } > > + } > > + } > > + > > + return 1; > > +} > > + > > +int postcopy_outgoing_ram_save_background(Monitor *mon, QEMUFile *f, > > + void *postcopy) > > +{ > > + PostcopyOutgoingState *s = postcopy; > > + > > + assert(s->state == PO_STATE_ACTIVE || > > + s->state == PO_STATE_EOC_RECEIVED || > > + s->state == PO_STATE_ERROR_RECEIVE); > > + > > + switch (s->state) { > > + case PO_STATE_ACTIVE: > > + /* nothing. processed below */ > > + break; > > + case PO_STATE_EOC_RECEIVED: > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + s->state = PO_STATE_COMPLETED; > > + postcopy_outgoing_completed(s); > > + DPRINTF("PO_STATE_COMPLETED\n"); > > + return 1; > > + case PO_STATE_ERROR_RECEIVE: > > + postcopy_outgoing_completed(s); > > + DPRINTF("PO_STATE_ERROR_RECEIVE\n"); > > + return -1; > > + default: > > + abort(); > > + } > > + > > + if (s->ms->params.nobg) { > > + /* See if all pages are sent. */ > > + if (postcopy_outgoing_check_all_ram_sent(s, s->block, s->addr) == 0) { > > + return 0; > > + } > > + /* ram_list can be reordered. 
(it doesn't seem so during migration, > > + though) So the whole list needs to be checked again */ > > + if (postcopy_outgoing_check_all_ram_sent(s, NULL, 0) == 0) { > > + return 0; > > + } > > + > > + postcopy_outgoing_ram_all_sent(f, s); > > + return 0; > > + } > > + > > + DPRINTF("outgoing background state: %d\n", s->state); > > + > > + while (qemu_file_rate_limit(f) == 0) { > > + if (ram_save_block(f) == 0) { /* no more blocks */ > > + assert(s->state == PO_STATE_ACTIVE); > > + postcopy_outgoing_ram_all_sent(f, s); > > + return 0; > > + } > > + } > > + > > + return 0; > > +} > > + > > +/*************************************************************************** > > + * incoming part > > + */ > > + > > +/* flags for incoming mode to modify the behavior. > > + This is for benchmark/debug purpose */ > > +#define INCOMING_FLAGS_FAULT_REQUEST 0x01 > > + > > + > > +static void postcopy_incoming_umemd(void); > > + > > +#define PIS_STATE_QUIT_RECEIVED 0x01 > > +#define PIS_STATE_QUIT_QUEUED 0x02 > > +#define PIS_STATE_QUIT_SENT 0x04 > > + > > +#define PIS_STATE_QUIT_MASK (PIS_STATE_QUIT_RECEIVED | \ > > + PIS_STATE_QUIT_QUEUED | \ > > + PIS_STATE_QUIT_SENT) > > + > > +struct PostcopyIncomingState { > > + /* dest qemu state */ > > + uint32_t state; > > + > > + UMemDev *dev; > > + int host_page_size; > > + int host_page_shift; > > + > > + /* qemu side */ > > + int to_umemd_fd; > > + QEMUFileNonblock *to_umemd; > > +#define MAX_FAULTED_PAGES 256 > > + struct umem_pages *faulted_pages; > > + > > + int from_umemd_fd; > > + QEMUFile *from_umemd; > > + int version_id; /* save/load format version id */ > > +}; > > +typedef struct PostcopyIncomingState PostcopyIncomingState; > > + > > + > > +#define UMEM_STATE_EOS_RECEIVED 0x01 /* umem daemon <-> src qemu */ > > +#define UMEM_STATE_EOC_SENT 0x02 /* umem daemon <-> src qemu */ > > +#define UMEM_STATE_QUIT_RECEIVED 0x04 /* umem daemon <-> dst qemu */ > > +#define UMEM_STATE_QUIT_QUEUED 0x08 /* umem daemon <-> dst qemu */ > > 
+#define UMEM_STATE_QUIT_SENT 0x10 /* umem daemon <-> dst qemu */ > > + > > +#define UMEM_STATE_QUIT_MASK (UMEM_STATE_QUIT_QUEUED | \ > > + UMEM_STATE_QUIT_SENT | \ > > + UMEM_STATE_QUIT_RECEIVED) > > +#define UMEM_STATE_END_MASK (UMEM_STATE_EOS_RECEIVED | \ > > + UMEM_STATE_EOC_SENT | \ > > + UMEM_STATE_QUIT_MASK) > > + > > +struct PostcopyIncomingUMemDaemon { > > + /* umem daemon side */ > > + uint32_t state; > > + > > + int host_page_size; > > + int host_page_shift; > > + int nr_host_pages_per_target_page; > > + int host_to_target_page_shift; > > + int nr_target_pages_per_host_page; > > + int target_to_host_page_shift; > > + int version_id; /* save/load format version id */ > > + > > + int to_qemu_fd; > > + QEMUFileNonblock *to_qemu; > > + int from_qemu_fd; > > + QEMUFile *from_qemu; > > + > > + int mig_read_fd; > > + QEMUFile *mig_read; /* qemu on source -> umem daemon */ > > + > > + int mig_write_fd; > > + QEMUFileNonblock *mig_write; /* umem daemon -> qemu on source */ > > + > > + /* = KVM_MAX_VCPUS * (ASYNC_PF_PER_VCPUS + 1) */ > > +#define MAX_REQUESTS (512 * (64 + 1)) > > + > > + struct umem_page_request page_request; > > + struct umem_page_cached page_cached; > > + > > +#define MAX_PRESENT_REQUESTS MAX_FAULTED_PAGES > > + struct umem_pages *present_request; > > + > > + uint64_t *target_pgoffs; > > + > > + /* bitmap indexed by target page offset */ > > + unsigned long *phys_requested; > > + > > + /* bitmap indexed by target page offset */ > > + unsigned long *phys_received; > > + > > + RAMBlock *last_block_read; /* qemu on source -> umem daemon */ > > + RAMBlock *last_block_write; /* umem daemon -> qemu on source */ > > +}; > > +typedef struct PostcopyIncomingUMemDaemon PostcopyIncomingUMemDaemon; > > + > > +static PostcopyIncomingState state = { > > + .state = 0, > > + .dev = NULL, > > + .to_umemd_fd = -1, > > + .to_umemd = NULL, > > + .from_umemd_fd = -1, > > + .from_umemd = NULL, > > +}; > > + > > +static PostcopyIncomingUMemDaemon umemd = { > > + 
.state = 0, > > + .to_qemu_fd = -1, > > + .to_qemu = NULL, > > + .from_qemu_fd = -1, > > + .from_qemu = NULL, > > + .mig_read_fd = -1, > > + .mig_read = NULL, > > + .mig_write_fd = -1, > > + .mig_write = NULL, > > +}; > > + > > +int postcopy_incoming_init(const char *incoming, bool incoming_postcopy) > > +{ > > + /* incoming_postcopy makes sense only when incoming migration mode */ > > + if (!incoming && incoming_postcopy) { > > + return -EINVAL; > > + } > > + > > + if (!incoming_postcopy) { > > + return 0; > > + } > > + > > + state.state = 0; > > + state.dev = umem_dev_new(); > > + state.host_page_size = getpagesize(); > > + state.host_page_shift = ffs(state.host_page_size) - 1; > > + state.version_id = RAM_SAVE_VERSION_ID; /* = save version of > > + ram_save_live() */ > > + return 0; > > +} > > + > > +void postcopy_incoming_ram_alloc(const char *name, > > + size_t size, uint8_t **hostp, UMem **umemp) > > +{ > > + UMem *umem; > > + size = ALIGN_UP(size, state.host_page_size); > > + umem = umem_dev_create(state.dev, size, name); > > + > > + *umemp = umem; > > + *hostp = umem->umem; > > +} > > + > > +void postcopy_incoming_ram_free(UMem *umem) > > +{ > > + umem_unmap(umem); > > + umem_close(umem); > > + umem_destroy(umem); > > +} > > + > > +void postcopy_incoming_prepare(void) > > +{ > > + RAMBlock *block; > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL) { > > + umem_mmap(block->umem); > > + } > > + } > > +} > > + > > +static int postcopy_incoming_ram_load_get64(QEMUFile *f, > > + ram_addr_t *addr, int *flags) > > +{ > > + *addr = qemu_get_be64(f); > > + *flags = *addr & ~TARGET_PAGE_MASK; > > + *addr &= TARGET_PAGE_MASK; > > + return qemu_file_get_error(f); > > +} > > + > > +int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id) > > +{ > > + ram_addr_t addr; > > + int flags; > > + int error; > > + > > + DPRINTF("incoming ram load\n"); > > + /* > > + * RAM_SAVE_FLAGS_EOS or > > + * RAM_SAVE_FLAGS_MEM_SIZE + 
mem size + RAM_SAVE_FLAGS_EOS > > + * see postcopy_outgoing_ram_save_live() > > + */ > > + > > + if (version_id != RAM_SAVE_VERSION_ID) { > > + DPRINTF("RAM_SAVE_VERSION_ID %d != %d\n", > > + version_id, RAM_SAVE_VERSION_ID); > > + return -EINVAL; > > + } > > + error = postcopy_incoming_ram_load_get64(f, &addr, &flags); > > + DPRINTF("addr 0x%lx flags 0x%x\n", addr, flags); > > + if (error) { > > + DPRINTF("error %d\n", error); > > + return error; > > + } > > + if (flags == RAM_SAVE_FLAG_EOS && addr == 0) { > > + DPRINTF("EOS\n"); > > + return 0; > > + } > > + > > + if (flags != RAM_SAVE_FLAG_MEM_SIZE) { > > + DPRINTF("-EINVAL flags 0x%x\n", flags); > > + return -EINVAL; > > + } > > + error = ram_load_mem_size(f, addr); > > + if (error) { > > + DPRINTF("addr 0x%lx error %d\n", addr, error); > > + return error; > > + } > > + > > + error = postcopy_incoming_ram_load_get64(f, &addr, &flags); > > + if (error) { > > + DPRINTF("addr 0x%lx flags 0x%x error %d\n", addr, flags, error); > > + return error; > > + } > > + if (flags == RAM_SAVE_FLAG_EOS && addr == 0) { > > + DPRINTF("done\n"); > > + return 0; > > + } > > + DPRINTF("-EINVAL\n"); > > + return -EINVAL; > > +} > > + > > +void postcopy_incoming_fork_umemd(int mig_read_fd, QEMUFile *mig_read) > > +{ > > + int fds[2]; > > + RAMBlock *block; > > + > > + DPRINTF("fork\n"); > > + > > + /* socketpair(AF_UNIX)? 
*/ > > + > > + if (qemu_pipe(fds) == -1) { > > + perror("qemu_pipe"); > > + abort(); > > + } > > + state.from_umemd_fd = fds[0]; > > + umemd.to_qemu_fd = fds[1]; > > + > > + if (qemu_pipe(fds) == -1) { > > + perror("qemu_pipe"); > > + abort(); > > + } > > + umemd.from_qemu_fd = fds[0]; > > + state.to_umemd_fd = fds[1]; > > + > > + pid_t child = fork(); > > + if (child < 0) { > > + perror("fork"); > > + abort(); > > + } > > + > > + if (child == 0) { > > + int mig_write_fd; > > + > > + fd_close(&state.to_umemd_fd); > > + fd_close(&state.from_umemd_fd); > > + umemd.host_page_size = state.host_page_size; > > + umemd.host_page_shift = state.host_page_shift; > > + > > + umemd.nr_host_pages_per_target_page = > > + TARGET_PAGE_SIZE / umemd.host_page_size; > > + umemd.nr_target_pages_per_host_page = > > + umemd.host_page_size / TARGET_PAGE_SIZE; > > + > > + umemd.target_to_host_page_shift = > > + ffs(umemd.nr_host_pages_per_target_page) - 1; > > + umemd.host_to_target_page_shift = > > + ffs(umemd.nr_target_pages_per_host_page) - 1; > > + > > + umemd.state = 0; > > + umemd.version_id = state.version_id; > > + umemd.mig_read_fd = mig_read_fd; > > + umemd.mig_read = mig_read; > > + > > + mig_write_fd = dup(mig_read_fd); > > + if (mig_write_fd < 0) { > > + perror("could not dup for writable socket \n"); > > + abort(); > > + } > > + umemd.mig_write_fd = mig_write_fd; > > + umemd.mig_write = qemu_fopen_nonblock(mig_write_fd); > > + > > + postcopy_incoming_umemd(); /* noreturn */ > > + } > > + > > + DPRINTF("qemu pid: %d daemon pid: %d\n", getpid(), child); > > + fd_close(&umemd.to_qemu_fd); > > + fd_close(&umemd.from_qemu_fd); > > + state.faulted_pages = g_malloc(umem_pages_size(MAX_FAULTED_PAGES)); > > + state.faulted_pages->nr = 0; > > + > > + /* close all UMem.shmem_fd */ > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + umem_close_shmem(block->umem); > > + } > > + umem_qemu_wait_for_daemon(state.from_umemd_fd); > > +} > > + > > +static void 
postcopy_incoming_qemu_recv_quit(void) > > +{ > > + RAMBlock *block; > > + if (state.state & PIS_STATE_QUIT_RECEIVED) { > > + return; > > + } > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL) { > > + umem_destroy(block->umem); > > + block->umem = NULL; > > + block->flags &= ~RAM_POSTCOPY_UMEM_MASK; > > + } > > + } > > + > > + DPRINTF("|= PIS_STATE_QUIT_RECEIVED\n"); > > + state.state |= PIS_STATE_QUIT_RECEIVED; > > + qemu_set_fd_handler(state.from_umemd_fd, NULL, NULL, NULL); > > + qemu_fclose(state.from_umemd); > > + state.from_umemd = NULL; > > + fd_close(&state.from_umemd_fd); > > +} > > + > > +static void postcopy_incoming_qemu_fflush_to_umemd_handler(void *opaque) > > +{ > > + assert(state.to_umemd != NULL); > > + > > + nonblock_fflush(state.to_umemd); > > + if (nonblock_pending_size(state.to_umemd) > 0) { > > + return; > > + } > > + > > + qemu_set_fd_handler(state.to_umemd->fd, NULL, NULL, NULL); > > + if (state.state & PIS_STATE_QUIT_QUEUED) { > > + DPRINTF("|= PIS_STATE_QUIT_SENT\n"); > > + state.state |= PIS_STATE_QUIT_SENT; > > + qemu_fclose(state.to_umemd->file); > > + state.to_umemd = NULL; > > + fd_close(&state.to_umemd_fd); > > + g_free(state.faulted_pages); > > + state.faulted_pages = NULL; > > + } > > +} > > + > > +static void postcopy_incoming_qemu_fflush_to_umemd(void) > > +{ > > + qemu_set_fd_handler(state.to_umemd->fd, NULL, > > + postcopy_incoming_qemu_fflush_to_umemd_handler, NULL); > > + postcopy_incoming_qemu_fflush_to_umemd_handler(NULL); > > +} > > + > > +static void postcopy_incoming_qemu_queue_quit(void) > > +{ > > + if (state.state & PIS_STATE_QUIT_QUEUED) { > > + return; > > + } > > + > > + DPRINTF("|= PIS_STATE_QUIT_QUEUED\n"); > > + umem_qemu_quit(state.to_umemd->file); > > + state.state |= PIS_STATE_QUIT_QUEUED; > > +} > > + > > +static void postcopy_incoming_qemu_send_pages_present(void) > > +{ > > + if (state.faulted_pages->nr > 0) { > > + umem_qemu_send_pages_present(state.to_umemd->file, > 
> + state.faulted_pages); > > + state.faulted_pages->nr = 0; > > + } > > +} > > + > > +static void postcopy_incoming_qemu_faulted_pages( > > + const struct umem_pages *pages) > > +{ > > + assert(pages->nr <= MAX_FAULTED_PAGES); > > + assert(state.faulted_pages != NULL); > > + > > + if (state.faulted_pages->nr + pages->nr > MAX_FAULTED_PAGES) { > > + postcopy_incoming_qemu_send_pages_present(); > > + } > > + memcpy(&state.faulted_pages->pgoffs[state.faulted_pages->nr], > > + &pages->pgoffs[0], sizeof(pages->pgoffs[0]) * pages->nr); > > + state.faulted_pages->nr += pages->nr; > > +} > > + > > +static void postcopy_incoming_qemu_cleanup_umem(void); > > + > > +static int postcopy_incoming_qemu_handle_req_one(void) > > +{ > > + int offset = 0; > > + int ret; > > + uint8_t cmd; > > + > > + ret = qemu_peek_buffer(state.from_umemd, &cmd, sizeof(cmd), offset); > > + offset += sizeof(cmd); > > + if (ret != sizeof(cmd)) { > > + return -EAGAIN; > > + } > > + DPRINTF("cmd %c\n", cmd); > > + > > + switch (cmd) { > > + case UMEM_DAEMON_QUIT: > > + postcopy_incoming_qemu_recv_quit(); > > + postcopy_incoming_qemu_queue_quit(); > > + postcopy_incoming_qemu_cleanup_umem(); > > + break; > > + case UMEM_DAEMON_TRIGGER_PAGE_FAULT: { > > + struct umem_pages *pages = > > + umem_qemu_trigger_page_fault(state.from_umemd, &offset); > > + if (pages == NULL) { > > + return -EAGAIN; > > + } > > + if (state.to_umemd_fd >= 0 && !(state.state & PIS_STATE_QUIT_QUEUED)) { > > + postcopy_incoming_qemu_faulted_pages(pages); > > + g_free(pages); > > + } > > + break; > > + } > > + case UMEM_DAEMON_ERROR: > > + /* umem daemon hit troubles, so it warned us to stop vm execution */ > > + vm_stop(RUN_STATE_IO_ERROR); /* or RUN_STATE_INTERNAL_ERROR */ > > + break; > > + default: > > + abort(); > > + break; > > + } > > + > > + if (state.from_umemd != NULL) { > > + qemu_file_skip(state.from_umemd, offset); > > + } > > + return 0; > > +} > > + > > +static void postcopy_incoming_qemu_handle_req(void *opaque) > > 
+{ > > + do { > > + int ret = postcopy_incoming_qemu_handle_req_one(); > > + if (ret == -EAGAIN) { > > + break; > > + } > > + } while (state.from_umemd != NULL && > > + qemu_pending_size(state.from_umemd) > 0); > > + > > + if (state.to_umemd != NULL) { > > + if (state.faulted_pages->nr > 0) { > > + postcopy_incoming_qemu_send_pages_present(); > > + } > > + postcopy_incoming_qemu_fflush_to_umemd(); > > + } > > +} > > + > > +void postcopy_incoming_qemu_ready(void) > > +{ > > + umem_qemu_ready(state.to_umemd_fd); > > + > > + state.from_umemd = qemu_fopen_pipe(state.from_umemd_fd); > > + state.to_umemd = qemu_fopen_nonblock(state.to_umemd_fd); > > + qemu_set_fd_handler(state.from_umemd_fd, > > + postcopy_incoming_qemu_handle_req, NULL, NULL); > > +} > > + > > +static void postcopy_incoming_qemu_cleanup_umem(void) > > +{ > > + /* when qemu will quit before completing postcopy, tell umem daemon > > + to tear down umem device and exit. */ > > + if (state.to_umemd_fd >= 0) { > > + postcopy_incoming_qemu_queue_quit(); > > + postcopy_incoming_qemu_fflush_to_umemd(); > > + } > > + > > + if (state.dev) { > > + umem_dev_destroy(state.dev); > > + state.dev = NULL; > > + } > > +} > > + > > +void postcopy_incoming_qemu_cleanup(void) > > +{ > > + postcopy_incoming_qemu_cleanup_umem(); > > + if (state.to_umemd != NULL) { > > + nonblock_wait_for_flush(state.to_umemd); > > + } > > +} > > + > > +void postcopy_incoming_qemu_pages_unmapped(ram_addr_t addr, ram_addr_t size) > > +{ > > + uint64_t nr = DIV_ROUND_UP(size, state.host_page_size); > > + size_t len = umem_pages_size(nr); > > + ram_addr_t end = addr + size; > > + struct umem_pages *pages; > > + int i; > > + > > + if (state.to_umemd_fd < 0 || state.state & PIS_STATE_QUIT_QUEUED) { > > + return; > > + } > > + pages = g_malloc(len); > > + pages->nr = nr; > > + for (i = 0; addr < end; addr += state.host_page_size, i++) { > > + pages->pgoffs[i] = addr >> state.host_page_shift; > > + } > > + 
umem_qemu_send_pages_unmapped(state.to_umemd->file, pages); > > + g_free(pages); > > + assert(state.to_umemd != NULL); > > + postcopy_incoming_qemu_fflush_to_umemd(); > > +} > > + > > +/************************************************************************** > > + * incoming umem daemon > > + */ > > + > > +static void postcopy_incoming_umem_recv_quit(void) > > +{ > > + if (umemd.state & UMEM_STATE_QUIT_RECEIVED) { > > + return; > > + } > > + DPRINTF("|= UMEM_STATE_QUIT_RECEIVED\n"); > > + umemd.state |= UMEM_STATE_QUIT_RECEIVED; > > + qemu_fclose(umemd.from_qemu); > > + umemd.from_qemu = NULL; > > + fd_close(&umemd.from_qemu_fd); > > +} > > + > > +static void postcopy_incoming_umem_queue_quit(void) > > +{ > > + if (umemd.state & UMEM_STATE_QUIT_QUEUED) { > > + return; > > + } > > + DPRINTF("|= UMEM_STATE_QUIT_QUEUED\n"); > > + umem_daemon_quit(umemd.to_qemu->file); > > + umemd.state |= UMEM_STATE_QUIT_QUEUED; > > +} > > + > > +static void postcopy_incoming_umem_send_eoc_req(void) > > +{ > > + struct qemu_umem_req req; > > + > > + if (umemd.state & UMEM_STATE_EOC_SENT) { > > + return; > > + } > > + > > + DPRINTF("|= UMEM_STATE_EOC_SENT\n"); > > + req.cmd = QEMU_UMEM_REQ_EOC; > > + postcopy_incoming_send_req(umemd.mig_write->file, &req); > > + umemd.state |= UMEM_STATE_EOC_SENT; > > + qemu_fclose(umemd.mig_write->file); > > + umemd.mig_write = NULL; > > + fd_close(&umemd.mig_write_fd); > > +} > > + > > +static void postcopy_incoming_umem_send_page_req(RAMBlock *block) > > +{ > > + struct qemu_umem_req req; > > + int bit; > > + uint64_t target_pgoff; > > + int i; > > + > > + umemd.page_request.nr = MAX_REQUESTS; > > + umem_get_page_request(block->umem, &umemd.page_request); > > + DPRINTF("id %s nr %d offs 0x%"PRIx64" 0x%"PRIx64"\n", > > + block->idstr, umemd.page_request.nr, > > + (uint64_t)umemd.page_request.pgoffs[0], > > + (uint64_t)umemd.page_request.pgoffs[1]); > > + > > + if (umemd.last_block_write != block) { > > + req.cmd = QEMU_UMEM_REQ_ON_DEMAND; > > + 
req.idstr = block->idstr; > > + } else { > > + req.cmd = QEMU_UMEM_REQ_ON_DEMAND_CONT; > > + } > > + > > + req.nr = 0; > > + req.pgoffs = umemd.target_pgoffs; > > + if (TARGET_PAGE_SIZE >= umemd.host_page_size) { > > + for (i = 0; i < umemd.page_request.nr; i++) { > > + target_pgoff = > > + umemd.page_request.pgoffs[i] >> umemd.host_to_target_page_shift; > > + bit = (block->offset >> TARGET_PAGE_BITS) + target_pgoff; > > + > > + if (!test_and_set_bit(bit, umemd.phys_requested)) { > > + req.pgoffs[req.nr] = target_pgoff; > > + req.nr++; > > + } > > + } > > + } else { > > + for (i = 0; i < umemd.page_request.nr; i++) { > > + int j; > > + target_pgoff = > > + umemd.page_request.pgoffs[i] << umemd.host_to_target_page_shift; > > + bit = (block->offset >> TARGET_PAGE_BITS) + target_pgoff; > > + > > + for (j = 0; j < umemd.nr_target_pages_per_host_page; j++) { > > + if (!test_and_set_bit(bit + j, umemd.phys_requested)) { > > + req.pgoffs[req.nr] = target_pgoff + j; > > + req.nr++; > > + } > > + } > > + } > > + } > > + > > + DPRINTF("id %s nr %d offs 0x%"PRIx64" 0x%"PRIx64"\n", > > + block->idstr, req.nr, req.pgoffs[0], req.pgoffs[1]); > > + if (req.nr > 0 && umemd.mig_write != NULL) { > > + postcopy_incoming_send_req(umemd.mig_write->file, &req); > > + umemd.last_block_write = block; > > + } > > +} > > + > > +static void postcopy_incoming_umem_send_pages_present(void) > > +{ > > + if (umemd.present_request->nr > 0) { > > + umem_daemon_send_pages_present(umemd.to_qemu->file, > > + umemd.present_request); > > + umemd.present_request->nr = 0; > > + } > > +} > > + > > +static void postcopy_incoming_umem_pages_present_one( > > + uint32_t nr, const __u64 *pgoffs, uint64_t ramblock_pgoffset) > > +{ > > + uint32_t i; > > + assert(nr <= MAX_PRESENT_REQUESTS); > > + > > + if (umemd.present_request->nr + nr > MAX_PRESENT_REQUESTS) { > > + postcopy_incoming_umem_send_pages_present(); > > + } > > + > > + for (i = 0; i < nr; i++) { > > + 
umemd.present_request->pgoffs[umemd.present_request->nr + i] = > > + pgoffs[i] + ramblock_pgoffset; > > + } > > + umemd.present_request->nr += nr; > > +} > > + > > +static void postcopy_incoming_umem_pages_present( > > + const struct umem_page_cached *page_cached, uint64_t ramblock_pgoffset) > > +{ > > + uint32_t left = page_cached->nr; > > + uint32_t offset = 0; > > + > > + while (left > 0) { > > + uint32_t nr = MIN(left, MAX_PRESENT_REQUESTS); > > + postcopy_incoming_umem_pages_present_one( > > + nr, &page_cached->pgoffs[offset], ramblock_pgoffset); > > + > > + left -= nr; > > + offset += nr; > > + } > > +} > > + > > +static int postcopy_incoming_umem_ram_load(void) > > +{ > > + ram_addr_t offset; > > + int flags; > > + int error; > > + void *shmem; > > + int i; > > + int bit; > > + > > + if (umemd.version_id != RAM_SAVE_VERSION_ID) { > > + return -EINVAL; > > + } > > + > > + offset = qemu_get_be64(umemd.mig_read); > > + > > + flags = offset & ~TARGET_PAGE_MASK; > > + offset &= TARGET_PAGE_MASK; > > + > > + assert(!(flags & RAM_SAVE_FLAG_MEM_SIZE)); > > + > > + if (flags & RAM_SAVE_FLAG_EOS) { > > + DPRINTF("RAM_SAVE_FLAG_EOS\n"); > > + postcopy_incoming_umem_send_eoc_req(); > > + > > + qemu_fclose(umemd.mig_read); > > + umemd.mig_read = NULL; > > + fd_close(&umemd.mig_read_fd); > > + umemd.state |= UMEM_STATE_EOS_RECEIVED; > > + > > + postcopy_incoming_umem_queue_quit(); > > + DPRINTF("|= UMEM_STATE_EOS_RECEIVED\n"); > > + return 0; > > + } > > + > > + shmem = ram_load_host_from_stream_offset(umemd.mig_read, offset, flags, > > + &umemd.last_block_read); > > + if (!shmem) { > > + DPRINTF("shmem == NULL\n"); > > + return -EINVAL; > > + } > > + > > + if (flags & RAM_SAVE_FLAG_COMPRESS) { > > + uint8_t ch = qemu_get_byte(umemd.mig_read); > > + memset(shmem, ch, TARGET_PAGE_SIZE); > > + } else if (flags & RAM_SAVE_FLAG_PAGE) { > > + qemu_get_buffer(umemd.mig_read, shmem, TARGET_PAGE_SIZE); > > + } > > + > > + error = qemu_file_get_error(umemd.mig_read); > > + if 
(error) { > > + DPRINTF("error %d\n", error); > > + return error; > > + } > > + > > + umemd.page_cached.nr = 0; > > + bit = (umemd.last_block_read->offset + offset) >> TARGET_PAGE_BITS; > > + if (!test_and_set_bit(bit, umemd.phys_received)) { > > + if (TARGET_PAGE_SIZE >= umemd.host_page_size) { > > + __u64 pgoff = offset >> umemd.host_page_shift; > > + for (i = 0; i < umemd.nr_host_pages_per_target_page; i++) { > > + umemd.page_cached.pgoffs[umemd.page_cached.nr] = pgoff + i; > > + umemd.page_cached.nr++; > > + } > > + } else { > > + bool mark_cache = true; > > + for (i = 0; i < umemd.nr_target_pages_per_host_page; i++) { > > + if (!test_bit(bit + i, umemd.phys_received)) { > > + mark_cache = false; > > + break; > > + } > > + } > > + if (mark_cache) { > > + umemd.page_cached.pgoffs[0] = offset >> umemd.host_page_shift; > > + umemd.page_cached.nr = 1; > > + } > > + } > > + } > > + > > + if (umemd.page_cached.nr > 0) { > > + umem_mark_page_cached(umemd.last_block_read->umem, &umemd.page_cached); > > + > > + if (!(umemd.state & UMEM_STATE_QUIT_QUEUED) && umemd.to_qemu_fd >=0 && > > + (incoming_postcopy_flags & INCOMING_FLAGS_FAULT_REQUEST)) { > > + uint64_t ramblock_pgoffset; > > + > > + ramblock_pgoffset = > > + umemd.last_block_read->offset >> umemd.host_page_shift; > > + postcopy_incoming_umem_pages_present(&umemd.page_cached, > > + ramblock_pgoffset); > > + } > > + } > > + > > + return 0; > > +} > > + > > +static bool postcopy_incoming_umem_check_umem_done(void) > > +{ > > + bool all_done = true; > > + RAMBlock *block; > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + UMem *umem = block->umem; > > + if (umem != NULL && umem->nsets == umem->nbits) { > > + umem_unmap_shmem(umem); > > + umem_destroy(umem); > > + block->umem = NULL; > > + } > > + if (block->umem != NULL) { > > + all_done = false; > > + } > > + } > > + return all_done; > > +} > > + > > +static bool postcopy_incoming_umem_page_faulted(const struct umem_pages *pages) > > +{ > > + int i; > 
> + > > + for (i = 0; i < pages->nr; i++) { > > + ram_addr_t addr = pages->pgoffs[i] << umemd.host_page_shift; > > + RAMBlock *block = qemu_get_ram_block(addr); > > + addr -= block->offset; > > + umem_remove_shmem(block->umem, addr, umemd.host_page_size); > > + } > > + return postcopy_incoming_umem_check_umem_done(); > > +} > > + > > +static bool > > +postcopy_incoming_umem_page_unmapped(const struct umem_pages *pages) > > +{ > > + RAMBlock *block; > > + ram_addr_t addr; > > + int i; > > + > > + struct qemu_umem_req req = { > > + .cmd = QEMU_UMEM_REQ_REMOVE, > > + .nr = 0, > > + .pgoffs = (uint64_t*)pages->pgoffs, > > + }; > > + > > + addr = pages->pgoffs[0] << umemd.host_page_shift; > > + block = qemu_get_ram_block(addr); > > + > > + for (i = 0; i < pages->nr; i++) { > > + int pgoff; > > + > > + addr = pages->pgoffs[i] << umemd.host_page_shift; > > + pgoff = addr >> TARGET_PAGE_BITS; > > + if (!test_bit(pgoff, umemd.phys_received) && > > + !test_bit(pgoff, umemd.phys_requested)) { > > + req.pgoffs[req.nr] = pgoff; > > + req.nr++; > > + } > > + set_bit(pgoff, umemd.phys_received); > > + set_bit(pgoff, umemd.phys_requested); > > + > > + umem_remove_shmem(block->umem, > > + addr - block->offset, umemd.host_page_size); > > + } > > + if (req.nr > 0 && umemd.mig_write != NULL) { > > + req.idstr = block->idstr; > > + postcopy_incoming_send_req(umemd.mig_write->file, &req); > > + } > > + > > + return postcopy_incoming_umem_check_umem_done(); > > +} > > + > > +static void postcopy_incoming_umem_done(void) > > +{ > > + postcopy_incoming_umem_send_eoc_req(); > > + postcopy_incoming_umem_queue_quit(); > > +} > > + > > +static int postcopy_incoming_umem_handle_qemu(void) > > +{ > > + int ret; > > + int offset = 0; > > + uint8_t cmd; > > + > > + ret = qemu_peek_buffer(umemd.from_qemu, &cmd, sizeof(cmd), offset); > > + offset += sizeof(cmd); > > + if (ret != sizeof(cmd)) { > > + return -EAGAIN; > > + } > > + DPRINTF("cmd %c\n", cmd); > > + switch (cmd) { > > + case 
UMEM_QEMU_QUIT: > > + postcopy_incoming_umem_recv_quit(); > > + postcopy_incoming_umem_done(); > > + break; > > + case UMEM_QEMU_PAGE_FAULTED: { > > + struct umem_pages *pages = umem_recv_pages(umemd.from_qemu, > > + &offset); > > + if (pages == NULL) { > > + return -EAGAIN; > > + } > > + if (postcopy_incoming_umem_page_faulted(pages)){ > > + postcopy_incoming_umem_done(); > > + } > > + g_free(pages); > > + break; > > + } > > + case UMEM_QEMU_PAGE_UNMAPPED: { > > + struct umem_pages *pages = umem_recv_pages(umemd.from_qemu, > > + &offset); > > + if (pages == NULL) { > > + return -EAGAIN; > > + } > > + if (postcopy_incoming_umem_page_unmapped(pages)){ > > + postcopy_incoming_umem_done(); > > + } > > + g_free(pages); > > + break; > > + } > > + default: > > + abort(); > > + break; > > + } > > + if (umemd.from_qemu != NULL) { > > + qemu_file_skip(umemd.from_qemu, offset); > > + } > > + return 0; > > +} > > + > > +static void set_fd(int fd, fd_set *fds, int *nfds) > > +{ > > + FD_SET(fd, fds); > > + if (fd > *nfds) { > > + *nfds = fd; > > + } > > +} > > + > > +static int postcopy_incoming_umemd_main_loop(void) > > +{ > > + fd_set writefds; > > + fd_set readfds; > > + int nfds; > > + RAMBlock *block; > > + int ret; > > + > > + int pending_size; > > + bool get_page_request; > > + > > + nfds = -1; > > + FD_ZERO(&writefds); > > + FD_ZERO(&readfds); > > + > > + if (umemd.mig_write != NULL) { > > + pending_size = nonblock_pending_size(umemd.mig_write); > > + if (pending_size > 0) { > > + set_fd(umemd.mig_write_fd, &writefds, &nfds); > > + } > > + } else { > > + pending_size = 0; > > + } > > + > > +#define PENDING_SIZE_MAX (MAX_REQUESTS * sizeof(uint64_t) * 2) > > + /* If page request to the migration source is accumulated, > > + suspend getting page fault request. 
*/ > > + get_page_request = (pending_size <= PENDING_SIZE_MAX); > > + > > + if (get_page_request) { > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL) { > > + set_fd(block->umem->fd, &readfds, &nfds); > > + } > > + } > > + } > > + > > + if (umemd.mig_read_fd >= 0) { > > + set_fd(umemd.mig_read_fd, &readfds, &nfds); > > + } > > + > > + if (umemd.to_qemu != NULL && > > + nonblock_pending_size(umemd.to_qemu) > 0) { > > + set_fd(umemd.to_qemu_fd, &writefds, &nfds); > > + } > > + if (umemd.from_qemu_fd >= 0) { > > + set_fd(umemd.from_qemu_fd, &readfds, &nfds); > > + } > > + > > + ret = select(nfds + 1, &readfds, &writefds, NULL, NULL); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + return 0; > > + } > > + return ret; > > + } > > + > > + if (umemd.mig_write_fd >= 0 && FD_ISSET(umemd.mig_write_fd, &writefds)) { > > + nonblock_fflush(umemd.mig_write); > > + } > > + if (umemd.to_qemu_fd >= 0 && FD_ISSET(umemd.to_qemu_fd, &writefds)) { > > + nonblock_fflush(umemd.to_qemu); > > + } > > + if (get_page_request) { > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + if (block->umem != NULL && FD_ISSET(block->umem->fd, &readfds)) { > > + postcopy_incoming_umem_send_page_req(block); > > + } > > + } > > + } > > + if (umemd.mig_read_fd >= 0 && FD_ISSET(umemd.mig_read_fd, &readfds)) { > > + do { > > + ret = postcopy_incoming_umem_ram_load(); > > + if (ret < 0) { > > + return ret; > > + } > > + } while (umemd.mig_read != NULL && > > + qemu_pending_size(umemd.mig_read) > 0); > > + } > > + if (umemd.from_qemu_fd >= 0 && FD_ISSET(umemd.from_qemu_fd, &readfds)) { > > + do { > > + ret = postcopy_incoming_umem_handle_qemu(); > > + if (ret == -EAGAIN) { > > + break; > > + } > > + } while (umemd.from_qemu != NULL && > > + qemu_pending_size(umemd.from_qemu) > 0); > > + } > > + > > + if (umemd.mig_write != NULL) { > > + nonblock_fflush(umemd.mig_write); > > + } > > + if (umemd.to_qemu != NULL) { > > + if (!(umemd.state & UMEM_STATE_QUIT_QUEUED)) 
{ > > + postcopy_incoming_umem_send_pages_present(); > > + } > > + nonblock_fflush(umemd.to_qemu); > > + if ((umemd.state & UMEM_STATE_QUIT_QUEUED) && > > + nonblock_pending_size(umemd.to_qemu) == 0) { > > + DPRINTF("|= UMEM_STATE_QUIT_SENT\n"); > > + qemu_fclose(umemd.to_qemu->file); > > + umemd.to_qemu = NULL; > > + fd_close(&umemd.to_qemu_fd); > > + umemd.state |= UMEM_STATE_QUIT_SENT; > > + } > > + } > > + > > + return (umemd.state & UMEM_STATE_END_MASK) == UMEM_STATE_END_MASK; > > +} > > + > > +static void postcopy_incoming_umemd(void) > > +{ > > + ram_addr_t last_ram_offset; > > + int nbits; > > + RAMBlock *block; > > + int ret; > > + > > + qemu_daemon(1, 1); > > + signal(SIGPIPE, SIG_IGN); > > + DPRINTF("daemon pid: %d\n", getpid()); > > + > > + umemd.page_request.pgoffs = g_new(__u64, MAX_REQUESTS); > > + umemd.page_cached.pgoffs = > > + g_new(__u64, MAX_REQUESTS * > > + (TARGET_PAGE_SIZE >= umemd.host_page_size ? > > + 1: umemd.nr_host_pages_per_target_page)); > > + umemd.target_pgoffs = > > + g_new(uint64_t, MAX_REQUESTS * > > + MAX(umemd.nr_host_pages_per_target_page, > > + umemd.nr_target_pages_per_host_page)); > > + umemd.present_request = g_malloc(umem_pages_size(MAX_PRESENT_REQUESTS)); > > + umemd.present_request->nr = 0; > > + > > + last_ram_offset = qemu_last_ram_offset(); > > + nbits = last_ram_offset >> TARGET_PAGE_BITS; > > + umemd.phys_requested = g_new0(unsigned long, BITS_TO_LONGS(nbits)); > > + umemd.phys_received = g_new0(unsigned long, BITS_TO_LONGS(nbits)); > > + umemd.last_block_read = NULL; > > + umemd.last_block_write = NULL; > > + > > + QLIST_FOREACH(block, &ram_list.blocks, next) { > > + UMem *umem = block->umem; > > + umem->umem = NULL; /* umem mapping area has VM_DONT_COPY flag, > > + so we lost those mappings by fork */ > > + block->host = umem_map_shmem(umem); > > + umem_close_shmem(umem); > > + } > > + umem_daemon_ready(umemd.to_qemu_fd); > > + umemd.to_qemu = qemu_fopen_nonblock(umemd.to_qemu_fd); > > + > > + /* wait for qemu 
to disown migration_fd */ > > + umem_daemon_wait_for_qemu(umemd.from_qemu_fd); > > + umemd.from_qemu = qemu_fopen_pipe(umemd.from_qemu_fd); > > + > > + DPRINTF("entering umemd main loop\n"); > > + for (;;) { > > + ret = postcopy_incoming_umemd_main_loop(); > > + if (ret != 0) { > > + break; > > + } > > + } > > + DPRINTF("exiting umemd main loop\n"); > > + > > + /* This daemon forked from qemu and the parent qemu is still running. > > + * Cleanups of linked libraries like SDL should not be triggered, > > + * otherwise the parent qemu may use resources which was already freed. > > + */ > > + fflush(stdout); > > + fflush(stderr); > > + _exit(ret < 0? EXIT_FAILURE: 0); > > +} > > diff --git a/migration-tcp.c b/migration-tcp.c > > index cf6a9b8..aa35050 100644 > > --- a/migration-tcp.c > > +++ b/migration-tcp.c > > @@ -63,18 +63,25 @@ static void tcp_wait_for_connect(void *opaque) > > } while (ret == -1 && (socket_error()) == EINTR); > > > > if (ret < 0) { > > - migrate_fd_error(s); > > - return; > > + goto error_out; > > } > > > > qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > > > > - if (val == 0) > > + if (val == 0) { > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > + } > > migrate_fd_connect(s); > > - else { > > + } else { > > DPRINTF("error connecting %d\n", val); > > - migrate_fd_error(s); > > + goto error_out; > > } > > + return; > > + > > +error_out: > > + migrate_fd_error(s); > > } > > > > int tcp_start_outgoing_migration(MigrationState *s, const char *host_port) > > @@ -112,11 +119,19 @@ int tcp_start_outgoing_migration(MigrationState *s, const char *host_port) > > > > if (ret < 0) { > > DPRINTF("connect failed\n"); > > - migrate_fd_error(s); > > - return ret; > > + goto error_out; > > + } > > + > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > } > > migrate_fd_connect(s); > > return 0; > > + > > +error_out: > > + migrate_fd_error(s); > > + return ret; 
> > } > > > > static void tcp_accept_incoming_migration(void *opaque) > > @@ -145,7 +160,15 @@ static void tcp_accept_incoming_migration(void *opaque) > > } > > > > process_incoming_migration(f); > > + if (incoming_postcopy) { > > + postcopy_incoming_fork_umemd(c, f); > > + } > > qemu_fclose(f); > > + if (incoming_postcopy) { > > + /* now socket is disowned. > > + So tell umem server that it's safe to use it */ > > + postcopy_incoming_qemu_ready(); > > + } > > out: > > close(c); > > out2: > > diff --git a/migration-unix.c b/migration-unix.c > > index dfcf203..3707505 100644 > > --- a/migration-unix.c > > +++ b/migration-unix.c > > @@ -69,12 +69,20 @@ static void unix_wait_for_connect(void *opaque) > > > > qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > > > > - if (val == 0) > > + if (val == 0) { > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > + } > > migrate_fd_connect(s); > > - else { > > + } else { > > DPRINTF("error connecting %d\n", val); > > - migrate_fd_error(s); > > + goto error_out; > > } > > + return; > > + > > +error_out: > > + migrate_fd_error(s); > > } > > > > int unix_start_outgoing_migration(MigrationState *s, const char *path) > > @@ -109,11 +117,19 @@ int unix_start_outgoing_migration(MigrationState *s, const char *path) > > > > if (ret < 0) { > > DPRINTF("connect failed\n"); > > - migrate_fd_error(s); > > - return ret; > > + goto error_out; > > + } > > + > > + ret = postcopy_outgoing_create_read_socket(s); > > + if (ret < 0) { > > + goto error_out; > > } > > migrate_fd_connect(s); > > return 0; > > + > > +error_out: > > + migrate_fd_error(s); > > + return ret; > > } > > > > static void unix_accept_incoming_migration(void *opaque) > > @@ -142,7 +158,13 @@ static void unix_accept_incoming_migration(void *opaque) > > } > > > > process_incoming_migration(f); > > + if (incoming_postcopy) { > > + postcopy_incoming_fork_umemd(c, f); > > + } > > qemu_fclose(f); > > + if (incoming_postcopy) { > > + 
postcopy_incoming_qemu_ready(); > > + } > > out: > > close(c); > > out2: > > diff --git a/migration.c b/migration.c > > index 0149ab3..51efe44 100644 > > --- a/migration.c > > +++ b/migration.c > > @@ -39,6 +39,11 @@ enum { > > MIG_STATE_COMPLETED, > > }; > > > > +enum { > > + MIG_SUBSTATE_PRECOPY, > > + MIG_SUBSTATE_POSTCOPY, > > +}; > > + > > #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ > > > > static NotifierList migration_state_notifiers = > > @@ -255,6 +260,18 @@ static void migrate_fd_put_ready(void *opaque) > > return; > > } > > > > + if (s->substate == MIG_SUBSTATE_POSTCOPY) { > > + /* PRINTF("postcopy background\n"); */ > > + ret = postcopy_outgoing_ram_save_background(s->mon, s->file, > > + s->postcopy); > > + if (ret > 0) { > > + migrate_fd_completed(s); > > + } else if (ret < 0) { > > + migrate_fd_error(s); > > + } > > + return; > > + } > > + > > DPRINTF("iterate\n"); > > ret = qemu_savevm_state_iterate(s->mon, s->file); > > if (ret < 0) { > > @@ -265,6 +282,19 @@ static void migrate_fd_put_ready(void *opaque) > > DPRINTF("done iterating\n"); > > vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); > > > > + if (s->params.postcopy) { > > + if (qemu_savevm_state_complete(s->mon, s->file) < 0) { > > + migrate_fd_error(s); > > + if (old_vm_running) { > > + vm_start(); > > + } > > + return; > > + } > > + s->substate = MIG_SUBSTATE_POSTCOPY; > > + s->postcopy = postcopy_outgoing_begin(s); > > + return; > > + } > > + > > if (qemu_savevm_state_complete(s->mon, s->file) < 0) { > > migrate_fd_error(s); > > } else { > > @@ -357,6 +387,7 @@ void migrate_fd_connect(MigrationState *s) > > int ret; > > > > s->state = MIG_STATE_ACTIVE; > > + s->substate = MIG_SUBSTATE_PRECOPY; > > s->file = qemu_fopen_ops_buffered(s, > > s->bandwidth_limit, > > migrate_fd_put_buffer, > > diff --git a/migration.h b/migration.h > > index 90ae362..2809e99 100644 > > --- a/migration.h > > +++ b/migration.h > > @@ -40,6 +40,12 @@ struct MigrationState > > int 
(*write)(MigrationState *s, const void *buff, size_t size); > > void *opaque; > > MigrationParams params; > > + > > + /* for postcopy */ > > + int substate; /* precopy or postcopy */ > > + int fd_read; > > + QEMUFile *file_read; /* connection from the detination */ > > + void *postcopy; > > }; > > > > void process_incoming_migration(QEMUFile *f); > > @@ -86,6 +92,7 @@ uint64_t ram_bytes_remaining(void); > > uint64_t ram_bytes_transferred(void); > > uint64_t ram_bytes_total(void); > > > > +void ram_save_set_params(const MigrationParams *params, void *opaque); > > void sort_ram_list(void); > > int ram_save_block(QEMUFile *f); > > void ram_save_memory_set_dirty(void); > > @@ -107,7 +114,30 @@ void migrate_add_blocker(Error *reason); > > */ > > void migrate_del_blocker(Error *reason); > > > > +/* For outgoing postcopy */ > > +int postcopy_outgoing_create_read_socket(MigrationState *s); > > +int postcopy_outgoing_ram_save_live(Monitor *mon, > > + QEMUFile *f, int stage, void *opaque); > > +void *postcopy_outgoing_begin(MigrationState *s); > > +int postcopy_outgoing_ram_save_background(Monitor *mon, QEMUFile *f, > > + void *postcopy); > > + > > +/* For incoming postcopy */ > > extern bool incoming_postcopy; > > extern unsigned long incoming_postcopy_flags; > > > > +int postcopy_incoming_init(const char *incoming, bool incoming_postcopy); > > +void postcopy_incoming_ram_alloc(const char *name, > > + size_t size, uint8_t **hostp, UMem **umemp); > > +void postcopy_incoming_ram_free(UMem *umem); > > +void postcopy_incoming_prepare(void); > > + > > +int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id); > > +void postcopy_incoming_fork_umemd(int mig_read_fd, QEMUFile *mig_read); > > +void postcopy_incoming_qemu_ready(void); > > +void postcopy_incoming_qemu_cleanup(void); > > +#ifdef NEED_CPU_H > > +void postcopy_incoming_qemu_pages_unmapped(ram_addr_t addr, ram_addr_t size); > > +#endif > > + > > #endif > > diff --git a/qemu-common.h b/qemu-common.h > > 
index 725922b..d74a8c9 100644 > > --- a/qemu-common.h > > +++ b/qemu-common.h > > @@ -17,6 +17,7 @@ typedef struct DeviceState DeviceState; > > > > struct Monitor; > > typedef struct Monitor Monitor; > > +typedef struct UMem UMem; > > > > /* we put basic includes here to avoid repeating them in device drivers */ > > #include > > diff --git a/qemu-options.hx b/qemu-options.hx > > index 5c5b8f3..19e20f9 100644 > > --- a/qemu-options.hx > > +++ b/qemu-options.hx > > @@ -2510,7 +2510,10 @@ DEF("postcopy-flags", HAS_ARG, QEMU_OPTION_postcopy_flags, > > "-postcopy-flags unsigned-int(flags)\n" > > " flags for postcopy incoming migration\n" > > " when -incoming and -postcopy are specified.\n" > > - " This is for benchmark/debug purpose (default: 0)\n", > > + " This is for benchmark/debug purpose (default: 0)\n" > > + " Currently supprted flags are\n" > > + " 1: enable fault request from umemd to qemu\n" > > + " (default: disabled)\n", > > QEMU_ARCH_ALL) > > STEXI > > @item -postcopy-flags int > > Can you move umem.c and umem.h to a separate patch please, > this patch > > diff --git a/umem.c b/umem.c > > new file mode 100644 > > index 0000000..b7be006 > > --- /dev/null > > +++ b/umem.c > > @@ -0,0 +1,379 @@ > > +/* > > + * umem.c: user process backed memory module for postcopy livemigration > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#include > > +#include > > + > > +#include > > + > > +#include "bitops.h" > > +#include "sysemu.h" > > +#include "hw/hw.h" > > +#include "umem.h" > > + > > +//#define DEBUG_UMEM > > +#ifdef DEBUG_UMEM > > +#include > > +#define DPRINTF(format, ...) \ > > + do { \ > > + printf("%d:%ld %s:%d "format, getpid(), syscall(SYS_gettid), \ > > + __func__, __LINE__, ## __VA_ARGS__); \ > > + } while (0) > > +#else > > +#define DPRINTF(format, ...) do { } while (0) > > +#endif > > + > > +#define DEV_UMEM "/dev/umem" > > + > > +struct UMemDev { > > + int fd; > > + int page_shift; > > +}; > > + > > +UMemDev *umem_dev_new(void) > > +{ > > + UMemDev *umem_dev; > > + int umem_dev_fd = open(DEV_UMEM, O_RDWR); > > + if (umem_dev_fd < 0) { > > + perror("can't open "DEV_UMEM); > > + abort(); > > + } > > + > > + umem_dev = g_new(UMemDev, 1); > > + umem_dev->fd = umem_dev_fd; > > + umem_dev->page_shift = ffs(getpagesize()) - 1; > > + return umem_dev; > > +} > > + > > +void umem_dev_destroy(UMemDev *dev) > > +{ > > + close(dev->fd); > > + g_free(dev); > > +} > > + > > +UMem *umem_dev_create(UMemDev *dev, size_t size, const char *name) > > +{ > > + struct umem_create create = { > > + .size = size, > > + .async_req_max = 0, > > + .sync_req_max = 0, > > + }; > > + UMem *umem; > > + > > + snprintf(create.name.id, sizeof(create.name.id), > > + "pid-%"PRId64, (uint64_t)getpid()); > > + create.name.id[UMEM_ID_MAX - 1] = 0; > > + strncpy(create.name.name, name, sizeof(create.name.name)); > > + create.name.name[UMEM_NAME_MAX - 1] = 0; > > + > > + assert((size % getpagesize()) == 0); > > + if (ioctl(dev->fd, UMEM_DEV_CREATE_UMEM, &create) < 0) { > > + perror("UMEM_DEV_CREATE_UMEM"); > > + abort(); > > + } > > + if (ftruncate(create.shmem_fd, create.size) < 0) { > > + 
perror("truncate(\"shmem_fd\")"); > > + abort(); > > + } > > + > > + umem = g_new(UMem, 1); > > + umem->nbits = 0; > > + umem->nsets = 0; > > + umem->faulted = NULL; > > + umem->page_shift = dev->page_shift; > > + umem->fd = create.umem_fd; > > + umem->shmem_fd = create.shmem_fd; > > + umem->size = create.size; > > + umem->umem = mmap(NULL, size, PROT_EXEC | PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > + if (umem->umem == MAP_FAILED) { > > + perror("mmap(UMem) failed"); > > + abort(); > > + } > > + return umem; > > +} > > + > > +void umem_mmap(UMem *umem) > > +{ > > + void *ret = mmap(umem->umem, umem->size, > > + PROT_EXEC | PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_FIXED, umem->fd, 0); > > + if (ret == MAP_FAILED) { > > + perror("umem_mmap(UMem) failed"); > > + abort(); > > + } > > +} > > + > > +void umem_destroy(UMem *umem) > > +{ > > + if (umem->fd != -1) { > > + close(umem->fd); > > + } > > + if (umem->shmem_fd != -1) { > > + close(umem->shmem_fd); > > + } > > + g_free(umem->faulted); > > + g_free(umem); > > +} > > + > > +void umem_get_page_request(UMem *umem, struct umem_page_request *page_request) > > +{ > > + if (ioctl(umem->fd, UMEM_GET_PAGE_REQUEST, page_request)) { > > + perror("daemon: UMEM_GET_PAGE_REQUEST"); > > + abort(); > > + } > > +} > > + > > +void umem_mark_page_cached(UMem *umem, struct umem_page_cached *page_cached) > > +{ > > + if (ioctl(umem->fd, UMEM_MARK_PAGE_CACHED, page_cached)) { > > + perror("daemon: UMEM_MARK_PAGE_CACHED"); > > + abort(); > > + } > > +} > > + > > +void umem_unmap(UMem *umem) > > +{ > > + munmap(umem->umem, umem->size); > > + umem->umem = NULL; > > +} > > + > > +void umem_close(UMem *umem) > > +{ > > + close(umem->fd); > > + umem->fd = -1; > > +} > > + > > +void *umem_map_shmem(UMem *umem) > > +{ > > + umem->nbits = umem->size >> umem->page_shift; > > + umem->nsets = 0; > > + umem->faulted = g_new0(unsigned long, BITS_TO_LONGS(umem->nbits)); > > + > > + umem->shmem = mmap(NULL, 
umem->size, PROT_READ | PROT_WRITE, MAP_SHARED, > > + umem->shmem_fd, 0); > > + if (umem->shmem == MAP_FAILED) { > > + perror("daemon: mmap(\"shmem\")"); > > + abort(); > > + } > > + return umem->shmem; > > +} > > + > > +void umem_unmap_shmem(UMem *umem) > > +{ > > + munmap(umem->shmem, umem->size); > > + umem->shmem = NULL; > > +} > > + > > +void umem_remove_shmem(UMem *umem, size_t offset, size_t size) > > +{ > > + int s = offset >> umem->page_shift; > > + int e = (offset + size) >> umem->page_shift; > > + int i; > > + > > + for (i = s; i < e; i++) { > > + if (!test_and_set_bit(i, umem->faulted)) { > > + umem->nsets++; > > +#if defined(CONFIG_MADVISE) && defined(MADV_REMOVE) > > + madvise(umem->shmem + offset, size, MADV_REMOVE); > > +#endif > > + } > > + } > > +} > > + > > +void umem_close_shmem(UMem *umem) > > +{ > > + close(umem->shmem_fd); > > + umem->shmem_fd = -1; > > +} > > + > > +/***************************************************************************/ > > +/* qemu <-> umem daemon communication */ > > + > > +size_t umem_pages_size(uint64_t nr) > > +{ > > + return sizeof(struct umem_pages) + nr * sizeof(uint64_t); > > +} > > + > > +static void umem_write_cmd(int fd, uint8_t cmd) > > +{ > > + DPRINTF("write cmd %c\n", cmd); > > + > > + for (;;) { > > + ssize_t ret = write(fd, &cmd, 1); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } else if (errno == EPIPE) { > > + perror("pipe"); > > + DPRINTF("write cmd %c %zd %d: pipe is closed\n", > > + cmd, ret, errno); > > + break; > > + } > > + > > + perror("pipe"); > > + DPRINTF("write cmd %c %zd %d\n", cmd, ret, errno); > > + abort(); > > + } > > + > > + break; > > + } > > +} > > + > > +static void umem_read_cmd(int fd, uint8_t expect) > > +{ > > + uint8_t cmd; > > + for (;;) { > > + ssize_t ret = read(fd, &cmd, 1); > > + if (ret == -1) { > > + if (errno == EINTR) { > > + continue; > > + } > > + perror("pipe"); > > + DPRINTF("read error cmd %c %zd %d\n", cmd, ret, errno); > > + 
abort(); > > + } > > + > > + if (ret == 0) { > > + DPRINTF("read cmd %c %zd: pipe is closed\n", cmd, ret); > > + abort(); > > + } > > + > > + break; > > + } > > + > > + DPRINTF("read cmd %c\n", cmd); > > + if (cmd != expect) { > > + DPRINTF("cmd %c expect %d\n", cmd, expect); > > + abort(); > > + } > > +} > > + > > +struct umem_pages *umem_recv_pages(QEMUFile *f, int *offset) > > +{ > > + int ret; > > + uint64_t nr; > > + size_t size; > > + struct umem_pages *pages; > > + > > + ret = qemu_peek_buffer(f, (uint8_t*)&nr, sizeof(nr), *offset); > > + *offset += sizeof(nr); > > + DPRINTF("ret %d nr %ld\n", ret, nr); > > + if (ret != sizeof(nr) || nr == 0) { > > + return NULL; > > + } > > + > > + size = umem_pages_size(nr); > > + pages = g_malloc(size); > > + pages->nr = nr; > > + size -= sizeof(pages->nr); > > + > > + ret = qemu_peek_buffer(f, (uint8_t*)pages->pgoffs, size, *offset); > > + *offset += size; > > + if (ret != size) { > > + g_free(pages); > > + return NULL; > > + } > > + return pages; > > +} > > + > > +static void umem_send_pages(QEMUFile *f, const struct umem_pages *pages) > > +{ > > + size_t len = umem_pages_size(pages->nr); > > + qemu_put_buffer(f, (const uint8_t*)pages, len); > > +} > > + > > +/* umem daemon -> qemu */ > > +void umem_daemon_ready(int to_qemu_fd) > > +{ > > + umem_write_cmd(to_qemu_fd, UMEM_DAEMON_READY); > > +} > > + > > +void umem_daemon_quit(QEMUFile *to_qemu) > > +{ > > + qemu_put_byte(to_qemu, UMEM_DAEMON_QUIT); > > +} > > + > > +void umem_daemon_send_pages_present(QEMUFile *to_qemu, > > + struct umem_pages *pages) > > +{ > > + qemu_put_byte(to_qemu, UMEM_DAEMON_TRIGGER_PAGE_FAULT); > > + umem_send_pages(to_qemu, pages); > > +} > > + > > +void umem_daemon_wait_for_qemu(int from_qemu_fd) > > +{ > > + umem_read_cmd(from_qemu_fd, UMEM_QEMU_READY); > > +} > > + > > +/* qemu -> umem daemon */ > > +void umem_qemu_wait_for_daemon(int from_umemd_fd) > > +{ > > + umem_read_cmd(from_umemd_fd, UMEM_DAEMON_READY); > > +} > > + > > +void 
umem_qemu_ready(int to_umemd_fd) > > +{ > > + umem_write_cmd(to_umemd_fd, UMEM_QEMU_READY); > > +} > > + > > +void umem_qemu_quit(QEMUFile *to_umemd) > > +{ > > + qemu_put_byte(to_umemd, UMEM_QEMU_QUIT); > > +} > > + > > +/* qemu side handler */ > > +struct umem_pages *umem_qemu_trigger_page_fault(QEMUFile *from_umemd, > > + int *offset) > > +{ > > + uint64_t i; > > + int page_shift = ffs(getpagesize()) - 1; > > + struct umem_pages *pages = umem_recv_pages(from_umemd, offset); > > + if (pages == NULL) { > > + return NULL; > > + } > > + > > + for (i = 0; i < pages->nr; i++) { > > + ram_addr_t addr = pages->pgoffs[i] << page_shift; > > + > > + /* make pages present by forcibly triggering page fault. */ > > + volatile uint8_t *ram = qemu_get_ram_ptr(addr); > > + uint8_t dummy_read = ram[0]; > > + (void)dummy_read; /* suppress unused variable warning */ > > + } > > + > > + return pages; > > +} > > + > > +void umem_qemu_send_pages_present(QEMUFile *to_umemd, > > + const struct umem_pages *pages) > > +{ > > + qemu_put_byte(to_umemd, UMEM_QEMU_PAGE_FAULTED); > > + umem_send_pages(to_umemd, pages); > > +} > > + > > +void umem_qemu_send_pages_unmapped(QEMUFile *to_umemd, > > + const struct umem_pages *pages) > > +{ > > + qemu_put_byte(to_umemd, UMEM_QEMU_PAGE_UNMAPPED); > > + umem_send_pages(to_umemd, pages); > > +} > > diff --git a/umem.h b/umem.h > > new file mode 100644 > > index 0000000..5ca19ef > > --- /dev/null > > +++ b/umem.h > > @@ -0,0 +1,105 @@ > > +/* > > + * umem.h: user process backed memory module for postcopy livemigration > > + * > > + * Copyright (c) 2011 > > + * National Institute of Advanced Industrial Science and Technology > > + * > > + * https://sites.google.com/site/grivonhome/quick-kvm-migration > > + * Author: Isaku Yamahata > > + * > > + * This program is free software; you can redistribute it and/or modify it > > + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. 
> > + * > > + * This program is distributed in the hope it will be useful, but WITHOUT > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public License along > > + * with this program; if not, see . > > + */ > > + > > +#ifndef QEMU_UMEM_H > > +#define QEMU_UMEM_H > > + > > +#include > > + > > +#include "qemu-common.h" > > + > > +typedef struct UMemDev UMemDev; > > + > > +struct UMem { > > + void *umem; > > + int fd; > > + void *shmem; > > + int shmem_fd; > > + uint64_t size; > > + > > + /* indexed by host page size */ > > + int page_shift; > > + int nbits; > > + int nsets; > > + unsigned long *faulted; > > +}; > > + > > +UMemDev *umem_dev_new(void); > > +void umem_dev_destroy(UMemDev *dev); > > +UMem *umem_dev_create(UMemDev *dev, size_t size, const char *name); > > +void umem_mmap(UMem *umem); > > + > > +void umem_destroy(UMem *umem); > > + > > +/* umem device operations */ > > +void umem_get_page_request(UMem *umem, struct umem_page_request *page_request); > > +void umem_mark_page_cached(UMem *umem, struct umem_page_cached *page_cached); > > +void umem_unmap(UMem *umem); > > +void umem_close(UMem *umem); > > + > > +/* umem shmem operations */ > > +void *umem_map_shmem(UMem *umem); > > +void umem_unmap_shmem(UMem *umem); > > +void umem_remove_shmem(UMem *umem, size_t offset, size_t size); > > +void umem_close_shmem(UMem *umem); > > + > > +/* qemu on source <-> umem daemon communication */ > > + > > +struct umem_pages { > > + uint64_t nr; /* nr = 0 means completed */ > > + uint64_t pgoffs[0]; > > +}; > > + > > +/* daemon -> qemu */ > > +#define UMEM_DAEMON_READY 'R' > > +#define UMEM_DAEMON_QUIT 'Q' > > +#define UMEM_DAEMON_TRIGGER_PAGE_FAULT 'T' > > +#define UMEM_DAEMON_ERROR 'E' > > + > > +/* qemu -> daemon */ > > +#define UMEM_QEMU_READY 'r' > > +#define 
UMEM_QEMU_QUIT 'q' > > +#define UMEM_QEMU_PAGE_FAULTED 't' > > +#define UMEM_QEMU_PAGE_UNMAPPED 'u' > > + > > +struct umem_pages *umem_recv_pages(QEMUFile *f, int *offset); > > +size_t umem_pages_size(uint64_t nr); > > + > > +/* for umem daemon */ > > +void umem_daemon_ready(int to_qemu_fd); > > +void umem_daemon_wait_for_qemu(int from_qemu_fd); > > +void umem_daemon_quit(QEMUFile *to_qemu); > > +void umem_daemon_send_pages_present(QEMUFile *to_qemu, > > + struct umem_pages *pages); > > + > > +/* for qemu */ > > +void umem_qemu_wait_for_daemon(int from_umemd_fd); > > +void umem_qemu_ready(int to_umemd_fd); > > +void umem_qemu_quit(QEMUFile *to_umemd); > > +struct umem_pages *umem_qemu_trigger_page_fault(QEMUFile *from_umemd, > > + int *offset); > > +void umem_qemu_send_pages_present(QEMUFile *to_umemd, > > + const struct umem_pages *pages); > > +void umem_qemu_send_pages_unmapped(QEMUFile *to_umemd, > > + const struct umem_pages *pages); > > + > > +#endif /* QEMU_UMEM_H */ > > diff --git a/vl.c b/vl.c > > index 5430b8c..17427a0 100644 > > --- a/vl.c > > +++ b/vl.c > > @@ -3274,8 +3274,12 @@ int main(int argc, char **argv, char **envp) > > default_drive(default_sdcard, snapshot, machine->use_scsi, > > IF_SD, 0, SD_OPTS); > > > > - register_savevm_live(NULL, "ram", 0, RAM_SAVE_VERSION_ID, NULL, > > - ram_save_live, NULL, ram_load, NULL); > > + if (postcopy_incoming_init(incoming, incoming_postcopy) < 0) { > > + exit(1); > > + } > > + register_savevm_live(NULL, "ram", 0, RAM_SAVE_VERSION_ID, > > + ram_save_set_params, ram_save_live, NULL, > > + ram_load, NULL); > > > > if (nb_numa_nodes > 0) { > > int i; > > @@ -3471,6 +3475,9 @@ int main(int argc, char **argv, char **envp) > > > > if (incoming) { > > runstate_set(RUN_STATE_INMIGRATE); > > + if (incoming_postcopy) { > > + postcopy_incoming_prepare(); > >+ } > > how about moving postcopy_incoming_prepare into qemu_start_incoming_migration ? 
> > > int ret = qemu_start_incoming_migration(incoming); > > if (ret < 0) { > > fprintf(stderr, "Migration failed. Exit code %s(%d), exiting.\n", > > @@ -3488,6 +3495,9 @@ int main(int argc, char **argv, char **envp) > > bdrv_close_all(); > > pause_all_vcpus(); > > net_cleanup(); > > + if (incoming_postcopy) { > > + postcopy_incoming_qemu_cleanup(); > > + } > > res_free(); > > > > return 0; > > Orit > -- yamahata