From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:50128) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Yi4HB-00075U-O0 for qemu-devel@nongnu.org; Tue, 14 Apr 2015 13:05:31 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Yi4H5-0002nh-KH for qemu-devel@nongnu.org; Tue, 14 Apr 2015 13:05:29 -0400 Received: from mx1.redhat.com ([209.132.183.28]:33323) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Yi4H5-0002mk-Ag for qemu-devel@nongnu.org; Tue, 14 Apr 2015 13:05:23 -0400 From: "Dr. David Alan Gilbert (git)" Date: Tue, 14 Apr 2015 18:03:47 +0100 Message-Id: <1429031053-4454-22-git-send-email-dgilbert@redhat.com> In-Reply-To: <1429031053-4454-1-git-send-email-dgilbert@redhat.com> References: <1429031053-4454-1-git-send-email-dgilbert@redhat.com> Subject: [Qemu-devel] [PATCH v6 21/47] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages. List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: aarcange@redhat.com, yamahata@private.email.ne.jp, quintela@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, david@gibson.dropbear.id.au, yayanghy@cn.fujitsu.com From: "Dr. David Alan Gilbert" The state of the postcopy process is managed via a series of messages; * Add wrappers and handlers for sending/receiving these messages * Add a state variable that tracks the current state of postcopy Signed-off-by: Dr. 
David Alan Gilbert --- include/migration/migration.h | 15 +++ include/sysemu/sysemu.h | 20 ++++ migration/migration.c | 13 +++ savevm.c | 247 ++++++++++++++++++++++++++++++++++++++++++ trace-events | 10 ++ 5 files changed, 305 insertions(+) diff --git a/include/migration/migration.h b/include/migration/migration.h index 5858788..e3389dc 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -52,6 +52,14 @@ typedef struct MigrationState MigrationState; typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head; +typedef enum { + POSTCOPY_INCOMING_NONE = 0, /* Initial state - no postcopy */ + POSTCOPY_INCOMING_ADVISE, + POSTCOPY_INCOMING_LISTENING, + POSTCOPY_INCOMING_RUNNING, + POSTCOPY_INCOMING_END +} PostcopyState; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *file; @@ -59,6 +67,8 @@ struct MigrationIncomingState { /* See savevm.c */ LoadStateEntry_Head loadvm_handlers; + PostcopyState postcopy_state; + /* * Free at the start of the main state load, set as the main thread finishes * loading state. @@ -220,4 +230,9 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); +PostcopyState postcopy_state_get(MigrationIncomingState *mis); + +/* Set the state and return the old state */ +PostcopyState postcopy_state_set(MigrationIncomingState *mis, + PostcopyState new_state); #endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 49ba134..6dd2382 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -87,6 +87,17 @@ enum qemu_vm_cmd { MIG_CMD_INVALID = 0, /* Must be 0 */ MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ MIG_CMD_PING, /* Request a PONG on the RP */ + + MIG_CMD_POSTCOPY_ADVISE = 20, /* Prior to any page transfers, just + warn we might want to do postcopy */ + MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming + pages as it's running. 
*/ + MIG_CMD_POSTCOPY_RUN, /* Start execution */ + + MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that + were previously sent during + precopy but are dirty. */ + }; bool qemu_savevm_state_blocked(Error **errp); @@ -101,6 +112,15 @@ void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command, uint16_t len, uint8_t *data); void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); void qemu_savevm_send_open_return_path(QEMUFile *f); +void qemu_savevm_send_postcopy_advise(QEMUFile *f); +void qemu_savevm_send_postcopy_listen(QEMUFile *f); +void qemu_savevm_send_postcopy_run(QEMUFile *f); + +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, + uint16_t len, + uint64_t *start_list, + uint64_t *end_list); + int qemu_loadvm_state(QEMUFile *f); typedef enum DisplayType diff --git a/migration/migration.c b/migration/migration.c index f641fc7..b72a4c7 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -995,3 +995,16 @@ void migrate_fd_connect(MigrationState *s) qemu_thread_create(&s->thread, "migration", migration_thread, s, QEMU_THREAD_JOINABLE); } + +PostcopyState postcopy_state_get(MigrationIncomingState *mis) +{ + return atomic_fetch_add(&mis->postcopy_state, 0); +} + +/* Set the state and return the old state */ +PostcopyState postcopy_state_set(MigrationIncomingState *mis, + PostcopyState new_state) +{ + return atomic_xchg(&mis->postcopy_state, new_state); +} + diff --git a/savevm.c b/savevm.c index e7d42dc..8d2fe1f 100644 --- a/savevm.c +++ b/savevm.c @@ -39,6 +39,7 @@ #include "exec/memory.h" #include "qmp-commands.h" #include "trace.h" +#include "qemu/bitops.h" #include "qemu/iov.h" #include "block/snapshot.h" #include "block/qapi.h" @@ -634,6 +635,77 @@ void qemu_savevm_send_open_return_path(QEMUFile *f) qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL); } +/* Send prior to any postcopy transfer */ +void qemu_savevm_send_postcopy_advise(QEMUFile *f) +{ + uint64_t tmp[2]; + tmp[0] = 
cpu_to_be64(getpagesize()); + tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); + + trace_qemu_savevm_send_postcopy_advise(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp); +} + +/* Sent prior to starting the destination running in postcopy, discard pages + * that have already been sent but redirtied on the source. + * CMD_POSTCOPY_RAM_DISCARD consists of: + * byte version (0) + * byte Length of name field (not including 0) + * n x byte RAM block name + * byte 0 terminator (just for safety) + * n x Byte ranges within the named RAMBlock + * be64 Start of the range + * be64 end of the range + 1 + * + * name: RAMBlock name that these entries are part of + * len: Number of page entries + * start_list: 'len' addresses + * end_list: 'len' addresses + * (the be64 entries are unaligned in the buffer, hence the stq_be_p stores) + */ +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, + uint16_t len, + uint64_t *start_list, + uint64_t *end_list) +{ + uint8_t *buf; + uint16_t tmplen; + uint16_t t; + size_t name_len = strlen(name); + + trace_qemu_savevm_send_postcopy_ram_discard(name, len); + assert(name_len < 256); + buf = g_malloc0(len*16 + name_len + 3); + buf[0] = 0; /* Version */ + buf[1] = name_len; + memcpy(buf+2, name, name_len); + tmplen = 2+name_len; + buf[tmplen++] = '\0'; + + for (t = 0; t < len; t++) { + stq_be_p(buf + tmplen, start_list[t]); + tmplen += 8; + stq_be_p(buf + tmplen, end_list[t]); + tmplen += 8; + } + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf); + g_free(buf); +} + +/* Get the destination into a state where it can receive postcopy data. 
*/ +void qemu_savevm_send_postcopy_listen(QEMUFile *f) +{ + trace_savevm_send_postcopy_listen(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL); +} + +/* Kick the destination into running */ +void qemu_savevm_send_postcopy_run(QEMUFile *f) +{ + trace_savevm_send_postcopy_run(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL); +} + bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; @@ -966,6 +1038,154 @@ enum LoadVMExitCodes { static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); +/* ------ incoming postcopy messages ------ */ +/* 'advise' arrives before any transfers just to tell us that a postcopy + * *might* happen - it might be skipped if precopy transferred everything + * quickly. + */ +static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + uint64_t remote_hps, + uint64_t remote_tps) +{ + PostcopyState ps = postcopy_state_get(mis); + trace_loadvm_postcopy_handle_advise(); + if (ps != POSTCOPY_INCOMING_NONE) { + error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps); + return -1; + } + + if (remote_hps != getpagesize()) { + /* + * Some combinations of mismatch are probably possible but it gets + * a bit more complicated. In particular we need to place whole + * host pages on the dest at once, and we need to ensure that we + * handle dirtying to make sure we never end up sending part of + * a hostpage on its own. + */ + error_report("Postcopy needs matching host page sizes (s=%d d=%d)", + (int)remote_hps, getpagesize()); + return -1; + } + + if (remote_tps != (1ul << qemu_target_page_bits())) { + /* + * Again, some differences could be dealt with, but for now keep it + * simple. 
+ */ + error_report("Postcopy needs matching target page sizes (s=%d d=%d)", + (int)remote_tps, 1 << qemu_target_page_bits()); + return -1; + } + + postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE); + + return 0; +} + +/* After postcopy we will be told to throw some pages away since they're + * dirty and will have to be demand fetched. Must happen before CPU is + * started. + * There can be 0..many of these messages, each encoding multiple pages. + */ +static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis, + uint16_t len) +{ + int tmp; + char ramid[256]; + size_t hdr_len; + PostcopyState ps = postcopy_state_get(mis); + + trace_loadvm_postcopy_ram_handle_discard(); + + if (ps != POSTCOPY_INCOMING_ADVISE) { + error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)", + ps); + return -1; + } + /* We're expecting a + * Version (0) + * a RAM ID string (length byte, name, 0 term) + * then at least 1 16 byte chunk + */ + if (len < 20) { + error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len); + return -1; + } + + tmp = qemu_get_byte(mis->file); + if (tmp != 0) { + error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp); + return -1; + } + + if (qemu_get_counted_string(mis->file, ramid)) { + error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID"); + return -1; + } + tmp = qemu_get_byte(mis->file); + if (tmp != 0) { + error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp); + return -1; + } + + hdr_len = 3 + strlen(ramid); + if (len < hdr_len || (len - hdr_len) % 16) { + error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len); + return -1; + } + len -= hdr_len; + trace_loadvm_postcopy_ram_handle_discard_header(ramid, len); + while (len) { + /* + * TODO - ram_discard_range gets added in a later patch and should + * be called here with (start, end - 1). Until then just consume + * each range so that the stream stays in sync and the loop ends. + */ + qemu_get_be64(mis->file); /* start of the range */ + qemu_get_be64(mis->file); /* end of the range + 1 */ + + len -= 16; + } + 
trace_loadvm_postcopy_ram_handle_discard_end(); + + return 0; +} + +/* After this message we must be able to immediately receive postcopy data */ +static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(mis, POSTCOPY_INCOMING_LISTENING); + trace_loadvm_postcopy_handle_listen(); + if (ps != POSTCOPY_INCOMING_ADVISE) { + error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); + return -1; + } + + /* TODO start up the postcopy listening thread */ + return 0; +} + +/* After all discards we can start running and asking for pages */ +static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(mis, POSTCOPY_INCOMING_RUNNING); + trace_loadvm_postcopy_handle_run(); + if (ps != POSTCOPY_INCOMING_LISTENING) { + error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); + return -1; + } + + if (autostart) { + /* Hold onto your hats, starting the CPU */ + vm_start(); + } else { + /* Leave it paused and let management decide when to start the CPU */ + runstate_set(RUN_STATE_PAUSED); + } + + return 0; +} + static int loadvm_process_command_simple_lencheck(const char *name, unsigned int actual, unsigned int expected) @@ -991,6 +1211,7 @@ static int loadvm_process_command(QEMUFile *f) uint16_t com; uint16_t len; uint32_t tmp32; + uint64_t tmp64a, tmp64b; com = qemu_get_be16(f); len = qemu_get_be16(f); @@ -1028,6 +1249,32 @@ static int loadvm_process_command(QEMUFile *f) migrate_send_rp_pong(mis, tmp32); break; + case MIG_CMD_POSTCOPY_ADVISE: + if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_ADVISE", + len, 16)) { + return -1; + } + tmp64a = qemu_get_be64(f); /* remote host page size */ + tmp64b = qemu_get_be64(f); /* remote target page size */ + return loadvm_postcopy_handle_advise(mis, tmp64a, tmp64b); + + case MIG_CMD_POSTCOPY_LISTEN: + if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_LISTEN", + len, 0)) { + return -1; + } + return loadvm_postcopy_handle_listen(mis); + + 
case MIG_CMD_POSTCOPY_RUN: + if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_RUN", + len, 0)) { + return -1; + } + return loadvm_postcopy_handle_run(mis); + + case MIG_CMD_POSTCOPY_RAM_DISCARD: + return loadvm_postcopy_ram_handle_discard(mis, len); + default: error_report("VM_COMMAND 0x%x unknown (len 0x%x)", com, len); return -1; diff --git a/trace-events b/trace-events index e343c1a..26625be 100644 --- a/trace-events +++ b/trace-events @@ -1175,11 +1175,21 @@ qemu_loadvm_state_main(void) "" qemu_loadvm_state_main_quit_parent(void) "" qemu_loadvm_state_post_main(int ret) "%d" qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" +loadvm_postcopy_handle_advise(void) "" +loadvm_postcopy_handle_listen(void) "" +loadvm_postcopy_handle_run(void) "" +loadvm_postcopy_ram_handle_discard(void) "" +loadvm_postcopy_ram_handle_discard_end(void) "" +loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %u" loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d" loadvm_process_command_ping(uint32_t val) "%x" +qemu_savevm_send_postcopy_advise(void) "" +qemu_savevm_send_postcopy_ram_discard(const char *id, uint16_t len) "%s: %u" savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u" savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d" savevm_send_ping(uint32_t val) "%x" +savevm_send_postcopy_listen(void) "" +savevm_send_postcopy_run(void) "" savevm_state_begin(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" -- 2.1.0