* [Qemu-devel] [PATCH 01/17] migration: add has_postcopy savevm handler
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 02/17] migration: fix ram_save_pending Vladimir Sementsov-Ogievskiy
` (16 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Now postcopy-able states are recognized by a non-NULL
save_live_complete_postcopy handler. But when we have several different
postcopy-able states, this is not convenient: RAM postcopy may be
disabled while some other postcopy is enabled, and in this case the RAM
state should behave as if it were not postcopy-able.
This patch adds a separate has_postcopy handler to specify the
behaviour of a savevm state.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
include/migration/vmstate.h | 1 +
migration/ram.c | 6 ++++++
migration/savevm.c | 6 ++++--
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 3bbe3ed..1eadcf0 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -46,6 +46,7 @@ typedef struct SaveVMHandlers {
/* This runs both outside and inside the iothread lock. */
bool (*is_active)(void *opaque);
+ bool (*has_postcopy)(void *opaque);
/* This runs outside the iothread lock in the migration case, and
* within the lock in the savevm case. The callback had better only
diff --git a/migration/ram.c b/migration/ram.c
index ef8fadf..c688866 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2589,10 +2589,16 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
return ret;
}
+static bool ram_has_postcopy(void *opaque)
+{
+ return migrate_postcopy_ram();
+}
+
static SaveVMHandlers savevm_ram_handlers = {
.save_live_setup = ram_save_setup,
.save_live_iterate = ram_save_iterate,
.save_live_complete_postcopy = ram_save_complete,
+ .has_postcopy = ram_has_postcopy,
.save_live_complete_precopy = ram_save_complete,
.save_live_pending = ram_save_pending,
.load_state = ram_load,
diff --git a/migration/savevm.c b/migration/savevm.c
index 204012e..5235833 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1018,7 +1018,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
* call that's already run, it might get confused if we call
* iterate afterwards.
*/
- if (postcopy && !se->ops->save_live_complete_postcopy) {
+ if (postcopy &&
+ !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
continue;
}
if (qemu_file_rate_limit(f)) {
@@ -1106,7 +1107,8 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops ||
- (in_postcopy && se->ops->save_live_complete_postcopy) ||
+ (in_postcopy && se->ops->has_postcopy &&
+ se->ops->has_postcopy(se->opaque)) ||
(in_postcopy && !iterable_only) ||
!se->ops->save_live_complete_precopy) {
continue;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 02/17] migration: fix ram_save_pending
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 01/17] migration: add has_postcopy savevm handler Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 03/17] migration: split common postcopy out of ram postcopy Vladimir Sementsov-Ogievskiy
` (15 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Fill postcopy-able pending only if ram postcopy is enabled.
It is necessary because there will be other postcopy-able states, and
when ram postcopy is disabled, it should not spoil the common
postcopy-related pending.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
migration/ram.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/migration/ram.c b/migration/ram.c
index c688866..5d0bbfd 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2091,8 +2091,12 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
}
- /* We can do postcopy, and all the data is postcopiable */
- *postcopiable_pending += remaining_size;
+ if (migrate_postcopy_ram()) {
+ /* We can do postcopy, and all the data is postcopiable */
+ *postcopiable_pending += remaining_size;
+ } else {
+ *non_postcopiable_pending += remaining_size;
+ }
}
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 03/17] migration: split common postcopy out of ram postcopy
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 01/17] migration: add has_postcopy savevm handler Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 02/17] migration: fix ram_save_pending Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 04/17] migration: introduce postcopy-only pending Vladimir Sementsov-Ogievskiy
` (14 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Split common postcopy stuff from ram postcopy stuff.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
include/migration/migration.h | 1 +
migration/migration.c | 39 ++++++++++++++++++++++++-----------
migration/postcopy-ram.c | 4 +++-
migration/savevm.c | 48 +++++++++++++++++++++++++++++++++++--------
4 files changed, 70 insertions(+), 22 deletions(-)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index af9135f..3aa228c 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -305,6 +305,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
+bool migrate_postcopy(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
diff --git a/migration/migration.c b/migration/migration.c
index 2766d2f..0a5fd38 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1310,6 +1310,11 @@ bool migrate_postcopy_ram(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}
+bool migrate_postcopy(void)
+{
+ return migrate_postcopy_ram();
+}
+
bool migrate_auto_converge(void)
{
MigrationState *s;
@@ -1637,9 +1642,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
* need to tell the destination to throw any pages it's already received
* that are dirty
*/
- if (ram_postcopy_send_discard_bitmap(ms)) {
- error_report("postcopy send discard bitmap failed");
- goto fail;
+ if (migrate_postcopy_ram()) {
+ if (ram_postcopy_send_discard_bitmap(ms)) {
+ error_report("postcopy send discard bitmap failed");
+ goto fail;
+ }
}
/*
@@ -1648,8 +1655,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
* wrap their state up here
*/
qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
- /* Ping just for debugging, helps line traces up */
- qemu_savevm_send_ping(ms->to_dst_file, 2);
+ if (migrate_postcopy_ram()) {
+ /* Ping just for debugging, helps line traces up */
+ qemu_savevm_send_ping(ms->to_dst_file, 2);
+ }
/*
* While loading the device state we may trigger page transfer
@@ -1674,7 +1683,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
qemu_savevm_send_postcopy_listen(fb);
qemu_savevm_state_complete_precopy(fb, false);
- qemu_savevm_send_ping(fb, 3);
+ if (migrate_postcopy_ram()) {
+ qemu_savevm_send_ping(fb, 3);
+ }
qemu_savevm_send_postcopy_run(fb);
@@ -1697,11 +1708,13 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
qemu_mutex_unlock_iothread();
- /*
- * Although this ping is just for debug, it could potentially be
- * used for getting a better measurement of downtime at the source.
- */
- qemu_savevm_send_ping(ms->to_dst_file, 4);
+ if (migrate_postcopy_ram()) {
+ /*
+ * Although this ping is just for debug, it could potentially be
+ * used for getting a better measurement of downtime at the source.
+ */
+ qemu_savevm_send_ping(ms->to_dst_file, 4);
+ }
ret = qemu_file_get_error(ms->to_dst_file);
if (ret) {
@@ -1857,7 +1870,9 @@ static void *migration_thread(void *opaque)
/* And do a ping that will make stuff easier to debug */
qemu_savevm_send_ping(s->to_dst_file, 1);
+ }
+ if (migrate_postcopy()) {
/*
* Tell the destination that we *might* want to do postcopy later;
* if the other end can't do postcopy it should fail now, nice and
@@ -1891,7 +1906,7 @@ static void *migration_thread(void *opaque)
if (pending_size && pending_size >= max_size) {
/* Still a significant amount to transfer */
- if (migrate_postcopy_ram() &&
+ if (migrate_postcopy() &&
s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
pend_nonpost <= max_size &&
atomic_read(&s->start_postcopy)) {
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a40dddb..aef5690 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -339,7 +339,9 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
}
postcopy_state_set(POSTCOPY_INCOMING_END);
- migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
+ if (migrate_postcopy_ram()) {
+ migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
+ }
if (mis->postcopy_tmp_page) {
munmap(mis->postcopy_tmp_page, getpagesize());
diff --git a/migration/savevm.c b/migration/savevm.c
index 5235833..965a58c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -73,7 +73,7 @@ static struct mig_cmd_args {
[MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
[MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
[MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
- [MIG_CMD_POSTCOPY_ADVISE] = { .len = 16, .name = "POSTCOPY_ADVISE" },
+ [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" },
[MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
[MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
[MIG_CMD_POSTCOPY_RAM_DISCARD] = {
@@ -82,6 +82,23 @@ static struct mig_cmd_args {
[MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
};
+/* Note for MIG_CMD_POSTCOPY_ADVISE:
+ * The format of arguments is depending on postcopy mode:
+ * - postcopy RAM only
+ * uint64_t host page size
+ * uint64_t target page size
+ *
+ * - postcopy RAM and postcopy dirty bitmaps
+ * format is the same as for postcopy RAM only
+ *
+ * - postcopy dirty bitmaps only
+ * Nothing. Command length field is 0.
+ *
+ * Be careful: adding a new postcopy entity with some other parameters should
+ * not break format self-description ability. Good way is to introduce some
+ * generic extendable format with an exception for two old entities.
+ */
+
static int announce_self_create(uint8_t *buf,
uint8_t *mac_addr)
{
@@ -856,12 +873,17 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
- uint64_t tmp[2];
- tmp[0] = cpu_to_be64(getpagesize());
- tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
+ if (migrate_postcopy_ram()) {
+ uint64_t tmp[2];
+ tmp[0] = cpu_to_be64(getpagesize());
+ tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
- trace_qemu_savevm_send_postcopy_advise();
- qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
+ trace_qemu_savevm_send_postcopy_advise();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
+ 16, (uint8_t *)tmp);
+ } else {
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
+ }
}
/* Sent prior to starting the destination running in postcopy, discard pages
@@ -1344,6 +1366,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
return -1;
}
+ if (!migrate_postcopy_ram()) {
+ return 0;
+ }
+
if (!postcopy_ram_supported_by_host()) {
return -1;
}
@@ -1544,7 +1570,9 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* A rare case, we entered listen without having to do any discards,
* so do the setup that's normally done at the time of the 1st discard.
*/
- postcopy_ram_prepare_discard(mis);
+ if (migrate_postcopy_ram()) {
+ postcopy_ram_prepare_discard(mis);
+ }
}
/*
@@ -1552,8 +1580,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* However, at this point the CPU shouldn't be running, and the IO
* shouldn't be doing anything yet so don't actually expect requests
*/
- if (postcopy_ram_enable_notify(mis)) {
- return -1;
+ if (migrate_postcopy_ram()) {
+ if (postcopy_ram_enable_notify(mis)) {
+ return -1;
+ }
}
if (mis->have_listen_thread) {
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 04/17] migration: introduce postcopy-only pending
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (2 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 03/17] migration: split common postcopy out of ram postcopy Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 05/17] block: add bdrv_next_dirty_bitmap() Vladimir Sementsov-Ogievskiy
` (13 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
include/migration/vmstate.h | 17 +++++++++++++++--
include/sysemu/sysemu.h | 5 +++--
migration/block.c | 7 ++++---
migration/migration.c | 15 ++++++++-------
migration/ram.c | 9 +++++----
migration/savevm.c | 14 ++++++++------
migration/trace-events | 2 +-
7 files changed, 44 insertions(+), 25 deletions(-)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1eadcf0..da62a73 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -58,8 +58,21 @@ typedef struct SaveVMHandlers {
/* This runs outside the iothread lock! */
int (*save_live_setup)(QEMUFile *f, void *opaque);
void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending);
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only);
+ /* Note for save_live_pending:
+ * - res_precopy_only is for data which must be migrated in precopy phase
+ * or in stopped state, in other words - before target vm start
+ * - res_compatible is for data which may be migrated in any phase
+ * - res_postcopy_only is for data which must be migrated in postcopy phase
+ * or in stopped state, in other words - after source vm stop
+ *
+ * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
+ * whole amount of pending data.
+ */
+
+
LoadStateHandler *load_state;
} SaveVMHandlers;
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 4d50694..5d59d35 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -111,8 +111,9 @@ void qemu_savevm_state_cleanup(void);
void qemu_savevm_state_complete_postcopy(QEMUFile *f);
void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only);
void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
- uint64_t *res_non_postcopiable,
- uint64_t *res_postcopiable);
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only);
void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command,
uint16_t len, uint8_t *data);
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
diff --git a/migration/block.c b/migration/block.c
index ebc10e6..83b0785 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -830,8 +830,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
}
static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending)
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
/* Estimate pending number of bytes to send */
uint64_t pending;
@@ -852,7 +853,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
/* We don't do postcopy */
- *non_postcopiable_pending += pending;
+ *res_precopy_only += pending;
}
static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c b/migration/migration.c
index 0a5fd38..2fb734e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1896,20 +1896,21 @@ static void *migration_thread(void *opaque)
uint64_t pending_size;
if (!qemu_file_rate_limit(s->to_dst_file)) {
- uint64_t pend_post, pend_nonpost;
+ uint64_t pend_pre, pend_compat, pend_post;
- qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_nonpost,
- &pend_post);
- pending_size = pend_nonpost + pend_post;
+ qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_pre,
+ &pend_compat, &pend_post);
+ pending_size = pend_pre + pend_compat + pend_post;
trace_migrate_pending(pending_size, max_size,
- pend_post, pend_nonpost);
+ pend_pre, pend_compat, pend_post);
if (pending_size && pending_size >= max_size) {
/* Still a significant amount to transfer */
if (migrate_postcopy() &&
s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
- pend_nonpost <= max_size &&
- atomic_read(&s->start_postcopy)) {
+ pend_pre <= max_size &&
+ (atomic_read(&s->start_postcopy) ||
+ (pend_pre + pend_compat <= max_size))) {
if (!postcopy_start(s, &old_vm_running)) {
current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
diff --git a/migration/ram.c b/migration/ram.c
index 5d0bbfd..3b60010 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2074,8 +2074,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending)
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
uint64_t remaining_size;
@@ -2093,9 +2094,9 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
- *postcopiable_pending += remaining_size;
+ *res_compatible += remaining_size;
} else {
- *non_postcopiable_pending += remaining_size;
+ *res_precopy_only += remaining_size;
}
}
diff --git a/migration/savevm.c b/migration/savevm.c
index 965a58c..d1bab59 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1209,13 +1209,15 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
* for units that can't do postcopy.
*/
void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
- uint64_t *res_non_postcopiable,
- uint64_t *res_postcopiable)
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
SaveStateEntry *se;
- *res_non_postcopiable = 0;
- *res_postcopiable = 0;
+ *res_precopy_only = 0;
+ *res_compatible = 0;
+ *res_postcopy_only = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1227,8 +1229,8 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
continue;
}
}
- se->ops->save_live_pending(f, se->opaque, max_size,
- res_non_postcopiable, res_postcopiable);
+ se->ops->save_live_pending(f, se->opaque, max_size, res_precopy_only,
+ res_compatible, res_postcopy_only);
}
}
diff --git a/migration/trace-events b/migration/trace-events
index fa660e3..0212929 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -80,7 +80,7 @@ migrate_fd_cleanup(void) ""
migrate_fd_error(const char *error_desc) "error=%s"
migrate_fd_cancel(void) ""
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at %zx len %zx"
-migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
+migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
migration_completion_file_err(void) ""
migration_completion_postcopy_end(void) ""
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 05/17] block: add bdrv_next_dirty_bitmap()
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (3 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 04/17] migration: introduce postcopy-only pending Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 06/17] block: add bdrv_dirty_bitmap_enable_successor() Vladimir Sementsov-Ogievskiy
` (12 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Like bdrv_next() - bdrv_next_dirty_bitmap() is a function to provide
access to private dirty bitmaps list.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
block/dirty-bitmap.c | 10 ++++++++++
include/block/dirty-bitmap.h | 3 +++
2 files changed, 13 insertions(+)
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 519737c..7547c78 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -533,3 +533,13 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
{
return hbitmap_count(bitmap->meta);
}
+
+BdrvDirtyBitmap *bdrv_next_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap)
+{
+ if (bitmap == NULL) {
+ return QLIST_FIRST(&bs->dirty_bitmaps);
+ }
+
+ return QLIST_NEXT(bitmap, list);
+}
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 9dea14b..1a457d5 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -72,4 +72,7 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
bool finish);
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
+BdrvDirtyBitmap *bdrv_next_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap);
+
#endif
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 06/17] block: add bdrv_dirty_bitmap_enable_successor()
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (4 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 05/17] block: add bdrv_next_dirty_bitmap() Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 07/17] qapi: add dirty-bitmaps migration capability Vladimir Sementsov-Ogievskiy
` (11 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Enabling bitmap successor is necessary to enable successors of bitmaps
being migrated before target vm start.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
block/dirty-bitmap.c | 5 +++++
include/block/dirty-bitmap.h | 1 +
2 files changed, 6 insertions(+)
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 7547c78..ab2c7f5 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -217,6 +217,11 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
return 0;
}
+void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap)
+{
+ bdrv_enable_dirty_bitmap(bitmap->successor);
+}
+
/**
* For a bitmap with a successor, yield our name to the successor,
* delete the old bitmap, and return a handle to the new bitmap.
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 1a457d5..57b476c 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -20,6 +20,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
+void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
const char *name);
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 07/17] qapi: add dirty-bitmaps migration capability
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (5 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 06/17] block: add bdrv_dirty_bitmap_enable_successor() Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 08/17] block/dirty-bitmap: add bdrv_dirty_bitmap_release_successor Vladimir Sementsov-Ogievskiy
` (10 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
include/migration/migration.h | 1 +
migration/migration.c | 9 +++++++++
qapi-schema.json | 4 +++-
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 3aa228c..38eba2e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -308,6 +308,7 @@ void migrate_del_blocker(Error *reason);
bool migrate_postcopy(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
+bool migrate_dirty_bitmaps(void);
bool migrate_auto_converge(void);
diff --git a/migration/migration.c b/migration/migration.c
index 2fb734e..64f88ac 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1369,6 +1369,15 @@ int migrate_decompress_threads(void)
return s->parameters.decompress_threads;
}
+bool migrate_dirty_bitmaps(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+}
+
bool migrate_use_events(void)
{
MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index cbdffdd..5ae1d79 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -865,11 +865,13 @@
# side, this process is called COarse-Grain LOck Stepping (COLO) for
# Non-stop Service. (since 2.8)
#
+# @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps. (since 2.9)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'postcopy-ram', 'x-colo'] }
+ 'compress', 'events', 'postcopy-ram', 'x-colo', 'dirty-bitmaps'] }
##
# @MigrationCapabilityStatus:
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 08/17] block/dirty-bitmap: add bdrv_dirty_bitmap_release_successor
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (6 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 07/17] qapi: add dirty-bitmaps migration capability Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 09/17] migration: include migrate_dirty_bitmaps in migrate_postcopy Vladimir Sementsov-Ogievskiy
` (9 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
To just release successor and unfreeze bitmap without any additional
work.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
block/dirty-bitmap.c | 10 ++++++++++
include/block/dirty-bitmap.h | 2 ++
2 files changed, 12 insertions(+)
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index ab2c7f5..32aa6eb 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -274,6 +274,16 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
return parent;
}
+void bdrv_dirty_bitmap_release_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *parent)
+{
+ BdrvDirtyBitmap *successor = parent->successor;
+
+ if (successor) {
+ bdrv_release_dirty_bitmap(bs, successor);
+ parent->successor = NULL;
+ }
+}
/**
* Truncates _all_ bitmaps attached to a BDS.
*/
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 57b476c..20b3ec7 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -20,6 +20,8 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
+void bdrv_dirty_bitmap_release_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap);
void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
const char *name);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 09/17] migration: include migrate_dirty_bitmaps in migrate_postcopy
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (7 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 08/17] block/dirty-bitmap: add bdrv_dirty_bitmap_release_successor Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 10/17] migration/qemu-file: add qemu_put_counted_string() Vladimir Sementsov-Ogievskiy
` (8 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Enable postcopy if dirty bitmap migration is enabled.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
migration/migration.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/migration.c b/migration/migration.c
index 64f88ac..4716efd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1312,7 +1312,7 @@ bool migrate_postcopy_ram(void)
bool migrate_postcopy(void)
{
- return migrate_postcopy_ram();
+ return migrate_postcopy_ram() || migrate_dirty_bitmaps();
}
bool migrate_auto_converge(void)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 10/17] migration/qemu-file: add qemu_put_counted_string()
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (8 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 09/17] migration: include migrate_dirty_bitmaps in migrate_postcopy Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 11/17] migration: add is_active_iterate handler Vladimir Sementsov-Ogievskiy
` (7 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Add function opposite to qemu_get_counted_string.
qemu_put_counted_string puts one-byte length of the string (string
should not be longer than 255 characters), and then it puts the string,
without last zero byte.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
include/migration/qemu-file.h | 2 ++
migration/qemu-file.c | 13 +++++++++++++
2 files changed, 15 insertions(+)
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index abedd46..d860c92 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -309,4 +309,6 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
size_t qemu_get_counted_string(QEMUFile *f, char buf[256]);
+void qemu_put_counted_string(QEMUFile *f, const char *name);
+
#endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e9fae31..8e928b2 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -691,6 +691,19 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
}
/*
+ * Put a string with one preceding byte containing its length. The length of
+ * the string should be less than 256.
+ */
+void qemu_put_counted_string(QEMUFile *f, const char *str)
+{
+ size_t len = strlen(str);
+
+ assert(len < 256);
+ qemu_put_byte(f, len);
+ qemu_put_buffer(f, (const uint8_t *)str, len);
+}
+
+/*
* Set the blocking state of the QEMUFile.
* Note: On some transports the OS only keeps a single blocking state for
* both directions, and thus changing the blocking on the main
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 11/17] migration: add is_active_iterate handler
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (9 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 10/17] migration/qemu-file: add qemu_put_counted_string() Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps Vladimir Sementsov-Ogievskiy
` (6 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Only-postcopy savevm states (dirty-bitmap) don't need live iteration, so
to disable them and stop transporting empty sections there is a new
savevm handler.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
include/migration/vmstate.h | 1 +
migration/savevm.c | 5 +++++
2 files changed, 6 insertions(+)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index da62a73..6049deb 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -47,6 +47,7 @@ typedef struct SaveVMHandlers {
/* This runs both outside and inside the iothread lock. */
bool (*is_active)(void *opaque);
bool (*has_postcopy)(void *opaque);
+ bool (*is_active_iterate)(void *opaque);
/* This runs outside the iothread lock in the migration case, and
* within the lock in the savevm case. The callback had better only
diff --git a/migration/savevm.c b/migration/savevm.c
index d1bab59..94783e5 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1034,6 +1034,11 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
continue;
}
}
+ if (se->ops && se->ops->is_active_iterate) {
+ if (!se->ops->is_active_iterate(se->opaque)) {
+ continue;
+ }
+ }
/*
* In the postcopy phase, any device that doesn't know how to
* do postcopy should have saved it's state in the _complete
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (10 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 11/17] migration: add is_active_iterate handler Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 13/17] iotests: add add_incoming_migration to VM class Vladimir Sementsov-Ogievskiy
` (5 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.
If destination qemu already contains a dirty bitmap with the same name
as a migrated bitmap (for the same node), then, if their granularities are
the same, the migration will be done, otherwise an error will be generated.
If destination qemu doesn't contain such a bitmap, it will be created.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
include/migration/block.h | 1 +
include/migration/migration.h | 4 +
migration/Makefile.objs | 2 +-
migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
migration/migration.c | 3 +
migration/savevm.c | 2 +
migration/trace-events | 14 +
vl.c | 1 +
8 files changed, 705 insertions(+), 1 deletion(-)
create mode 100644 migration/block-dirty-bitmap.c
diff --git a/include/migration/block.h b/include/migration/block.h
index 41a1ac8..8333c43 100644
--- a/include/migration/block.h
+++ b/include/migration/block.h
@@ -14,6 +14,7 @@
#ifndef MIGRATION_BLOCK_H
#define MIGRATION_BLOCK_H
+void dirty_bitmap_mig_init(void);
void blk_mig_init(void);
int blk_mig_active(void);
uint64_t blk_mig_bytes_transferred(void);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 38eba2e..9915e2d 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -372,4 +372,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);
+
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
#endif
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 480dd49..fa3bf6a 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -9,5 +9,5 @@ common-obj-y += qjson.o
common-obj-$(CONFIG_RDMA) += rdma.o
-common-obj-y += block.o
+common-obj-y += block.o block-dirty-bitmap.o
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 0000000..28e3732
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,679 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
+ *
+ * Authors:
+ * Liran Schour <lirans@il.ibm.com>
+ * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM copyright
+ * are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * ***
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only named dirty
+ * bitmaps, associated with root nodes and non-root named nodes are migrated.
+ *
+ * If destination qemu already contains a dirty bitmap with the same name
+ * as a migrated bitmap (for the same node), then, if their granularities are
+ * the same, the migration will be done, otherwise an error will be generated.
+ *
+ * If destination qemu doesn't contain such bitmap it will be created.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
+ * [ 1 byte: node name size ] \ flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be32: granularity
+ * 1 byte: bitmap enabled flag
+ *
+ * # Complete of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer ] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/block.h"
+#include "migration/migration.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include <assert.h>
+
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
+ * bit should be set. */
+#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
+#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
+#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
+#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
+#define DIRTY_BITMAP_MIG_FLAG_START 0x10
+#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
+#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
+
+#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
+
+#define DEBUG_DIRTY_BITMAP_MIGRATION 0
+
+typedef struct DirtyBitmapMigBitmapState {
+ /* Written during setup phase. */
+ BlockDriverState *bs;
+ const char *node_name;
+ BdrvDirtyBitmap *bitmap;
+ uint64_t total_sectors;
+ uint64_t sectors_per_chunk;
+ QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
+
+ /* For bulk phase. */
+ bool bulk_completed;
+ uint64_t cur_sector;
+} DirtyBitmapMigBitmapState;
+
+typedef struct DirtyBitmapMigState {
+ QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
+
+ bool bulk_completed;
+
+ /* for send_bitmap_bits() */
+ BlockDriverState *prev_bs;
+ BdrvDirtyBitmap *prev_bitmap;
+} DirtyBitmapMigState;
+
+typedef struct DirtyBitmapLoadState {
+ uint32_t flags;
+ char node_name[256];
+ char bitmap_name[256];
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+} DirtyBitmapLoadState;
+
+static DirtyBitmapMigState dirty_bitmap_mig_state;
+
+typedef struct DirtyBitmapLoadBitmapState {
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ bool migrated;
+} DirtyBitmapLoadBitmapState;
+static GSList *enabled_bitmaps;
+QemuMutex finish_lock;
+
+void init_dirty_bitmap_incoming_migration(void)
+{
+ qemu_mutex_init(&finish_lock);
+}
+
+static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
+{
+ uint8_t flags = qemu_get_byte(f);
+ if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+ flags = flags << 8 | qemu_get_byte(f);
+ if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+ flags = flags << 16 | qemu_get_be16(f);
+ }
+ }
+
+ return flags;
+}
+
+static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
+{
+ if (!(flags & 0xffffff00)) {
+ qemu_put_byte(f, flags);
+ return;
+ }
+
+ if (!(flags & 0xffff0000)) {
+ qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
+ return;
+ }
+
+ qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
+}
+
+static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint32_t additional_flags)
+{
+ BlockDriverState *bs = dbms->bs;
+ BdrvDirtyBitmap *bitmap = dbms->bitmap;
+ uint32_t flags = additional_flags;
+ trace_send_bitmap_header_enter();
+
+ if (bs != dirty_bitmap_mig_state.prev_bs) {
+ dirty_bitmap_mig_state.prev_bs = bs;
+ flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
+ }
+
+ if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
+ dirty_bitmap_mig_state.prev_bitmap = bitmap;
+ flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
+ }
+
+ qemu_put_bitmap_flags(f, flags);
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ qemu_put_counted_string(f, dbms->node_name);
+ }
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
+ }
+}
+
+static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
+ qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
+ qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
+}
+
+static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
+}
+
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint64_t start_sector, uint32_t nr_sectors)
+{
+ /* align for buffer_is_zero() */
+ uint64_t align = 4 * sizeof(long);
+ uint64_t unaligned_size =
+ bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
+ start_sector, nr_sectors);
+ uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
+ uint8_t *buf = g_malloc0(buf_size);
+ uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+ bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
+ start_sector, nr_sectors);
+
+ if (buffer_is_zero(buf, buf_size)) {
+ g_free(buf);
+ buf = NULL;
+ flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+ }
+
+ trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+ send_bitmap_header(f, dbms, flags);
+
+ qemu_put_be64(f, start_sector);
+ qemu_put_be32(f, nr_sectors);
+
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration. */
+ if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ qemu_fflush(f);
+ } else {
+ qemu_put_be64(f, buf_size);
+ qemu_put_buffer(f, buf, buf_size);
+ }
+
+ g_free(buf);
+}
+
+
+/* Called with iothread lock taken. */
+
+static void init_dirty_bitmap_migration(void)
+{
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ DirtyBitmapMigBitmapState *dbms;
+ BdrvNextIterator it;
+ uint64_t total_bytes = 0;
+
+ dirty_bitmap_mig_state.bulk_completed = false;
+ dirty_bitmap_mig_state.prev_bs = NULL;
+ dirty_bitmap_mig_state.prev_bitmap = NULL;
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
+ bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
+ if (!bdrv_dirty_bitmap_name(bitmap)) {
+ continue;
+ }
+
+ if (!bdrv_get_device_or_node_name(bs)) {
+ /* not named non-root node */
+ continue;
+ }
+
+ dbms = g_new0(DirtyBitmapMigBitmapState, 1);
+ dbms->bs = bs;
+ dbms->node_name = bdrv_get_node_name(bs);
+ if (!dbms->node_name || dbms->node_name[0] == '\0') {
+ dbms->node_name = bdrv_get_device_name(bs);
+ }
+ dbms->bitmap = bitmap;
+ dbms->total_sectors = bdrv_nb_sectors(bs);
+ dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+ bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+ total_bytes +=
+ bdrv_dirty_bitmap_serialization_size(bitmap,
+ 0, dbms->total_sectors);
+
+ QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
+ dbms, entry);
+ }
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
+ dbms->sectors_per_chunk);
+
+ send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
+
+ dbms->cur_sector += nr_sectors;
+ if (dbms->cur_sector >= dbms->total_sectors) {
+ dbms->bulk_completed = true;
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase(QEMUFile *f, bool limit)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ while (!dbms->bulk_completed) {
+ bulk_phase_send_chunk(f, dbms);
+ if (limit && qemu_file_rate_limit(f)) {
+ return;
+ }
+ }
+ }
+
+ dirty_bitmap_mig_state.bulk_completed = true;
+}
+
+/* Called with iothread lock taken. */
+static void dirty_bitmap_mig_cleanup(void)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
+ QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
+ g_free(dbms);
+ }
+}
+
+/* for SaveVMHandlers */
+static void dirty_bitmap_migration_cleanup(void *opaque)
+{
+ dirty_bitmap_mig_cleanup();
+}
+
+static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
+{
+ trace_dirty_bitmap_save_iterate(
+ migration_in_postcopy(migrate_get_current()));
+
+ if (migration_in_postcopy(migrate_get_current()) &&
+ !dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, true);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return dirty_bitmap_mig_state.bulk_completed;
+}
+
+/* Called with iothread lock taken. */
+
+static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ trace_dirty_bitmap_save_complete_enter();
+
+ if (!dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, false);
+ }
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_complete(f, dbms);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ trace_dirty_bitmap_save_complete_finish();
+
+ dirty_bitmap_mig_cleanup();
+ return 0;
+}
+
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ uint64_t pending = 0;
+
+ qemu_mutex_lock_iothread();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
+ uint64_t sectors = dbms->bulk_completed ? 0 :
+ dbms->total_sectors - dbms->cur_sector;
+
+ pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
+ }
+
+ qemu_mutex_unlock_iothread();
+
+ trace_dirty_bitmap_save_pending(pending, max_size);
+
+ *res_postcopy_only += pending;
+}
+
+/* First occurrence of this bitmap. It should be created if it doesn't exist */
+static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ uint32_t granularity = qemu_get_be32(f);
+ bool enabled = qemu_get_byte(f);
+
+ if (!s->bitmap) {
+ s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
+ s->bitmap_name, &local_err);
+ if (!s->bitmap) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+ } else {
+ uint32_t dest_granularity =
+ bdrv_dirty_bitmap_granularity(s->bitmap);
+ if (dest_granularity != granularity) {
+ fprintf(stderr,
+ "Error: "
+ "Migrated bitmap granularity (%" PRIu32 ") "
+ "doesn't match the destination bitmap '%s' "
+ "granularity (%" PRIu32 ")\n",
+ granularity,
+ bdrv_dirty_bitmap_name(s->bitmap),
+ dest_granularity);
+ return -EINVAL;
+ }
+ }
+
+ bdrv_disable_dirty_bitmap(s->bitmap);
+ if (enabled) {
+ DirtyBitmapLoadBitmapState *b;
+
+ bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ b = g_new(DirtyBitmapLoadBitmapState, 1);
+ b->bs = s->bs;
+ b->bitmap = s->bitmap;
+ b->migrated = false;
+ enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
+ }
+
+ return 0;
+}
+
+void dirty_bitmap_mig_before_vm_start(void)
+{
+ GSList *item;
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->migrated) {
+ bdrv_enable_dirty_bitmap(b->bitmap);
+ } else {
+ bdrv_dirty_bitmap_enable_successor(b->bitmap);
+ }
+
+ g_free(b);
+ }
+
+ g_slist_free(enabled_bitmaps);
+ enabled_bitmaps = NULL;
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ GSList *item;
+ trace_dirty_bitmap_load_complete();
+ bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->bitmap == s->bitmap) {
+ b->migrated = true;
+ }
+ }
+
+ if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
+ if (enabled_bitmaps == NULL) {
+ /* in postcopy */
+ AioContext *aio_context = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
+ bdrv_enable_dirty_bitmap(s->bitmap);
+
+ aio_context_release(aio_context);
+ } else {
+ /* target not started, successor is empty */
+ bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
+ }
+ }
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ uint64_t first_sector = qemu_get_be64(f);
+ uint32_t nr_sectors = qemu_get_be32(f);
+ trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ trace_dirty_bitmap_load_bits_zeroes();
+ bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
+ nr_sectors, false);
+ } else {
+ uint8_t *buf;
+ uint64_t buf_size = qemu_get_be64(f);
+ uint64_t needed_size =
+ bdrv_dirty_bitmap_serialization_size(s->bitmap,
+ first_sector, nr_sectors);
+
+ if (needed_size > buf_size) {
+ fprintf(stderr,
+ "Error: Migrated bitmap granularity doesn't "
+ "match the destination bitmap '%s' granularity\n",
+ bdrv_dirty_bitmap_name(s->bitmap));
+ return -EINVAL;
+ }
+
+ buf = g_malloc(buf_size);
+ qemu_get_buffer(f, buf, buf_size);
+ bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
+ first_sector,
+ nr_sectors, false);
+ g_free(buf);
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    Error *local_err = NULL;
+    s->flags = qemu_get_bitmap_flags(f);
+    trace_dirty_bitmap_load_header(s->flags);
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+        if (!qemu_get_counted_string(f, s->node_name)) {
+            fprintf(stderr, "Unable to read node name string\n");
+            return -EINVAL;
+        }
+        s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+        if (!s->bs) {
+            error_report("%s", error_get_pretty(local_err));
+            error_free(local_err);
+            return -EINVAL;
+        }
+    } else if (!s->bs) {
+        fprintf(stderr, "Error: block device name is not set\n");
+        return -EINVAL;
+    }
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+        if (!qemu_get_counted_string(f, s->bitmap_name)) {
+            fprintf(stderr, "Unable to read bitmap name string\n");
+            return -EINVAL;
+        }
+        s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+        /* bitmap may be NULL here, it wouldn't be an error if it is the
+         * first occurrence of the bitmap */
+        if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+            fprintf(stderr, "Error: unknown dirty bitmap "
+                    "'%s' for block device '%s'\n",
+                    s->bitmap_name, s->node_name);
+            return -EINVAL;
+        }
+    } else if (!s->bitmap) {
+        fprintf(stderr, "Error: dirty bitmap name is not set\n");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
+{
+    static DirtyBitmapLoadState s;
+    int ret = 0;
+
+    trace_dirty_bitmap_load_enter();
+    do {
+        /* Do not proceed on a bad header: s would be stale/invalid. */
+        ret = dirty_bitmap_load_header(f, &s);
+        if (ret) {
+            return ret;
+        }
+        if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
+            ret = dirty_bitmap_load_start(f, &s);
+        } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
+            dirty_bitmap_load_complete(f, &s);
+        } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
+            ret = dirty_bitmap_load_bits(f, &s);
+        }
+
+        if (!ret) {
+            ret = qemu_file_get_error(f);
+        }
+
+        if (ret) {
+            return ret;
+        }
+    } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
+
+    trace_dirty_bitmap_load_success();
+    return 0;
+}
+
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms = NULL;
+ init_dirty_bitmap_migration();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_start(f, dbms);
+ }
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return 0;
+}
+
+static bool dirty_bitmap_is_active(void *opaque)
+{
+ return migrate_dirty_bitmaps();
+}
+
+static bool dirty_bitmap_is_active_iterate(void *opaque)
+{
+ return dirty_bitmap_is_active(opaque) && !runstate_is_running();
+}
+
+static bool dirty_bitmap_has_postcopy(void *opaque)
+{
+ return true;
+}
+
+static SaveVMHandlers savevm_dirty_bitmap_handlers = {
+ .save_live_setup = dirty_bitmap_save_setup,
+ .save_live_complete_postcopy = dirty_bitmap_save_complete,
+ .save_live_complete_precopy = dirty_bitmap_save_complete,
+ .has_postcopy = dirty_bitmap_has_postcopy,
+ .save_live_pending = dirty_bitmap_save_pending,
+ .save_live_iterate = dirty_bitmap_save_iterate,
+ .is_active_iterate = dirty_bitmap_is_active_iterate,
+ .load_state = dirty_bitmap_load,
+ .cleanup = dirty_bitmap_migration_cleanup,
+ .is_active = dirty_bitmap_is_active,
+};
+
+void dirty_bitmap_mig_init(void)
+{
+ QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
+
+ register_savevm_live(NULL, "dirty-bitmap", 0, 1,
+ &savevm_dirty_bitmap_handlers,
+ &dirty_bitmap_mig_state);
+}
diff --git a/migration/migration.c b/migration/migration.c
index 4716efd..221be74 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -387,6 +387,9 @@ static void process_incoming_migration_co(void *opaque)
int ret;
mis = migration_incoming_state_new(f);
+
+ init_dirty_bitmap_incoming_migration();
+
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
diff --git a/migration/savevm.c b/migration/savevm.c
index 94783e5..cc44e5e 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1638,6 +1638,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_vmstart();
+ dirty_bitmap_mig_before_vm_start();
+
if (autostart) {
/* Hold onto your hats, starting the CPU */
vm_start();
diff --git a/migration/trace-events b/migration/trace-events
index 0212929..38fca41 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -222,3 +222,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"
+
+# migration/block-dirty-bitmap.c
+send_bitmap_header_enter(void) ""
+send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
+dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
+dirty_bitmap_save_complete_enter(void) ""
+dirty_bitmap_save_complete_finish(void) ""
+dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
+dirty_bitmap_load_complete(void) ""
+dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
+dirty_bitmap_load_bits_zeroes(void) ""
+dirty_bitmap_load_header(uint32_t flags) "flags %x"
+dirty_bitmap_load_enter(void) ""
+dirty_bitmap_load_success(void) ""
diff --git a/vl.c b/vl.c
index b4eaf03..f1ee9ff 100644
--- a/vl.c
+++ b/vl.c
@@ -4405,6 +4405,7 @@ int main(int argc, char **argv, char **envp)
blk_mig_init();
ram_mig_init();
+ dirty_bitmap_mig_init();
/* If the currently selected machine wishes to override the units-per-bus
* property of its default HBA interface type, do so now. */
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 13/17] iotests: add add_incoming_migration to VM class
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (11 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 14/17] qmp: add x-debug-block-dirty-bitmap-sha256 Vladimir Sementsov-Ogievskiy
` (4 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
tests/qemu-iotests/iotests.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index bec8eb4..f5ca4b8 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -177,6 +177,12 @@ class VM(qtest.QEMUQtestMachine):
self._num_drives += 1
return self
+ def add_incoming_migration(self, desc):
+ '''Add an incoming migration to the VM'''
+ self._args.append('-incoming')
+ self._args.append(desc)
+ return self
+
def pause_drive(self, drive, event=None):
'''Pause drive r/w operations'''
if not event:
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 14/17] qmp: add x-debug-block-dirty-bitmap-sha256
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (12 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 13/17] iotests: add add_incoming_migration to VM class Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 15/17] iotests: add default node-name Vladimir Sementsov-Ogievskiy
` (3 subsequent siblings)
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
block/dirty-bitmap.c | 5 +++++
blockdev.c | 29 +++++++++++++++++++++++++++++
include/block/dirty-bitmap.h | 2 ++
include/qemu/hbitmap.h | 8 ++++++++
qapi/block-core.json | 27 +++++++++++++++++++++++++++
tests/Makefile.include | 2 +-
util/hbitmap.c | 11 +++++++++++
7 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 32aa6eb..5bec99b 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -558,3 +558,8 @@ BdrvDirtyBitmap *bdrv_next_dirty_bitmap(BlockDriverState *bs,
return QLIST_NEXT(bitmap, list);
}
+
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
+{
+ return hbitmap_sha256(bitmap->bitmap, errp);
+}
diff --git a/blockdev.c b/blockdev.c
index db82ac9..4d06885 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2790,6 +2790,35 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
aio_context_release(aio_context);
}
+BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
+ const char *name,
+ Error **errp)
+{
+ AioContext *aio_context;
+ BdrvDirtyBitmap *bitmap;
+ BlockDriverState *bs;
+ BlockDirtyBitmapSha256 *ret = NULL;
+ char *sha256;
+
+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+ if (!bitmap || !bs) {
+ return NULL;
+ }
+
+ sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
+ if (sha256 == NULL) {
+ goto out;
+ }
+
+ ret = g_new(BlockDirtyBitmapSha256, 1);
+ ret->sha256 = sha256;
+
+out:
+ aio_context_release(aio_context);
+
+ return ret;
+}
+
void hmp_drive_del(Monitor *mon, const QDict *qdict)
{
const char *id = qdict_get_str(qdict, "id");
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 20b3ec7..ded872a 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -78,4 +78,6 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
BdrvDirtyBitmap *bdrv_next_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap);
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
+
#endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 9239fe5..f353e56 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -238,6 +238,14 @@ void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count,
void hbitmap_deserialize_finish(HBitmap *hb);
/**
+ * hbitmap_sha256:
+ * @bitmap: HBitmap to operate on.
+ *
+ * Returns SHA256 hash of the last level.
+ */
+char *hbitmap_sha256(const HBitmap *bitmap, Error **errp);
+
+/**
* hbitmap_free:
* @hb: HBitmap to operate on.
*
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 932f5bb..8646054 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1632,6 +1632,33 @@
'data': 'BlockDirtyBitmap' }
##
+# @BlockDirtyBitmapSha256:
+#
+# SHA256 hash of dirty bitmap data
+#
+# @sha256: ASCII representation of SHA256 bitmap hash
+#
+# Since: 2.9
+##
+ { 'struct': 'BlockDirtyBitmapSha256',
+ 'data': {'sha256': 'str'} }
+
+##
+# @x-debug-block-dirty-bitmap-sha256:
+#
+# Get bitmap SHA256
+#
+# Returns: BlockDirtyBitmapSha256 on success
+# If @node is not a valid block device, DeviceNotFound
+# If @name is not found or if hashing has failed, GenericError with an
+# explanation
+#
+# Since: 2.9
+##
+ { 'command': 'x-debug-block-dirty-bitmap-sha256',
+ 'data': 'BlockDirtyBitmap', 'returns': 'BlockDirtyBitmapSha256' }
+
+##
# @blockdev-mirror:
#
# Start mirroring a block device's writes to a new destination.
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 634394a..7a71b4d 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -526,7 +526,7 @@ tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-u
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
-tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y)
+tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) $(test-crypto-obj-y)
tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o $(test-util-obj-y)
tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 35088e1..412b0fa 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -13,6 +13,7 @@
#include "qemu/hbitmap.h"
#include "qemu/host-utils.h"
#include "trace.h"
+#include "crypto/hash.h"
/* HBitmaps provides an array of bits. The bits are stored as usual in an
* array of unsigned longs, but HBitmap is also optimized to provide fast
@@ -689,3 +690,13 @@ void hbitmap_free_meta(HBitmap *hb)
hbitmap_free(hb->meta);
hb->meta = NULL;
}
+
+char *hbitmap_sha256(const HBitmap *bitmap, Error **errp)
+{
+ size_t size = bitmap->sizes[HBITMAP_LEVELS - 1] * sizeof(unsigned long);
+ char *data = (char *)bitmap->levels[HBITMAP_LEVELS - 1];
+ char *hash = NULL;
+ qcrypto_hash_digest(QCRYPTO_HASH_ALG_SHA256, data, size, &hash, errp);
+
+ return hash;
+}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 15/17] iotests: add default node-name
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (13 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 14/17] qmp: add x-debug-block-dirty-bitmap-sha256 Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-12 2:14 ` Max Reitz
2017-02-07 15:05 ` [Qemu-devel] [PATCH 16/17] iotests: add dirty bitmap migration test Vladimir Sementsov-Ogievskiy
` (2 subsequent siblings)
17 siblings, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
When testing migration, node-names auto-generated by qemu differ between
source and destination qemu and migration fails. After this patch,
node-names auto-generated by iotests will be the same.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
tests/qemu-iotests/iotests.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index f5ca4b8..e110c90 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -168,6 +168,8 @@ class VM(qtest.QEMUQtestMachine):
options.append('file=%s' % path)
options.append('format=%s' % format)
options.append('cache=%s' % cachemode)
+ if 'node-name' not in opts:
+ options.append('node-name=drivenode%d' % self._num_drives)
if opts:
options.append(opts)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 15/17] iotests: add default node-name
2017-02-07 15:05 ` [Qemu-devel] [PATCH 15/17] iotests: add default node-name Vladimir Sementsov-Ogievskiy
@ 2017-02-12 2:14 ` Max Reitz
0 siblings, 0 replies; 40+ messages in thread
From: Max Reitz @ 2017-02-12 2:14 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
kwolf, peter.maydell, dgilbert, den, jsnow, lirans
[-- Attachment #1: Type: text/plain, Size: 466 bytes --]
On 07.02.2017 16:05, Vladimir Sementsov-Ogievskiy wrote:
> When testing migration, auto-generated by qemu node-names differs in
> source and destination qemu and migration fails. After this patch,
> auto-generated by iotest nodenames will be the same.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> tests/qemu-iotests/iotests.py | 2 ++
> 1 file changed, 2 insertions(+)
Reviewed-by: Max Reitz <mreitz@redhat.com>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 512 bytes --]
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 16/17] iotests: add dirty bitmap migration test
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (14 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 15/17] iotests: add default node-name Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-07 15:05 ` [Qemu-devel] [PATCH 17/17] iotests: add dirty bitmap postcopy test Vladimir Sementsov-Ogievskiy
2017-02-13 5:26 ` [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Fam Zheng
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
The test starts two vms (vm_a, vm_b), creates a dirty bitmap in
the first one, does several writes to the corresponding device and
then migrates vm_a to vm_b with dirty bitmaps.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
tests/qemu-iotests/169 | 86 ++++++++++++++++++++++++++++++++++++++++++++++
tests/qemu-iotests/169.out | 5 +++
tests/qemu-iotests/group | 1 +
3 files changed, 92 insertions(+)
create mode 100755 tests/qemu-iotests/169
create mode 100644 tests/qemu-iotests/169.out
diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
new file mode 100755
index 0000000..ec6cd46
--- /dev/null
+++ b/tests/qemu-iotests/169
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Tests for dirty bitmaps migration.
+#
+# Copyright (C) Vladimir Sementsov-Ogievskiy 2015-2016
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+import time
+from iotests import qemu_img
+
+disk_a = os.path.join(iotests.test_dir, 'disk_a')
+disk_b = os.path.join(iotests.test_dir, 'disk_b')
+fifo = os.path.join(iotests.test_dir, 'mig_fifo')
+
+class TestDirtyBitmapMigration(iotests.QMPTestCase):
+
+ def setUp(self):
+ size = 0x400000000 # 1G
+ os.mkfifo(fifo)
+ qemu_img('create', '-f', iotests.imgfmt, disk_a, str(size))
+ qemu_img('create', '-f', iotests.imgfmt, disk_b, str(size))
+ self.vm_a = iotests.VM(path_suffix='a').add_drive(disk_a)
+ self.vm_b = iotests.VM(path_suffix='b').add_drive(disk_b)
+ self.vm_b.add_incoming_migration("exec: cat '" + fifo + "'")
+ self.vm_a.launch()
+ self.vm_b.launch()
+
+ def tearDown(self):
+ self.vm_a.shutdown()
+ self.vm_b.shutdown()
+ os.remove(disk_a)
+ os.remove(disk_b)
+ os.remove(fifo)
+
+ def test_migration(self):
+ granularity = 512
+ regions = [
+ { 'start': 0, 'count': 0x100000 },
+ { 'start': 0x100000000, 'count': 0x200000 },
+ { 'start': 0x399900000, 'count': 0x100000 }
+ ]
+
+ result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+ name='bitmap', granularity=granularity)
+ self.assert_qmp(result, 'return', {});
+
+ for r in regions:
+ self.vm_a.hmp_qemu_io('drive0',
+ 'write %d %d' % (r['start'], r['count']))
+
+ result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+ node='drive0', name='bitmap')
+ sha256 = result['return']['sha256']
+
+ result = self.vm_a.qmp('migrate-set-capabilities',
+ capabilities=[{'capability': 'dirty-bitmaps',
+ 'state': True}])
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm_a.qmp('migrate', uri='exec:cat>' + fifo)
+ self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+ self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
+ time.sleep(2)
+
+ result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+ node='drive0', name='bitmap')
+ self.assert_qmp(result, 'return/sha256', sha256);
+
+
+if __name__ == '__main__':
+ iotests.main()
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
new file mode 100644
index 0000000..ae1213e
--- /dev/null
+++ b/tests/qemu-iotests/169.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 866c1a0..5dc75ef 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -162,6 +162,7 @@
159 rw auto quick
160 rw auto quick
162 auto quick
+169 rw auto quick
170 rw auto quick
171 rw auto quick
172 auto
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 17/17] iotests: add dirty bitmap postcopy test
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (15 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 16/17] iotests: add dirty bitmap migration test Vladimir Sementsov-Ogievskiy
@ 2017-02-07 15:05 ` Vladimir Sementsov-Ogievskiy
2017-02-13 5:26 ` [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Fam Zheng
17 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-07 15:05 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Test
- start two vms (vm_a, vm_b)
- in a
- do writes from set A
- do writes from set B
- fix bitmap sha256
- clear bitmap
- do writes from set A
- start migration
- then, in b
- wait vm start (postcopy should start)
- do writes from set B
- check bitmap sha256
The test should verify postcopy migration and then merging with delta
(changes in target, during postcopy process).
Reduce supported cache modes to only 'none', because with caching enabled
the time from source.STOP to target.RESUME is unpredictable and we can
fail with a timeout while waiting for target.RESUME.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
tests/qemu-iotests/169 | 74 +++++++++++++++++++++++++++++++++++++------
tests/qemu-iotests/169.out | 4 +--
tests/qemu-iotests/iotests.py | 7 +++-
3 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
index ec6cd46..333df58 100755
--- a/tests/qemu-iotests/169
+++ b/tests/qemu-iotests/169
@@ -29,8 +29,14 @@ fifo = os.path.join(iotests.test_dir, 'mig_fifo')
class TestDirtyBitmapMigration(iotests.QMPTestCase):
- def setUp(self):
- size = 0x400000000 # 1G
+ def tearDown(self):
+ self.vm_a.shutdown()
+ self.vm_b.shutdown()
+ os.remove(disk_a)
+ os.remove(disk_b)
+ os.remove(fifo)
+
+ def init(self, size):
os.mkfifo(fifo)
qemu_img('create', '-f', iotests.imgfmt, disk_a, str(size))
qemu_img('create', '-f', iotests.imgfmt, disk_b, str(size))
@@ -40,14 +46,8 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
self.vm_a.launch()
self.vm_b.launch()
- def tearDown(self):
- self.vm_a.shutdown()
- self.vm_b.shutdown()
- os.remove(disk_a)
- os.remove(disk_b)
- os.remove(fifo)
-
def test_migration(self):
+ self.init(0x400000000) # 1G
granularity = 512
regions = [
{ 'start': 0, 'count': 0x100000 },
@@ -81,6 +81,60 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
node='drive0', name='bitmap')
self.assert_qmp(result, 'return/sha256', sha256);
+ def test_postcopy(self):
+ self.init(0x4000000000) # 256G
+ write_size = 0x40000000
+ granularity = 512
+ chunk = 4096
+
+ result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+ name='bitmap', granularity=granularity)
+ self.assert_qmp(result, 'return', {});
+
+ s = 0
+ while s < write_size:
+ self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+ s += 0x10000
+ s = 0x8000
+ while s < write_size:
+ self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+ s += 0x10000
+
+ result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+ node='drive0', name='bitmap')
+ sha256 = result['return']['sha256']
+
+ result = self.vm_a.qmp('block-dirty-bitmap-clear', node='drive0',
+ name='bitmap')
+ self.assert_qmp(result, 'return', {});
+ s = 0
+ while s < write_size:
+ self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+ s += 0x10000
+
+ result = self.vm_a.qmp('migrate-set-capabilities',
+ capabilities=[{'capability': 'dirty-bitmaps',
+ 'state': True}])
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm_a.qmp('migrate', uri='exec:cat>' + fifo)
+ self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+ self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
+
+ s = 0x8000
+ while s < write_size:
+ self.vm_b.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+ s += 0x10000
+
+ result = self.vm_b.qmp('query-block');
+ while len(result['return'][0]['dirty-bitmaps']) > 1:
+ time.sleep(2)
+ result = self.vm_b.qmp('query-block');
+
+ result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+ node='drive0', name='bitmap')
+
+ self.assert_qmp(result, 'return/sha256', sha256);
if __name__ == '__main__':
- iotests.main()
+ iotests.main(supported_fmts=['qcow2'], supported_cache_modes=['none'])
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
index ae1213e..fbc63e6 100644
--- a/tests/qemu-iotests/169.out
+++ b/tests/qemu-iotests/169.out
@@ -1,5 +1,5 @@
-.
+..
----------------------------------------------------------------------
-Ran 1 tests
+Ran 2 tests
OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e110c90..7b64411 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -356,6 +356,10 @@ def verify_platform(supported_oses=['linux']):
if True not in [sys.platform.startswith(x) for x in supported_oses]:
notrun('not suitable for this OS: %s' % sys.platform)
+def verify_cache_mode(supported_cache_modes=[]):
+ if supported_cache_modes and (cachemode not in supported_cache_modes):
+ notrun('not suitable for this cache mode: %s' % cachemode)
+
def supports_quorum():
return 'quorum' in qemu_img_pipe('--help')
@@ -364,7 +368,7 @@ def verify_quorum():
if not supports_quorum():
notrun('quorum support missing')
-def main(supported_fmts=[], supported_oses=['linux']):
+def main(supported_fmts=[], supported_oses=['linux'], supported_cache_modes=[]):
'''Run tests'''
global debug
@@ -381,6 +385,7 @@ def main(supported_fmts=[], supported_oses=['linux']):
verbosity = 1
verify_image_format(supported_fmts)
verify_platform(supported_oses)
+ verify_cache_mode(supported_cache_modes)
# We need to filter out the time taken from the output so that qemu-iotest
# can reliably diff the results against master output.
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration
2017-02-07 15:05 [Qemu-devel] [PATCH v5 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
` (16 preceding siblings ...)
2017-02-07 15:05 ` [Qemu-devel] [PATCH 17/17] iotests: add dirty bitmap postcopy test Vladimir Sementsov-Ogievskiy
@ 2017-02-13 5:26 ` Fam Zheng
17 siblings, 0 replies; 40+ messages in thread
From: Fam Zheng @ 2017-02-13 5:26 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: qemu-block, qemu-devel, kwolf, peter.maydell, lirans, quintela,
jsnow, armbru, mreitz, stefanha, den, amit.shah, pbonzini,
dgilbert
On Tue, 02/07 18:05, Vladimir Sementsov-Ogievskiy wrote:
> Hi all!
>
> There is a new version of dirty bitmap postcopy migration series.
Doesn't apply to master now, could you rebase?
Fam
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-13 9:54 [Qemu-devel] [PATCH v6 " Vladimir Sementsov-Ogievskiy
@ 2017-02-13 9:54 ` Vladimir Sementsov-Ogievskiy
2017-02-16 13:04 ` Fam Zheng
2017-02-24 13:26 ` Dr. David Alan Gilbert
0 siblings, 2 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-13 9:54 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.
If destination qemu already contains a dirty bitmap with the same name
as a migrated bitmap (for the same node), then, if their granularities are
the same, the migration will be done; otherwise an error will be generated.
If destination qemu doesn't contain such a bitmap, it will be created.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
include/migration/block.h | 1 +
include/migration/migration.h | 4 +
migration/Makefile.objs | 2 +-
migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
migration/migration.c | 3 +
migration/savevm.c | 2 +
migration/trace-events | 14 +
vl.c | 1 +
8 files changed, 705 insertions(+), 1 deletion(-)
create mode 100644 migration/block-dirty-bitmap.c
diff --git a/include/migration/block.h b/include/migration/block.h
index 41a1ac8..8333c43 100644
--- a/include/migration/block.h
+++ b/include/migration/block.h
@@ -14,6 +14,7 @@
#ifndef MIGRATION_BLOCK_H
#define MIGRATION_BLOCK_H
+void dirty_bitmap_mig_init(void);
void blk_mig_init(void);
int blk_mig_active(void);
uint64_t blk_mig_bytes_transferred(void);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 46645f4..03a4993 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -371,4 +371,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);
+
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
#endif
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 480dd49..fa3bf6a 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -9,5 +9,5 @@ common-obj-y += qjson.o
common-obj-$(CONFIG_RDMA) += rdma.o
-common-obj-y += block.o
+common-obj-y += block.o block-dirty-bitmap.o
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 0000000..28e3732
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,679 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
+ *
+ * Authors:
+ * Liran Schour <lirans@il.ibm.com>
+ * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM copyright
+ * are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * ***
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only named dirty
+ * bitmaps, associated with root nodes and non-root named nodes are migrated.
+ *
+ * If destination qemu is already containing a dirty bitmap with the same name
+ * as a migrated bitmap (for the same node), then, if their granularities are
+ * the same the migration will be done, otherwise the error will be generated.
+ *
+ * If destination qemu doesn't contain such bitmap it will be created.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
+ * [ 1 byte: node name size ] \ flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be64: granularity
+ * 1 byte: bitmap enabled flag
+ *
+ * # Complete of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer ] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/block.h"
+#include "migration/migration.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include <assert.h>
+
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
+ * bit should be set. */
+#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
+#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
+#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
+#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
+#define DIRTY_BITMAP_MIG_FLAG_START 0x10
+#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
+#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
+
+#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
+
+#define DEBUG_DIRTY_BITMAP_MIGRATION 0
+
+typedef struct DirtyBitmapMigBitmapState {
+ /* Written during setup phase. */
+ BlockDriverState *bs;
+ const char *node_name;
+ BdrvDirtyBitmap *bitmap;
+ uint64_t total_sectors;
+ uint64_t sectors_per_chunk;
+ QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
+
+ /* For bulk phase. */
+ bool bulk_completed;
+ uint64_t cur_sector;
+} DirtyBitmapMigBitmapState;
+
+typedef struct DirtyBitmapMigState {
+ QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
+
+ bool bulk_completed;
+
+ /* for send_bitmap_bits() */
+ BlockDriverState *prev_bs;
+ BdrvDirtyBitmap *prev_bitmap;
+} DirtyBitmapMigState;
+
+typedef struct DirtyBitmapLoadState {
+ uint32_t flags;
+ char node_name[256];
+ char bitmap_name[256];
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+} DirtyBitmapLoadState;
+
+static DirtyBitmapMigState dirty_bitmap_mig_state;
+
+typedef struct DirtyBitmapLoadBitmapState {
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ bool migrated;
+} DirtyBitmapLoadBitmapState;
+static GSList *enabled_bitmaps;
+QemuMutex finish_lock;
+
+void init_dirty_bitmap_incoming_migration(void)
+{
+ qemu_mutex_init(&finish_lock);
+}
+
+static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
+{
+ uint8_t flags = qemu_get_byte(f);
+ if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+ flags = flags << 8 | qemu_get_byte(f);
+ if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+ flags = flags << 16 | qemu_get_be16(f);
+ }
+ }
+
+ return flags;
+}
+
+static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
+{
+ if (!(flags & 0xffffff00)) {
+ qemu_put_byte(f, flags);
+ return;
+ }
+
+ if (!(flags & 0xffff0000)) {
+ qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
+ return;
+ }
+
+ qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
+}
+
+static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint32_t additional_flags)
+{
+ BlockDriverState *bs = dbms->bs;
+ BdrvDirtyBitmap *bitmap = dbms->bitmap;
+ uint32_t flags = additional_flags;
+ trace_send_bitmap_header_enter();
+
+ if (bs != dirty_bitmap_mig_state.prev_bs) {
+ dirty_bitmap_mig_state.prev_bs = bs;
+ flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
+ }
+
+ if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
+ dirty_bitmap_mig_state.prev_bitmap = bitmap;
+ flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
+ }
+
+ qemu_put_bitmap_flags(f, flags);
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ qemu_put_counted_string(f, dbms->node_name);
+ }
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
+ }
+}
+
+static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
+ qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
+ qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
+}
+
+static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
+}
+
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint64_t start_sector, uint32_t nr_sectors)
+{
+ /* align for buffer_is_zero() */
+ uint64_t align = 4 * sizeof(long);
+ uint64_t unaligned_size =
+ bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
+ start_sector, nr_sectors);
+ uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
+ uint8_t *buf = g_malloc0(buf_size);
+ uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+ bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
+ start_sector, nr_sectors);
+
+ if (buffer_is_zero(buf, buf_size)) {
+ g_free(buf);
+ buf = NULL;
+ flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+ }
+
+ trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+ send_bitmap_header(f, dbms, flags);
+
+ qemu_put_be64(f, start_sector);
+ qemu_put_be32(f, nr_sectors);
+
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration. */
+ if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ qemu_fflush(f);
+ } else {
+ qemu_put_be64(f, buf_size);
+ qemu_put_buffer(f, buf, buf_size);
+ }
+
+ g_free(buf);
+}
+
+
+/* Called with iothread lock taken. */
+
+static void init_dirty_bitmap_migration(void)
+{
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ DirtyBitmapMigBitmapState *dbms;
+ BdrvNextIterator it;
+ uint64_t total_bytes = 0;
+
+ dirty_bitmap_mig_state.bulk_completed = false;
+ dirty_bitmap_mig_state.prev_bs = NULL;
+ dirty_bitmap_mig_state.prev_bitmap = NULL;
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
+ bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
+ if (!bdrv_dirty_bitmap_name(bitmap)) {
+ continue;
+ }
+
+ if (!bdrv_get_device_or_node_name(bs)) {
+ /* not named non-root node */
+ continue;
+ }
+
+ dbms = g_new0(DirtyBitmapMigBitmapState, 1);
+ dbms->bs = bs;
+ dbms->node_name = bdrv_get_node_name(bs);
+ if (!dbms->node_name || dbms->node_name[0] == '\0') {
+ dbms->node_name = bdrv_get_device_name(bs);
+ }
+ dbms->bitmap = bitmap;
+ dbms->total_sectors = bdrv_nb_sectors(bs);
+ dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+ bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+ total_bytes +=
+ bdrv_dirty_bitmap_serialization_size(bitmap,
+ 0, dbms->total_sectors);
+
+ QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
+ dbms, entry);
+ }
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
+ dbms->sectors_per_chunk);
+
+ send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
+
+ dbms->cur_sector += nr_sectors;
+ if (dbms->cur_sector >= dbms->total_sectors) {
+ dbms->bulk_completed = true;
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase(QEMUFile *f, bool limit)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ while (!dbms->bulk_completed) {
+ bulk_phase_send_chunk(f, dbms);
+ if (limit && qemu_file_rate_limit(f)) {
+ return;
+ }
+ }
+ }
+
+ dirty_bitmap_mig_state.bulk_completed = true;
+}
+
+/* Called with iothread lock taken. */
+static void dirty_bitmap_mig_cleanup(void)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
+ QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
+ g_free(dbms);
+ }
+}
+
+/* for SaveVMHandlers */
+static void dirty_bitmap_migration_cleanup(void *opaque)
+{
+ dirty_bitmap_mig_cleanup();
+}
+
+static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
+{
+ trace_dirty_bitmap_save_iterate(
+ migration_in_postcopy(migrate_get_current()));
+
+ if (migration_in_postcopy(migrate_get_current()) &&
+ !dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, true);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return dirty_bitmap_mig_state.bulk_completed;
+}
+
+/* Called with iothread lock taken. */
+
+static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ trace_dirty_bitmap_save_complete_enter();
+
+ if (!dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, false);
+ }
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_complete(f, dbms);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ trace_dirty_bitmap_save_complete_finish();
+
+ dirty_bitmap_mig_cleanup();
+ return 0;
+}
+
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ uint64_t pending = 0;
+
+ qemu_mutex_lock_iothread();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
+ uint64_t sectors = dbms->bulk_completed ? 0 :
+ dbms->total_sectors - dbms->cur_sector;
+
+ pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
+ }
+
+ qemu_mutex_unlock_iothread();
+
+ trace_dirty_bitmap_save_pending(pending, max_size);
+
+ *res_postcopy_only += pending;
+}
+
+/* First occurrence of this bitmap. It should be created if doesn't exist */
+static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ uint32_t granularity = qemu_get_be32(f);
+ bool enabled = qemu_get_byte(f);
+
+ if (!s->bitmap) {
+ s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
+ s->bitmap_name, &local_err);
+ if (!s->bitmap) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+ } else {
+ uint32_t dest_granularity =
+ bdrv_dirty_bitmap_granularity(s->bitmap);
+ if (dest_granularity != granularity) {
+ fprintf(stderr,
+ "Error: "
+ "Migrated bitmap granularity (%" PRIu32 ") "
+ "doesn't match the destination bitmap '%s' "
+ "granularity (%" PRIu32 ")\n",
+ granularity,
+ bdrv_dirty_bitmap_name(s->bitmap),
+ dest_granularity);
+ return -EINVAL;
+ }
+ }
+
+ bdrv_disable_dirty_bitmap(s->bitmap);
+ if (enabled) {
+ DirtyBitmapLoadBitmapState *b;
+
+ bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ b = g_new(DirtyBitmapLoadBitmapState, 1);
+ b->bs = s->bs;
+ b->bitmap = s->bitmap;
+ b->migrated = false;
+ enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
+ }
+
+ return 0;
+}
+
+void dirty_bitmap_mig_before_vm_start(void)
+{
+ GSList *item;
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->migrated) {
+ bdrv_enable_dirty_bitmap(b->bitmap);
+ } else {
+ bdrv_dirty_bitmap_enable_successor(b->bitmap);
+ }
+
+ g_free(b);
+ }
+
+ g_slist_free(enabled_bitmaps);
+ enabled_bitmaps = NULL;
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ GSList *item;
+ trace_dirty_bitmap_load_complete();
+ bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->bitmap == s->bitmap) {
+ b->migrated = true;
+ }
+ }
+
+ if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
+ if (enabled_bitmaps == NULL) {
+ /* in postcopy */
+ AioContext *aio_context = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
+ bdrv_enable_dirty_bitmap(s->bitmap);
+
+ aio_context_release(aio_context);
+ } else {
+ /* target not started, successor is empty */
+ bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
+ }
+ }
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ uint64_t first_sector = qemu_get_be64(f);
+ uint32_t nr_sectors = qemu_get_be32(f);
+ trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ trace_dirty_bitmap_load_bits_zeroes();
+ bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
+ nr_sectors, false);
+ } else {
+ uint8_t *buf;
+ uint64_t buf_size = qemu_get_be64(f);
+ uint64_t needed_size =
+ bdrv_dirty_bitmap_serialization_size(s->bitmap,
+ first_sector, nr_sectors);
+
+ if (needed_size > buf_size) {
+ fprintf(stderr,
+ "Error: Migrated bitmap granularity doesn't "
+ "match the destination bitmap '%s' granularity\n",
+ bdrv_dirty_bitmap_name(s->bitmap));
+ return -EINVAL;
+ }
+
+ buf = g_malloc(buf_size);
+ qemu_get_buffer(f, buf, buf_size);
+ bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
+ first_sector,
+ nr_sectors, false);
+ g_free(buf);
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ s->flags = qemu_get_bitmap_flags(f);
+ trace_dirty_bitmap_load_header(s->flags);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ if (!qemu_get_counted_string(f, s->node_name)) {
+ fprintf(stderr, "Unable to read node name string\n");
+ return -EINVAL;
+ }
+ s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+ if (!s->bs) {
+ error_report("%s", error_get_pretty(local_err));
+ error_free(local_err);
+ return -EINVAL;
+ }
+ } else if (!s->bs) {
+ fprintf(stderr, "Error: block device name is not set\n");
+ return -EINVAL;
+ }
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ if (!qemu_get_counted_string(f, s->bitmap_name)) {
+ fprintf(stderr, "Unable to read node name string\n");
+ return -EINVAL;
+ }
+ s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+ /* bitmap may be NULL here, it wouldn't be an error if it is the
+ * first occurrence of the bitmap */
+ if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+ fprintf(stderr, "Error: unknown dirty bitmap "
+ "'%s' for block device '%s'\n",
+ s->bitmap_name, s->node_name);
+ return -EINVAL;
+ }
+ } else if (!s->bitmap) {
+ fprintf(stderr, "Error: block device name is not set\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
+{
+ static DirtyBitmapLoadState s;
+
+ int ret = 0;
+
+ trace_dirty_bitmap_load_enter();
+
+ do {
+ dirty_bitmap_load_header(f, &s);
+
+ if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
+ ret = dirty_bitmap_load_start(f, &s);
+ } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
+ dirty_bitmap_load_complete(f, &s);
+ } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
+ ret = dirty_bitmap_load_bits(f, &s);
+ }
+
+ if (!ret) {
+ ret = qemu_file_get_error(f);
+ }
+
+ if (ret) {
+ return ret;
+ }
+ } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
+
+ trace_dirty_bitmap_load_success();
+ return 0;
+}
+
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms = NULL;
+ init_dirty_bitmap_migration();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_start(f, dbms);
+ }
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return 0;
+}
+
+static bool dirty_bitmap_is_active(void *opaque)
+{
+ return migrate_dirty_bitmaps();
+}
+
+static bool dirty_bitmap_is_active_iterate(void *opaque)
+{
+ return dirty_bitmap_is_active(opaque) && !runstate_is_running();
+}
+
+static bool dirty_bitmap_has_postcopy(void *opaque)
+{
+ return true;
+}
+
+static SaveVMHandlers savevm_dirty_bitmap_handlers = {
+ .save_live_setup = dirty_bitmap_save_setup,
+ .save_live_complete_postcopy = dirty_bitmap_save_complete,
+ .save_live_complete_precopy = dirty_bitmap_save_complete,
+ .has_postcopy = dirty_bitmap_has_postcopy,
+ .save_live_pending = dirty_bitmap_save_pending,
+ .save_live_iterate = dirty_bitmap_save_iterate,
+ .is_active_iterate = dirty_bitmap_is_active_iterate,
+ .load_state = dirty_bitmap_load,
+ .cleanup = dirty_bitmap_migration_cleanup,
+ .is_active = dirty_bitmap_is_active,
+};
+
+void dirty_bitmap_mig_init(void)
+{
+ QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
+
+ register_savevm_live(NULL, "dirty-bitmap", 0, 1,
+ &savevm_dirty_bitmap_handlers,
+ &dirty_bitmap_mig_state);
+}
diff --git a/migration/migration.c b/migration/migration.c
index 179bd04..edf5623 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -122,6 +122,9 @@ MigrationIncomingState *migration_incoming_get_current(void)
QLIST_INIT(&mis_current.loadvm_handlers);
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
+
+ init_dirty_bitmap_incoming_migration();
+
once = true;
}
return &mis_current;
diff --git a/migration/savevm.c b/migration/savevm.c
index 54572ef..927a680 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1651,6 +1651,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_vmstart();
+ dirty_bitmap_mig_before_vm_start();
+
if (autostart) {
/* Hold onto your hats, starting the CPU */
vm_start();
diff --git a/migration/trace-events b/migration/trace-events
index 0212929..38fca41 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -222,3 +222,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"
+
+# migration/block-dirty-bitmap.c
+send_bitmap_header_enter(void) ""
+send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
+dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
+dirty_bitmap_save_complete_enter(void) ""
+dirty_bitmap_save_complete_finish(void) ""
+dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
+dirty_bitmap_load_complete(void) ""
+dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
+dirty_bitmap_load_bits_zeroes(void) ""
+dirty_bitmap_load_header(uint32_t flags) "flags %x"
+dirty_bitmap_load_enter(void) ""
+dirty_bitmap_load_success(void) ""
diff --git a/vl.c b/vl.c
index b4eaf03..f1ee9ff 100644
--- a/vl.c
+++ b/vl.c
@@ -4405,6 +4405,7 @@ int main(int argc, char **argv, char **envp)
blk_mig_init();
ram_mig_init();
+ dirty_bitmap_mig_init();
/* If the currently selected machine wishes to override the units-per-bus
* property of its default HBA interface type, do so now. */
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-13 9:54 ` [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps Vladimir Sementsov-Ogievskiy
@ 2017-02-16 13:04 ` Fam Zheng
2017-02-25 17:56 ` Vladimir Sementsov-Ogievskiy
2017-07-05 9:24 ` Vladimir Sementsov-Ogievskiy
2017-02-24 13:26 ` Dr. David Alan Gilbert
1 sibling, 2 replies; 40+ messages in thread
From: Fam Zheng @ 2017-02-16 13:04 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, stefanha,
amit.shah, quintela, mreitz, kwolf, peter.maydell, dgilbert, den,
jsnow, lirans
On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:
> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
> associated with root nodes and non-root named nodes are migrated.
>
> If destination qemu is already containing a dirty bitmap with the same name
> as a migrated bitmap (for the same node), than, if their granularities are
s/than/then/
> the same the migration will be done, otherwise the error will be generated.
>
> If destination qemu doesn't contain such bitmap it will be created.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> +#include "qemu/osdep.h"
> +#include "block/block.h"
> +#include "block/block_int.h"
> +#include "sysemu/block-backend.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/error-report.h"
> +#include "migration/block.h"
> +#include "migration/migration.h"
> +#include "qemu/hbitmap.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/cutils.h"
> +#include "qapi/error.h"
> +#include "trace.h"
> +#include <assert.h>
Please drop this line since "assert.h" is already included in osdep.h. (And
system headers, if any, ought to be included before local headers.)
> +
> +#define CHUNK_SIZE (1 << 10)
> +
> +/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
> + * bit should be set. */
> +#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
> +#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
> +#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
> +#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
> +#define DIRTY_BITMAP_MIG_FLAG_START 0x10
> +#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
> +#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
> +
> +#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
This flag means two bytes, right? But your above comment says "7-th bit should
be set". This doesn't make sense. Should this be "0x80" too?
> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
> +
> +#define DEBUG_DIRTY_BITMAP_MIGRATION 0
This macro is unused, and you have done well in adding trace points instead;
should it be removed?
> +
> +typedef struct DirtyBitmapMigBitmapState {
> + /* Written during setup phase. */
> + BlockDriverState *bs;
> + const char *node_name;
> + BdrvDirtyBitmap *bitmap;
> + uint64_t total_sectors;
> + uint64_t sectors_per_chunk;
> + QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
> +
> + /* For bulk phase. */
> + bool bulk_completed;
> + uint64_t cur_sector;
> +} DirtyBitmapMigBitmapState;
> +
> +typedef struct DirtyBitmapMigState {
> + QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
> +
> + bool bulk_completed;
> +
> + /* for send_bitmap_bits() */
> + BlockDriverState *prev_bs;
> + BdrvDirtyBitmap *prev_bitmap;
> +} DirtyBitmapMigState;
> +
> +typedef struct DirtyBitmapLoadState {
> + uint32_t flags;
> + char node_name[256];
> + char bitmap_name[256];
> + BlockDriverState *bs;
> + BdrvDirtyBitmap *bitmap;
> +} DirtyBitmapLoadState;
> +
> +static DirtyBitmapMigState dirty_bitmap_mig_state;
> +
> +typedef struct DirtyBitmapLoadBitmapState {
> + BlockDriverState *bs;
> + BdrvDirtyBitmap *bitmap;
> + bool migrated;
> +} DirtyBitmapLoadBitmapState;
> +static GSList *enabled_bitmaps;
> +QemuMutex finish_lock;
> +
> +void init_dirty_bitmap_incoming_migration(void)
> +{
> + qemu_mutex_init(&finish_lock);
> +}
> +
> +static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
> +{
> + uint8_t flags = qemu_get_byte(f);
> + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
> + flags = flags << 8 | qemu_get_byte(f);
> + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
> + flags = flags << 16 | qemu_get_be16(f);
> + }
> + }
> +
> + return flags;
> +}
> +
> +static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
> +{
> + if (!(flags & 0xffffff00)) {
> + qemu_put_byte(f, flags);
Maybe "assert(!(flags & 0x80))",
> + return;
> + }
> +
> + if (!(flags & 0xffff0000)) {
> + qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
> + return;
> + }
> +
> + qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
> +}
> +
> +static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> + uint32_t additional_flags)
> +{
> + BlockDriverState *bs = dbms->bs;
> + BdrvDirtyBitmap *bitmap = dbms->bitmap;
> + uint32_t flags = additional_flags;
> + trace_send_bitmap_header_enter();
> +
> + if (bs != dirty_bitmap_mig_state.prev_bs) {
> + dirty_bitmap_mig_state.prev_bs = bs;
> + flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
> + }
> +
> + if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
> + dirty_bitmap_mig_state.prev_bitmap = bitmap;
> + flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
> + }
> +
> + qemu_put_bitmap_flags(f, flags);
> +
> + if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
> + qemu_put_counted_string(f, dbms->node_name);
> + }
> +
> + if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> + qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
> + }
> +}
> +
> +static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> +{
> + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
> + qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
> + qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
> +}
> +
> +static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> +{
> + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
> +}
> +
> +static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> + uint64_t start_sector, uint32_t nr_sectors)
> +{
> + /* align for buffer_is_zero() */
> + uint64_t align = 4 * sizeof(long);
> + uint64_t unaligned_size =
> + bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
> + start_sector, nr_sectors);
> + uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
> + uint8_t *buf = g_malloc0(buf_size);
> + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
> +
> + bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
> + start_sector, nr_sectors);
While these bdrv_dirty_bitmap_* calls here seem fine, the BdrvDirtyBitmap API is
not thread-safe in general, yet this function is called without any lock. This
feels dangerous; as noted below, I'm most concerned about use-after-free.
> +
> + if (buffer_is_zero(buf, buf_size)) {
> + g_free(buf);
> + buf = NULL;
> + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
> + }
> +
> + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
> +
> + send_bitmap_header(f, dbms, flags);
> +
> + qemu_put_be64(f, start_sector);
> + qemu_put_be32(f, nr_sectors);
> +
> + /* if a block is zero we need to flush here since the network
> + * bandwidth is now a lot higher than the storage device bandwidth.
> + * thus if we queue zero blocks we slow down the migration. */
> + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
> + qemu_fflush(f);
> + } else {
> + qemu_put_be64(f, buf_size);
> + qemu_put_buffer(f, buf, buf_size);
> + }
> +
> + g_free(buf);
> +}
> +
> +
> +/* Called with iothread lock taken. */
> +
> +static void init_dirty_bitmap_migration(void)
> +{
> + BlockDriverState *bs;
> + BdrvDirtyBitmap *bitmap;
> + DirtyBitmapMigBitmapState *dbms;
> + BdrvNextIterator it;
> + uint64_t total_bytes = 0;
> +
> + dirty_bitmap_mig_state.bulk_completed = false;
> + dirty_bitmap_mig_state.prev_bs = NULL;
> + dirty_bitmap_mig_state.prev_bitmap = NULL;
> +
> + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
> + for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
> + bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
> + if (!bdrv_dirty_bitmap_name(bitmap)) {
> + continue;
> + }
> +
> + if (!bdrv_get_device_or_node_name(bs)) {
> + /* not named non-root node */
> + continue;
> + }
Moving this check to the outer loop is better; there is no need to iterate the dirty bitmaps first.
> +
> + dbms = g_new0(DirtyBitmapMigBitmapState, 1);
> + dbms->bs = bs;
Should we do bdrv_ref? migration/block.c references its BDSes indirectly through
BlockBackends it owns.
> + dbms->node_name = bdrv_get_node_name(bs);
> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
> + dbms->node_name = bdrv_get_device_name(bs);
> + }
> + dbms->bitmap = bitmap;
What protects the case that the bitmap is released before migration completes?
> + dbms->total_sectors = bdrv_nb_sectors(bs);
> + dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
> + bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
> +
> + total_bytes +=
> + bdrv_dirty_bitmap_serialization_size(bitmap,
> + 0, dbms->total_sectors);
Please drop total_bytes as it is assigned but not used.
> +
> + QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
> + dbms, entry);
> + }
> + }
> +}
> +
> +/* Called with no lock taken. */
> +static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> +{
> + uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
> + dbms->sectors_per_chunk);
> +
> + send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
> +
> + dbms->cur_sector += nr_sectors;
> + if (dbms->cur_sector >= dbms->total_sectors) {
> + dbms->bulk_completed = true;
> + }
> +}
> +
> +/* Called with no lock taken. */
> +static void bulk_phase(QEMUFile *f, bool limit)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + while (!dbms->bulk_completed) {
> + bulk_phase_send_chunk(f, dbms);
> + if (limit && qemu_file_rate_limit(f)) {
> + return;
> + }
> + }
> + }
> +
> + dirty_bitmap_mig_state.bulk_completed = true;
> +}
> +
> +/* Called with iothread lock taken. */
> +static void dirty_bitmap_mig_cleanup(void)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> +
> + while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
> + QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
> + g_free(dbms);
> + }
> +}
> +
> +/* for SaveVMHandlers */
> +static void dirty_bitmap_migration_cleanup(void *opaque)
> +{
> + dirty_bitmap_mig_cleanup();
> +}
> +
> +static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
> +{
> + trace_dirty_bitmap_save_iterate(
> + migration_in_postcopy(migrate_get_current()));
> +
> + if (migration_in_postcopy(migrate_get_current()) &&
> + !dirty_bitmap_mig_state.bulk_completed) {
> + bulk_phase(f, true);
> + }
> +
> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> +
> + return dirty_bitmap_mig_state.bulk_completed;
> +}
> +
> +/* Called with iothread lock taken. */
> +
> +static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> + trace_dirty_bitmap_save_complete_enter();
> +
> + if (!dirty_bitmap_mig_state.bulk_completed) {
> + bulk_phase(f, false);
> + }
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + send_bitmap_complete(f, dbms);
> + }
> +
> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> +
> + trace_dirty_bitmap_save_complete_finish();
> +
> + dirty_bitmap_mig_cleanup();
> + return 0;
> +}
> +
> +static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
> + uint64_t max_size,
> + uint64_t *res_precopy_only,
> + uint64_t *res_compatible,
> + uint64_t *res_postcopy_only)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> + uint64_t pending = 0;
> +
> + qemu_mutex_lock_iothread();
Why do you need the BQL here but not in bulk_phase()?
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
> + uint64_t sectors = dbms->bulk_completed ? 0 :
> + dbms->total_sectors - dbms->cur_sector;
> +
> + pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
> + }
> +
> + qemu_mutex_unlock_iothread();
> +
> + trace_dirty_bitmap_save_pending(pending, max_size);
> +
> + *res_postcopy_only += pending;
> +}
> +
> +/* First occurrence of this bitmap. It should be created if doesn't exist */
> +static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + Error *local_err = NULL;
> + uint32_t granularity = qemu_get_be32(f);
> + bool enabled = qemu_get_byte(f);
> +
> + if (!s->bitmap) {
> + s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
> + s->bitmap_name, &local_err);
> + if (!s->bitmap) {
> + error_report_err(local_err);
> + return -EINVAL;
> + }
> + } else {
> + uint32_t dest_granularity =
> + bdrv_dirty_bitmap_granularity(s->bitmap);
> + if (dest_granularity != granularity) {
> + fprintf(stderr,
Please use error_report here, to be consistent with the error_report_err above.
This applies to some other places in this patch, too.
> + "Error: "
> + "Migrated bitmap granularity (%" PRIu32 ") "
> + "doesn't match the destination bitmap '%s' "
> + "granularity (%" PRIu32 ")\n",
> + granularity,
> + bdrv_dirty_bitmap_name(s->bitmap),
> + dest_granularity);
> + return -EINVAL;
> + }
> + }
> +
> + bdrv_disable_dirty_bitmap(s->bitmap);
> + if (enabled) {
> + DirtyBitmapLoadBitmapState *b;
> +
> + bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + return -EINVAL;
> + }
> +
> + b = g_new(DirtyBitmapLoadBitmapState, 1);
> + b->bs = s->bs;
> + b->bitmap = s->bitmap;
> + b->migrated = false;
> + enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
> + }
> +
> + return 0;
> +}
> +
> +void dirty_bitmap_mig_before_vm_start(void)
> +{
> + GSList *item;
> +
> + qemu_mutex_lock(&finish_lock);
> +
> + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
> + DirtyBitmapLoadBitmapState *b = item->data;
> +
> + if (b->migrated) {
> + bdrv_enable_dirty_bitmap(b->bitmap);
> + } else {
> + bdrv_dirty_bitmap_enable_successor(b->bitmap);
> + }
> +
> + g_free(b);
> + }
> +
> + g_slist_free(enabled_bitmaps);
> + enabled_bitmaps = NULL;
> +
> + qemu_mutex_unlock(&finish_lock);
> +}
> +
> +static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + GSList *item;
> + trace_dirty_bitmap_load_complete();
> + bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
> +
> + qemu_mutex_lock(&finish_lock);
> +
> + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
> + DirtyBitmapLoadBitmapState *b = item->data;
> +
> + if (b->bitmap == s->bitmap) {
> + b->migrated = true;
> + }
> + }
> +
> + if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
> + if (enabled_bitmaps == NULL) {
> + /* in postcopy */
> + AioContext *aio_context = bdrv_get_aio_context(s->bs);
> + aio_context_acquire(aio_context);
> +
> + bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
> + bdrv_enable_dirty_bitmap(s->bitmap);
> +
> + aio_context_release(aio_context);
> + } else {
> + /* target not started, successor is empty */
> + bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
> + }
> + }
> +
> + qemu_mutex_unlock(&finish_lock);
> +}
> +
> +static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + uint64_t first_sector = qemu_get_be64(f);
> + uint32_t nr_sectors = qemu_get_be32(f);
> + trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
> +
> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
> + trace_dirty_bitmap_load_bits_zeroes();
> + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
> + nr_sectors, false);
> + } else {
> + uint8_t *buf;
> + uint64_t buf_size = qemu_get_be64(f);
> + uint64_t needed_size =
> + bdrv_dirty_bitmap_serialization_size(s->bitmap,
> + first_sector, nr_sectors);
> +
> + if (needed_size > buf_size) {
> + fprintf(stderr,
> + "Error: Migrated bitmap granularity doesn't "
> + "match the destination bitmap '%s' granularity\n",
> + bdrv_dirty_bitmap_name(s->bitmap));
> + return -EINVAL;
> + }
> +
> + buf = g_malloc(buf_size);
> + qemu_get_buffer(f, buf, buf_size);
> + bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
> + first_sector,
> + nr_sectors, false);
> + g_free(buf);
> + }
> +
> + return 0;
> +}
> +
> +static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + Error *local_err = NULL;
> + s->flags = qemu_get_bitmap_flags(f);
> + trace_dirty_bitmap_load_header(s->flags);
> +
> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
> + if (!qemu_get_counted_string(f, s->node_name)) {
> + fprintf(stderr, "Unable to read node name string\n");
> + return -EINVAL;
> + }
> + s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
> + if (!s->bs) {
> + error_report("%s", error_get_pretty(local_err));
> + error_free(local_err);
> + return -EINVAL;
> + }
> + } else if (!s->bs) {
> + fprintf(stderr, "Error: block device name is not set\n");
> + return -EINVAL;
> + }
> +
> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> + if (!qemu_get_counted_string(f, s->bitmap_name)) {
> + fprintf(stderr, "Unable to read node name string\n");
> + return -EINVAL;
> + }
> + s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
> +
> + /* bitmap may be NULL here, it wouldn't be an error if it is the
> + * first occurrence of the bitmap */
> + if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
> + fprintf(stderr, "Error: unknown dirty bitmap "
> + "'%s' for block device '%s'\n",
> + s->bitmap_name, s->node_name);
> + return -EINVAL;
> + }
> + } else if (!s->bitmap) {
> + fprintf(stderr, "Error: block device name is not set\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
> +{
> + static DirtyBitmapLoadState s;
> +
> + int ret = 0;
> +
> + trace_dirty_bitmap_load_enter();
> +
Should version_id be checked? We should detect a version bump of a future
release and fail if not supported.
> + do {
> + dirty_bitmap_load_header(f, &s);
> +
> + if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
> + ret = dirty_bitmap_load_start(f, &s);
> + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
> + dirty_bitmap_load_complete(f, &s);
> + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
> + ret = dirty_bitmap_load_bits(f, &s);
> + }
> +
> + if (!ret) {
> + ret = qemu_file_get_error(f);
> + }
> +
> + if (ret) {
> + return ret;
> + }
> + } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
> +
> + trace_dirty_bitmap_load_success();
> + return 0;
> +}
> +
> +static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
> +{
> + DirtyBitmapMigBitmapState *dbms = NULL;
> + init_dirty_bitmap_migration();
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + send_bitmap_start(f, dbms);
> + }
> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> +
> + return 0;
> +}
> +
> +static bool dirty_bitmap_is_active(void *opaque)
> +{
> + return migrate_dirty_bitmaps();
> +}
> +
> +static bool dirty_bitmap_is_active_iterate(void *opaque)
> +{
> + return dirty_bitmap_is_active(opaque) && !runstate_is_running();
> +}
> +
Fam
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-16 13:04 ` Fam Zheng
@ 2017-02-25 17:56 ` Vladimir Sementsov-Ogievskiy
2017-04-26 13:11 ` Vladimir Sementsov-Ogievskiy
2017-07-05 9:24 ` Vladimir Sementsov-Ogievskiy
1 sibling, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-25 17:56 UTC (permalink / raw)
To: Fam Zheng
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, stefanha,
amit.shah, quintela, mreitz, kwolf, peter.maydell, dgilbert, den,
jsnow, lirans
16.02.2017 16:04, Fam Zheng wrote:
> On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:
>> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
>> associated with root nodes and non-root named nodes are migrated.
>>
>> If destination qemu is already containing a dirty bitmap with the same name
>> as a migrated bitmap (for the same node), than, if their granularities are
[...]
>> +
>> +#define CHUNK_SIZE (1 << 10)
>> +
>> +/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
>> + * bit should be set. */
>> +#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
>> +#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
>> +#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
>> +#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
>> +#define DIRTY_BITMAP_MIG_FLAG_START 0x10
>> +#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
>> +#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
>> +
>> +#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
>> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
> This flag means two bytes, right? But your above comment says "7-th bit should
> be set". This doesn't make sense. Should this be "0x80" too?
Hmm, good catch, you are right. Also, the comment should be fixed to
say that there may be 1, 2 or 4 bytes, and of course the EXTRA_FLAGS bit
may only appear in the first and second bytes (the code does so).
>
>> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
>> +
>> +#define DEBUG_DIRTY_BITMAP_MIGRATION 0
[...]
> +
> +static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> + uint64_t start_sector, uint32_t nr_sectors)
> +{
> + /* align for buffer_is_zero() */
> + uint64_t align = 4 * sizeof(long);
> + uint64_t unaligned_size =
> + bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
> + start_sector, nr_sectors);
> + uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
> + uint8_t *buf = g_malloc0(buf_size);
> + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
> +
> + bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
> + start_sector, nr_sectors);
> While these bdrv_dirty_bitmap_* calls here seem fine, BdrvDirtyBitmap API is not
> in general thread-safe, while this function is called without any lock. This
> feels dangerous, as noted below, I'm most concerned about use-after-free.
This should be safe as it is a postcopy migration - source should be
already inactive.
>
>> +
>> + if (buffer_is_zero(buf, buf_size)) {
>> + g_free(buf);
>> + buf = NULL;
>> + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
>> + }
>> +
>> + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
>> +
>> + send_bitmap_header(f, dbms, flags);
>> +
>> + qemu_put_be64(f, start_sector);
>> + qemu_put_be32(f, nr_sectors);
>> +
>> + /* if a block is zero we need to flush here since the network
>> + * bandwidth is now a lot higher than the storage device bandwidth.
>> + * thus if we queue zero blocks we slow down the migration. */
>> + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
>> + qemu_fflush(f);
>> + } else {
>> + qemu_put_be64(f, buf_size);
>> + qemu_put_buffer(f, buf, buf_size);
>> + }
>> +
>> + g_free(buf);
>> +}
>> +
>> +
[...]
>> +
>> +static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
>> + uint64_t max_size,
>> + uint64_t *res_precopy_only,
>> + uint64_t *res_compatible,
>> + uint64_t *res_postcopy_only)
>> +{
>> + DirtyBitmapMigBitmapState *dbms;
>> + uint64_t pending = 0;
>> +
>> + qemu_mutex_lock_iothread();
> Why do you need the BQL here but not in bulk_phase()?
bulk_phase is in postcopy, source is inactive
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-25 17:56 ` Vladimir Sementsov-Ogievskiy
@ 2017-04-26 13:11 ` Vladimir Sementsov-Ogievskiy
0 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-04-26 13:11 UTC (permalink / raw)
To: Fam Zheng
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, stefanha,
amit.shah, quintela, mreitz, kwolf, peter.maydell, dgilbert, den,
jsnow, lirans
25.02.2017 20:56, Vladimir Sementsov-Ogievskiy wrote:
> 16.02.2017 16:04, Fam Zheng wrote:
>> On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:
>>> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
>>> associated with root nodes and non-root named nodes are migrated.
>>>
>>> If destination qemu is already containing a dirty bitmap with the
>>> same name
>>> as a migrated bitmap (for the same node), than, if their
>>> granularities are
>
> [...]
>
>>> +
>>> +#define CHUNK_SIZE (1 << 10)
>>> +
>>> +/* Flags occupy from one to four bytes. In all but one the 7-th
>>> (EXTRA_FLAGS)
>>> + * bit should be set. */
>>> +#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
>>> +#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
>>> +#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
>>> +#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
>>> +#define DIRTY_BITMAP_MIG_FLAG_START 0x10
>>> +#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
>>> +#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
>>> +
>>> +#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
>>> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
>> This flag means two bytes, right? But your above comment says "7-th
>> bit should
>> be set". This doesn't make sense. Should this be "0x80" too?
>
> Hmm, good caught, you are right. Also, the comment should be fixed so
> that there are may be 1,2 or 4 bytes, and of course EXTRA_FLAGS bit
> may be only in first and second bytes (the code do so).
Aha, now I understand that 0x8000 is ok for big endian
>
>>
>>> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
and this is not ok)
I think I'll just drop this. Anyway, it is dead code; we never send
flags longer than 1 byte in the code. On the other hand, the receive
path for flags is absolutely fine, and it exists for backward
compatibility - we just ignore unknown flags.
>>> +
>>> +#define DEBUG_DIRTY_BITMAP_MIGRATION 0
>
> [...]
>
>> +
>> +static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState
>> *dbms,
>> + uint64_t start_sector, uint32_t
>> nr_sectors)
>> +{
>> + /* align for buffer_is_zero() */
>> + uint64_t align = 4 * sizeof(long);
>> + uint64_t unaligned_size =
>> + bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
>> + start_sector, nr_sectors);
>> + uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
>> + uint8_t *buf = g_malloc0(buf_size);
>> + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
>> +
>> + bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
>> + start_sector, nr_sectors);
>> While these bdrv_dirty_bitmap_* calls here seem fine, BdrvDirtyBitmap
>> API is not
>> in general thread-safe, while this function is called without any
>> lock. This
>> feels dangerous, as noted below, I'm most concerned about
>> use-after-free.
>
> This should be safe as it is a postcopy migration - source should be
> already inactive.
>
>>
>>> +
>>> + if (buffer_is_zero(buf, buf_size)) {
>>> + g_free(buf);
>>> + buf = NULL;
>>> + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
>>> + }
>>> +
>>> + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
>>> +
>>> + send_bitmap_header(f, dbms, flags);
>>> +
>>> + qemu_put_be64(f, start_sector);
>>> + qemu_put_be32(f, nr_sectors);
>>> +
>>> + /* if a block is zero we need to flush here since the network
>>> + * bandwidth is now a lot higher than the storage device
>>> bandwidth.
>>> + * thus if we queue zero blocks we slow down the migration. */
>>> + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
>>> + qemu_fflush(f);
>>> + } else {
>>> + qemu_put_be64(f, buf_size);
>>> + qemu_put_buffer(f, buf, buf_size);
>>> + }
>>> +
>>> + g_free(buf);
>>> +}
>>> +
>>> +
>
> [...]
>
>>> +
>>> +static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
>>> + uint64_t max_size,
>>> + uint64_t *res_precopy_only,
>>> + uint64_t *res_compatible,
>>> + uint64_t *res_postcopy_only)
>>> +{
>>> + DirtyBitmapMigBitmapState *dbms;
>>> + uint64_t pending = 0;
>>> +
>>> + qemu_mutex_lock_iothread();
>> Why do you need the BQL here but not in bulk_phase()?
>
> bulk_phase is in postcopy, source is inactive
>
>
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-16 13:04 ` Fam Zheng
2017-02-25 17:56 ` Vladimir Sementsov-Ogievskiy
@ 2017-07-05 9:24 ` Vladimir Sementsov-Ogievskiy
2017-07-05 21:46 ` John Snow
1 sibling, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-07-05 9:24 UTC (permalink / raw)
To: Fam Zheng
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, stefanha,
amit.shah, quintela, mreitz, kwolf, peter.maydell, dgilbert, den,
jsnow, lirans
16.02.2017 16:04, Fam Zheng wrote:
>> + dbms->node_name = bdrv_get_node_name(bs);
>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>> + dbms->node_name = bdrv_get_device_name(bs);
>> + }
>> + dbms->bitmap = bitmap;
> What protects the case that the bitmap is released before migration completes?
>
What is the source of such deletion? qmp command? Theoretically possible.
I see the following variants:
1. additional variable BdrvDirtyBItmap.migration, which forbids bitmap
deletion
2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will not be
available through qmp
what do you think?
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-05 9:24 ` Vladimir Sementsov-Ogievskiy
@ 2017-07-05 21:46 ` John Snow
2017-07-06 8:05 ` Vladimir Sementsov-Ogievskiy
0 siblings, 1 reply; 40+ messages in thread
From: John Snow @ 2017-07-05 21:46 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
> 16.02.2017 16:04, Fam Zheng wrote:
>>> + dbms->node_name = bdrv_get_node_name(bs);
>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>> + dbms->node_name = bdrv_get_device_name(bs);
>>> + }
>>> + dbms->bitmap = bitmap;
>> What protects the case that the bitmap is released before migration
>> completes?
>>
> What is the source of such deletion? qmp command? Theoretically possible.
>
> I see the following variants:
>
> 1. additional variable BdrvDirtyBItmap.migration, which forbids bitmap
> deletion
>
> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will not be
> available through qmp
>
Making the bitmap anonymous would forbid us to query the bitmap, which
there is no general reason to do, excepting the idea that a third party
attempting to use the bitmap during a migration is probably a bad idea.
I don't really like the idea of "hiding" information from the user,
though, because then we'd have to worry about name collisions when we
de-anonymized the bitmap again. That's not so palatable.
> what do you think?
>
The modes for bitmaps are getting messy.
As a reminder, the officially exposed "modes" of a bitmap are currently:
FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
otherwise "ACTIVE."
DISABLED: Not recording any writes (by choice.)
ACTIVE: Actively recording writes.
These are documented in the public API as possibilities for
DirtyBitmapStatus in block-core.json. We didn't add a new condition for
"readonly" either, which I think is actually required:
READONLY: Not recording any writes (by necessity.)
Your new use case here sounds like Frozen to me, but it simply does not
have an anonymous successor to force it to be recognized as "frozen." We
can add a `bool protected` or `bool frozen` field to force recognition
of this status and adjust the json documentation accordingly.
I think then we'd have four recognized states:
FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
other internal process. Bitmap is otherwise ACTIVE.
DISABLED: Not recording any writes (by choice.)
READONLY: Not able to record any writes (by necessity.)
ACTIVE: Normal bitmap status.
Sound right?
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-05 21:46 ` John Snow
@ 2017-07-06 8:05 ` Vladimir Sementsov-Ogievskiy
2017-07-06 17:53 ` John Snow
0 siblings, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-07-06 8:05 UTC (permalink / raw)
To: John Snow, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
06.07.2017 00:46, John Snow wrote:
>
> On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 16.02.2017 16:04, Fam Zheng wrote:
>>>> + dbms->node_name = bdrv_get_node_name(bs);
>>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>>> + dbms->node_name = bdrv_get_device_name(bs);
>>>> + }
>>>> + dbms->bitmap = bitmap;
>>> What protects the case that the bitmap is released before migration
>>> completes?
>>>
>> What is the source of such deletion? qmp command? Theoretically possible.
>>
>> I see the following variants:
>>
>> 1. additional variable BdrvDirtyBItmap.migration, which forbids bitmap
>> deletion
>>
>> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will not be
>> available through qmp
>>
> Making the bitmap anonymous would forbid us to query the bitmap, which
> there is no general reason to do, excepting the idea that a third party
> attempting to use the bitmap during a migration is probably a bad idea.
> I don't really like the idea of "hiding" information from the user,
> though, because then we'd have to worry about name collisions when we
> de-anonymized the bitmap again. That's not so palatable.
>
>> what do you think?
>>
> The modes for bitmaps are getting messy.
>
> As a reminder, the officially exposed "modes" of a bitmap are currently:
>
> FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
> otherwise "ACTIVE."
> DISABLED: Not recording any writes (by choice.)
> ACTIVE: Actively recording writes.
>
> These are documented in the public API as possibilities for
> DirtyBitmapStatus in block-core.json. We didn't add a new condition for
> "readonly" either, which I think is actually required:
>
> READONLY: Not recording any writes (by necessity.)
>
>
> Your new use case here sounds like Frozen to me, but it simply does not
> have an anonymous successor to force it to be recognized as "frozen." We
> can add a `bool protected` or `bool frozen` field to force recognition
> of this status and adjust the json documentation accordingly.
Bitmaps are selected for migration while the source is running, so we should
protect them (from deletion (or freezing or disabling), not from changing
bits) before the source stops, so that is not like frozen. Bitmaps may be
changed in this state.
It is more like ACTIVE.
We can move bitmap selection on the point after precopy migration, after
source stop, but I'm not sure that it would be good.
>
> I think then we'd have four recognized states:
>
> FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
> other internal process. Bitmap is otherwise ACTIVE.
? Frozen means that all writes goes to the successor and frozen bitmap
itself is unchanged, no?
> DISABLED: Not recording any writes (by choice.)
> READONLY: Not able to record any writes (by necessity.)
> ACTIVE: Normal bitmap status.
>
> Sound right?
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-06 8:05 ` Vladimir Sementsov-Ogievskiy
@ 2017-07-06 17:53 ` John Snow
2017-07-07 9:04 ` Denis V. Lunev
2017-07-07 9:13 ` Vladimir Sementsov-Ogievskiy
0 siblings, 2 replies; 40+ messages in thread
From: John Snow @ 2017-07-06 17:53 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
On 07/06/2017 04:05 AM, Vladimir Sementsov-Ogievskiy wrote:
> 06.07.2017 00:46, John Snow wrote:
>>
>> On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> 16.02.2017 16:04, Fam Zheng wrote:
>>>>> + dbms->node_name = bdrv_get_node_name(bs);
>>>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>>>> + dbms->node_name = bdrv_get_device_name(bs);
>>>>> + }
>>>>> + dbms->bitmap = bitmap;
>>>> What protects the case that the bitmap is released before migration
>>>> completes?
>>>>
>>> What is the source of such deletion? qmp command? Theoretically
>>> possible.
>>>
>>> I see the following variants:
>>>
>>> 1. additional variable BdrvDirtyBItmap.migration, which forbids bitmap
>>> deletion
>>>
>>> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will not be
>>> available through qmp
>>>
>> Making the bitmap anonymous would forbid us to query the bitmap, which
>> there is no general reason to do, excepting the idea that a third party
>> attempting to use the bitmap during a migration is probably a bad idea.
>> I don't really like the idea of "hiding" information from the user,
>> though, because then we'd have to worry about name collisions when we
>> de-anonymized the bitmap again. That's not so palatable.
>>
>>> what do you think?
>>>
>> The modes for bitmaps are getting messy.
>>
>> As a reminder, the officially exposed "modes" of a bitmap are currently:
>>
>> FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
>> otherwise "ACTIVE."
>> DISABLED: Not recording any writes (by choice.)
>> ACTIVE: Actively recording writes.
>>
>> These are documented in the public API as possibilities for
>> DirtyBitmapStatus in block-core.json. We didn't add a new condition for
>> "readonly" either, which I think is actually required:
>>
>> READONLY: Not recording any writes (by necessity.)
>>
>>
>> Your new use case here sounds like Frozen to me, but it simply does not
>> have an anonymous successor to force it to be recognized as "frozen." We
>> can add a `bool protected` or `bool frozen` field to force recognition
>> of this status and adjust the json documentation accordingly.
>
> Bitmaps are selected for migration when source is running, so we should
> protect them (from deletion (or frozing or disabling), not from chaning
> bits) before source stop, so that is not like frozen. Bitmaps may be
> changed in this state.
> It is more like ACTIVE.
>
Right, it's not exactly like frozen's _implementation_ today, but...
> We can move bitmap selection on the point after precopy migration, after
> source stop, but I'm not sure that it would be good.
>
>>
>> I think then we'd have four recognized states:
>>
>> FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
>> other internal process. Bitmap is otherwise ACTIVE.
>
> ? Frozen means that all writes goes to the successor and frozen bitmap
> itself is unchanged, no?
>
I was thinking from the point of view of the API. Of course internally,
you're correct; a "frozen bitmap" is one that is actually disabled and
has an anonymous successor, and that successor records IO.
>From the point of view of the API, a frozen bitmap is just "one bitmap"
that is still recording reads/writes, but is protected from being edited
from QMP.
It depends on if you're looking at bitmaps as opaque API objects or if
you're looking at the implementation.
>From an API point of view, protecting an Active bitmap from being
renamed/cleared/deleted is functionally identical to the existing case
of protecting a bitmap-and-successor pair during a backup job.
>> DISABLED: Not recording any writes (by choice.)
>> READONLY: Not able to record any writes (by necessity.)
>> ACTIVE: Normal bitmap status.
>>
>> Sound right?
>
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-06 17:53 ` John Snow
@ 2017-07-07 9:04 ` Denis V. Lunev
2017-07-07 9:13 ` Vladimir Sementsov-Ogievskiy
1 sibling, 0 replies; 40+ messages in thread
From: Denis V. Lunev @ 2017-07-07 9:04 UTC (permalink / raw)
To: John Snow, Vladimir Sementsov-Ogievskiy, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, pbonzini, mreitz, dgilbert
On 07/06/2017 08:53 PM, John Snow wrote:
>
> On 07/06/2017 04:05 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 06.07.2017 00:46, John Snow wrote:
>>> On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
>>>> 16.02.2017 16:04, Fam Zheng wrote:
>>>>>> + dbms->node_name = bdrv_get_node_name(bs);
>>>>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>>>>> + dbms->node_name = bdrv_get_device_name(bs);
>>>>>> + }
>>>>>> + dbms->bitmap = bitmap;
>>>>> What protects the case that the bitmap is released before migration
>>>>> completes?
>>>>>
>>>> What is the source of such deletion? qmp command? Theoretically
>>>> possible.
>>>>
>>>> I see the following variants:
>>>>
>>>> 1. additional variable BdrvDirtyBItmap.migration, which forbids bitmap
>>>> deletion
>>>>
>>>> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will not be
>>>> available through qmp
>>>>
>>> Making the bitmap anonymous would forbid us to query the bitmap, which
>>> there is no general reason to do, excepting the idea that a third party
>>> attempting to use the bitmap during a migration is probably a bad idea.
>>> I don't really like the idea of "hiding" information from the user,
>>> though, because then we'd have to worry about name collisions when we
>>> de-anonymized the bitmap again. That's not so palatable.
>>>
>>>> what do you think?
>>>>
>>> The modes for bitmaps are getting messy.
>>>
>>> As a reminder, the officially exposed "modes" of a bitmap are currently:
>>>
>>> FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
>>> otherwise "ACTIVE."
>>> DISABLED: Not recording any writes (by choice.)
>>> ACTIVE: Actively recording writes.
>>>
>>> These are documented in the public API as possibilities for
>>> DirtyBitmapStatus in block-core.json. We didn't add a new condition for
>>> "readonly" either, which I think is actually required:
>>>
>>> READONLY: Not recording any writes (by necessity.)
>>>
>>>
>>> Your new use case here sounds like Frozen to me, but it simply does not
>>> have an anonymous successor to force it to be recognized as "frozen." We
>>> can add a `bool protected` or `bool frozen` field to force recognition
>>> of this status and adjust the json documentation accordingly.
>> Bitmaps are selected for migration when source is running, so we should
>> protect them (from deletion (or frozing or disabling), not from chaning
>> bits) before source stop, so that is not like frozen. Bitmaps may be
>> changed in this state.
>> It is more like ACTIVE.
>>
> Right, it's not exactly like frozen's _implementation_ today, but...
>
>> We can move bitmap selection on the point after precopy migration, after
>> source stop, but I'm not sure that it would be good.
>>
>>> I think then we'd have four recognized states:
>>>
>>> FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
>>> other internal process. Bitmap is otherwise ACTIVE.
>> ? Frozen means that all writes goes to the successor and frozen bitmap
>> itself is unchanged, no?
>>
> I was thinking from the point of view of the API. Of course internally,
> you're correct; a "frozen bitmap" is one that is actually disabled and
> has an anonymous successor, and that successor records IO.
>
> From the point of view of the API, a frozen bitmap is just "one bitmap"
> that is still recording reads/writes, but is protected from being edited
> from QMP.
>
> It depends on if you're looking at bitmaps as opaque API objects or if
> you're looking at the implementation.
>
> From an API point of view, protecting an Active bitmap from being
> renamed/cleared/deleted is functionally identical to the existing case
> of protecting a bitmap-and-successor pair during a backup job.
I think that libvirt properly guards QMP avoid commands changing the
device tree etc at the moment. Thus we should be fine here.
Den
>>> DISABLED: Not recording any writes (by choice.)
>>> READONLY: Not able to record any writes (by necessity.)
>>> ACTIVE: Normal bitmap status.
>>>
>>> Sound right?
>>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-06 17:53 ` John Snow
2017-07-07 9:04 ` Denis V. Lunev
@ 2017-07-07 9:13 ` Vladimir Sementsov-Ogievskiy
2017-07-07 23:32 ` John Snow
1 sibling, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-07-07 9:13 UTC (permalink / raw)
To: John Snow, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
06.07.2017 20:53, John Snow wrote:
>
> On 07/06/2017 04:05 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 06.07.2017 00:46, John Snow wrote:
>>> On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
>>>> 16.02.2017 16:04, Fam Zheng wrote:
>>>>>> + dbms->node_name = bdrv_get_node_name(bs);
>>>>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>>>>> + dbms->node_name = bdrv_get_device_name(bs);
>>>>>> + }
>>>>>> + dbms->bitmap = bitmap;
>>>>> What protects the case that the bitmap is released before migration
>>>>> completes?
>>>>>
>>>> What is the source of such deletion? qmp command? Theoretically
>>>> possible.
>>>>
>>>> I see the following variants:
>>>>
>>>> 1. additional variable BdrvDirtyBItmap.migration, which forbids bitmap
>>>> deletion
>>>>
>>>> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will not be
>>>> available through qmp
>>>>
>>> Making the bitmap anonymous would forbid us to query the bitmap, which
>>> there is no general reason to do, excepting the idea that a third party
>>> attempting to use the bitmap during a migration is probably a bad idea.
>>> I don't really like the idea of "hiding" information from the user,
>>> though, because then we'd have to worry about name collisions when we
>>> de-anonymized the bitmap again. That's not so palatable.
>>>
>>>> what do you think?
>>>>
>>> The modes for bitmaps are getting messy.
>>>
>>> As a reminder, the officially exposed "modes" of a bitmap are currently:
>>>
>>> FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
>>> otherwise "ACTIVE."
>>> DISABLED: Not recording any writes (by choice.)
>>> ACTIVE: Actively recording writes.
>>>
>>> These are documented in the public API as possibilities for
>>> DirtyBitmapStatus in block-core.json. We didn't add a new condition for
>>> "readonly" either, which I think is actually required:
>>>
>>> READONLY: Not recording any writes (by necessity.)
>>>
>>>
>>> Your new use case here sounds like Frozen to me, but it simply does not
>>> have an anonymous successor to force it to be recognized as "frozen." We
>>> can add a `bool protected` or `bool frozen` field to force recognition
>>> of this status and adjust the json documentation accordingly.
>> Bitmaps are selected for migration when source is running, so we should
>> protect them (from deletion (or frozing or disabling), not from chaning
>> bits) before source stop, so that is not like frozen. Bitmaps may be
>> changed in this state.
>> It is more like ACTIVE.
>>
> Right, it's not exactly like frozen's _implementation_ today, but...
>
>> We can move bitmap selection on the point after precopy migration, after
>> source stop, but I'm not sure that it would be good.
>>
>>> I think then we'd have four recognized states:
>>>
>>> FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
>>> other internal process. Bitmap is otherwise ACTIVE.
>> ? Frozen means that all writes goes to the successor and frozen bitmap
>> itself is unchanged, no?
>>
> I was thinking from the point of view of the API. Of course internally,
> you're correct; a "frozen bitmap" is one that is actually disabled and
> has an anonymous successor, and that successor records IO.
>
> From the point of view of the API, a frozen bitmap is just "one bitmap"
> that is still recording reads/writes, but is protected from being edited
> from QMP.
>
> It depends on if you're looking at bitmaps as opaque API objects or if
> you're looking at the implementation.
>
> From an API point of view, protecting an Active bitmap from being
> renamed/cleared/deleted is functionally identical to the existing case
> of protecting a bitmap-and-successor pair during a backup job.
Then, this should be described in the API reference.
For now I see here:
# @frozen: The bitmap is currently in-use by a backup operation or block
job,
# and is immutable.
Which looks more like the bitmap is unchanged at all.
---
Do not we want in future allow user to create successors through qmp?
We have the following case: external backup.
Backup should be done through image fleecing (temporary image, online
drive is backing for temp, start backup with sync=none from online drive
to temp, export temp through nbd).
To make this backup incremental here is BLOCK_STATUS nbd extension,
which allows dirty bitmap export.
But we also need a frozen-bitmap mechanism, like with normal incremental
backup. Should we create qmp_create_bitmap_successor and friends for it?
Or we will add separate mechanism like qmp_start_external_backup,
qmp_finish_external_backup(success = true/false) ?
>
>>> DISABLED: Not recording any writes (by choice.)
>>> READONLY: Not able to record any writes (by necessity.)
>>> ACTIVE: Normal bitmap status.
>>>
>>> Sound right?
>>
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-07 9:13 ` Vladimir Sementsov-Ogievskiy
@ 2017-07-07 23:32 ` John Snow
2017-07-10 9:17 ` Vladimir Sementsov-Ogievskiy
0 siblings, 1 reply; 40+ messages in thread
From: John Snow @ 2017-07-07 23:32 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
On 07/07/2017 05:13 AM, Vladimir Sementsov-Ogievskiy wrote:
> 06.07.2017 20:53, John Snow wrote:
>>
>> On 07/06/2017 04:05 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> 06.07.2017 00:46, John Snow wrote:
>>>> On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
>>>>> 16.02.2017 16:04, Fam Zheng wrote:
>>>>>>> + dbms->node_name = bdrv_get_node_name(bs);
>>>>>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>>>>>> + dbms->node_name = bdrv_get_device_name(bs);
>>>>>>> + }
>>>>>>> + dbms->bitmap = bitmap;
>>>>>> What protects the case that the bitmap is released before migration
>>>>>> completes?
>>>>>>
>>>>> What is the source of such deletion? qmp command? Theoretically
>>>>> possible.
>>>>>
>>>>> I see the following variants:
>>>>>
>>>>> 1. additional variable BdrvDirtyBitmap.migration, which forbids bitmap
>>>>> deletion
>>>>>
>>>>> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will
>>>>> not be
>>>>> available through qmp
>>>>>
>>>> Making the bitmap anonymous would forbid us to query the bitmap, which
>>>> there is no general reason to do, excepting the idea that a third party
>>>> attempting to use the bitmap during a migration is probably a bad idea.
>>>> I don't really like the idea of "hiding" information from the user,
>>>> though, because then we'd have to worry about name collisions when we
>>>> de-anonymized the bitmap again. That's not so palatable.
>>>>
>>>>> what do you think?
>>>>>
>>>> The modes for bitmaps are getting messy.
>>>>
>>>> As a reminder, the officially exposed "modes" of a bitmap are
>>>> currently:
>>>>
>>>> FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
>>>> otherwise "ACTIVE."
>>>> DISABLED: Not recording any writes (by choice.)
>>>> ACTIVE: Actively recording writes.
>>>>
>>>> These are documented in the public API as possibilities for
>>>> DirtyBitmapStatus in block-core.json. We didn't add a new condition for
>>>> "readonly" either, which I think is actually required:
>>>>
>>>> READONLY: Not recording any writes (by necessity.)
>>>>
>>>>
>>>> Your new use case here sounds like Frozen to me, but it simply does not
>>>> have an anonymous successor to force it to be recognized as
>>>> "frozen." We
>>>> can add a `bool protected` or `bool frozen` field to force recognition
>>>> of this status and adjust the json documentation accordingly.
>>> Bitmaps are selected for migration when source is running, so we should
>>> protect them (from deletion (or freezing or disabling), not from changing
>>> bits) before source stop, so that is not like frozen. Bitmaps may be
>>> changed in this state.
>>> It is more like ACTIVE.
>>>
>> Right, it's not exactly like frozen's _implementation_ today, but...
>>
>>> We can move bitmap selection on the point after precopy migration, after
>>> source stop, but I'm not sure that it would be good.
>>>
>>>> I think then we'd have four recognized states:
>>>>
>>>> FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
>>>> other internal process. Bitmap is otherwise ACTIVE.
>>> ? Frozen means that all writes goes to the successor and frozen bitmap
>>> itself is unchanged, no?
>>>
>> I was thinking from the point of view of the API. Of course internally,
>> you're correct; a "frozen bitmap" is one that is actually disabled and
>> has an anonymous successor, and that successor records IO.
>>
>> From the point of view of the API, a frozen bitmap is just "one bitmap"
>> that is still recording reads/writes, but is protected from being edited
>> from QMP.
>>
>> It depends on if you're looking at bitmaps as opaque API objects or if
>> you're looking at the implementation.
>>
>> From an API point of view, protecting an Active bitmap from being
>> renamed/cleared/deleted is functionally identical to the existing case
>> of protecting a bitmap-and-successor pair during a backup job.
>
> then, this should be described in API ref..
>
> for now I see here:
> # @frozen: The bitmap is currently in-use by a backup operation or block
> job,
> # and is immutable.
>
> Which looks more like the bitmap is unchanged at all.
>
You're right, this is a bad wording. It's technically true of course,
but in truth the bitmap is effectively recording writes if you consider
the bitmap and the anonymous successor as "one object."
What's really immutable here from the API POV is any user-modifiable
properties.
> ---
>
> Do not we want in future allow user to create successors through qmp?
>
I am not sure if I want to expose this functionality *DIRECTLY* but yes,
this use case will necessitate the creation of successors. I just don't
really want to give the user direct control of them via QMP. They're an
implementation detail, nothing more.
> We have the following case: external backup.
>
> Backup should be done through image fleecing (temporary image, online
> drive is backing for temp, start backup with sync=none from online drive
> to temp, export temp through nbd).
> To make this backup incremental here is BLOCK_STATUS nbd extension,
> which allows dirty bitmap export.
> But we need also frozen-bitmap mechanism like with normal incremental
> backup.. Should we create qmp_create_bitmap_successor and friends for it?
> Or we will add separate mechanism like qmp_start_external_backup,
> qmp_finish_external_backup(success = true/false) ?
>
That was my thought at least, yes. Some kind of job mechanism that
allowed us to have start/stop/cancel mechanics that made it clear to
QEMU if it should roll back the bitmap or promote the successor.
>
>
>>
>>>> DISABLED: Not recording any writes (by choice.)
>>>> READONLY: Not able to record any writes (by necessity.)
>>>> ACTIVE: Normal bitmap status.
>>>>
>>>> Sound right?
>>>
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-07 23:32 ` John Snow
@ 2017-07-10 9:17 ` Vladimir Sementsov-Ogievskiy
2017-07-10 23:27 ` John Snow
0 siblings, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-07-10 9:17 UTC (permalink / raw)
To: John Snow, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
08.07.2017 02:32, John Snow wrote:
>
> On 07/07/2017 05:13 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 06.07.2017 20:53, John Snow wrote:
>>> On 07/06/2017 04:05 AM, Vladimir Sementsov-Ogievskiy wrote:
>>>> 06.07.2017 00:46, John Snow wrote:
>>>>> On 07/05/2017 05:24 AM, Vladimir Sementsov-Ogievskiy wrote:
>>>>>> 16.02.2017 16:04, Fam Zheng wrote:
>>>>>>>> + dbms->node_name = bdrv_get_node_name(bs);
>>>>>>>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>>>>>>>> + dbms->node_name = bdrv_get_device_name(bs);
>>>>>>>> + }
>>>>>>>> + dbms->bitmap = bitmap;
>>>>>>> What protects the case that the bitmap is released before migration
>>>>>>> completes?
>>>>>>>
>>>>>> What is the source of such deletion? qmp command? Theoretically
>>>>>> possible.
>>>>>>
>>>>>> I see the following variants:
>>>>>>
>>>>>> 1. additional variable BdrvDirtyBitmap.migration, which forbids bitmap
>>>>>> deletion
>>>>>>
>>>>>> 2. make bitmap anonymous (bdrv_dirty_bitmap_make_anon) - it will
>>>>>> not be
>>>>>> available through qmp
>>>>>>
>>>>> Making the bitmap anonymous would forbid us to query the bitmap, which
>>>>> there is no general reason to do, excepting the idea that a third party
>>>>> attempting to use the bitmap during a migration is probably a bad idea.
>>>>> I don't really like the idea of "hiding" information from the user,
>>>>> though, because then we'd have to worry about name collisions when we
>>>>> de-anonymized the bitmap again. That's not so palatable.
>>>>>
>>>>>> what do you think?
>>>>>>
>>>>> The modes for bitmaps are getting messy.
>>>>>
>>>>> As a reminder, the officially exposed "modes" of a bitmap are
>>>>> currently:
>>>>>
>>>>> FROZEN: Cannot be reset/deleted. Implication is that the bitmap is
>>>>> otherwise "ACTIVE."
>>>>> DISABLED: Not recording any writes (by choice.)
>>>>> ACTIVE: Actively recording writes.
>>>>>
>>>>> These are documented in the public API as possibilities for
>>>>> DirtyBitmapStatus in block-core.json. We didn't add a new condition for
>>>>> "readonly" either, which I think is actually required:
>>>>>
>>>>> READONLY: Not recording any writes (by necessity.)
>>>>>
>>>>>
>>>>> Your new use case here sounds like Frozen to me, but it simply does not
>>>>> have an anonymous successor to force it to be recognized as
>>>>> "frozen." We
>>>>> can add a `bool protected` or `bool frozen` field to force recognition
>>>>> of this status and adjust the json documentation accordingly.
>>>> Bitmaps are selected for migration when source is running, so we should
>>>> protect them (from deletion (or freezing or disabling), not from changing
>>>> bits) before source stop, so that is not like frozen. Bitmaps may be
>>>> changed in this state.
>>>> It is more like ACTIVE.
>>>>
>>> Right, it's not exactly like frozen's _implementation_ today, but...
>>>
>>>> We can move bitmap selection on the point after precopy migration, after
>>>> source stop, but I'm not sure that it would be good.
>>>>
>>>>> I think then we'd have four recognized states:
>>>>>
>>>>> FROZEN: Cannot be reset/deleted. Bitmap is in-use by a block job or
>>>>> other internal process. Bitmap is otherwise ACTIVE.
>>>> ? Frozen means that all writes goes to the successor and frozen bitmap
>>>> itself is unchanged, no?
>>>>
>>> I was thinking from the point of view of the API. Of course internally,
>>> you're correct; a "frozen bitmap" is one that is actually disabled and
>>> has an anonymous successor, and that successor records IO.
>>>
>>> From the point of view of the API, a frozen bitmap is just "one bitmap"
>>> that is still recording reads/writes, but is protected from being edited
>>> from QMP.
>>>
>>> It depends on if you're looking at bitmaps as opaque API objects or if
>>> you're looking at the implementation.
>>>
>>> From an API point of view, protecting an Active bitmap from being
>>> renamed/cleared/deleted is functionally identical to the existing case
>>> of protecting a bitmap-and-successor pair during a backup job.
>> then, this should be described in API ref..
>>
>> for now I see here:
>> # @frozen: The bitmap is currently in-use by a backup operation or block
>> job,
>> # and is immutable.
>>
>> Which looks more like the bitmap is unchanged at all.
>>
> You're right, this is a bad wording. It's technically true of course,
> but in truth the bitmap is effectively recording writes if you consider
> the bitmap and the anonymous successor as "one object."
>
> What's really immutable here from the API POV is any user-modifiable
> properties.
Can we document this somehow?
In this terminology, what happens with frozen bitmap (which in fact is
like "active" for the user) on successful backup finish? It turns into a
difference between current-state and what-was-backed-up? It is not very
transparent.. May be bitmap part, related to the backup is substituted
from the bitmap?
// it's hard for me to believe that 'frozen' doesn't mean 'can not
move', but if it is already established idea then ok.
>
>> ---
>>
>> Do not we want in future allow user to create successors through qmp?
>>
> I am not sure if I want to expose this functionality *DIRECTLY* but yes,
> this use case will necessitate the creation of successors. I just don't
> really want to give the user direct control of them via QMP. They're an
> implementation detail, nothing more.
>
>> We have the following case: external backup.
>>
>> Backup should be done through image fleecing (temporary image, online
>> drive is backing for temp, start backup with sync=none from online drive
>> to temp, export temp through nbd).
>> To make this backup incremental here is BLOCK_STATUS nbd extension,
>> which allows dirty bitmap export.
>> But we need also frozen-bitmap mechanism like with normal incremental
>> backup.. Should we create qmp_create_bitmap_successor and friends for it?
>> Or we will add separate mechanism like qmp_start_external_backup,
>> qmp_finish_external_backup(success = true/false) ?
>>
> That was my thought at least, yes. Some kind of job mechanism that
> allowed us to have start/stop/cancel mechanics that made it clear to
> QEMU if it should roll back the bitmap or promote the successor.
>
>>
>>>>> DISABLED: Not recording any writes (by choice.)
>>>>> READONLY: Not able to record any writes (by necessity.)
>>>>> ACTIVE: Normal bitmap status.
>>>>>
>>>>> Sound right?
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-07-10 9:17 ` Vladimir Sementsov-Ogievskiy
@ 2017-07-10 23:27 ` John Snow
0 siblings, 0 replies; 40+ messages in thread
From: John Snow @ 2017-07-10 23:27 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, Fam Zheng
Cc: kwolf, peter.maydell, lirans, qemu-block, quintela, qemu-devel,
armbru, stefanha, den, pbonzini, mreitz, dgilbert
On 07/10/2017 05:17 AM, Vladimir Sementsov-Ogievskiy wrote:
>
> Can we document this somehow?
>
> In this terminology, what happens with frozen bitmap (which in fact is
> like "active" for the user) on successful backup finish? It turns into a
> difference between current-state and what-was-backed-up? It is not very
> transparent.. May be bitmap part, related to the backup is substituted
> from the bitmap?
>
> // it's hard for me to believe that 'frozen' doesn't mean 'can not
> move', but if it is already established idea then ok.
Admittedly a mistake on my part, mixing up implementation detail and
exposed interface. It's not necessarily an established idea...
To the end-user, a bitmap that is "frozen" is still indeed recording new
writes from a certain point of view, because once the backup completes,
the bitmap (secretly: the promoted successor) will contain writes that
occurred during that frozen period of time. How could this happen unless
our "frozen" bitmap was recording writes?
Oops, that's not very frozen at all, is it.
That's an unfortunate bit of design that I didn't take into account when
I named the "Frozen" property and exposed it via the JSON API. I was
only thinking about it from the implementation point of view, like you
are -- the bitmap _is_ well and truly frozen: it cannot be modified and
does not record any writes. (Only the successor does, as you know.)
So as it stands we have a discrepancy in the code between internal and
external usage over what exactly "frozen" implies. It's a poor name.
I should have named the QAPI-facing portion of it "locked" or
"protected" or something that helped imply that it could continue to
change, but it was off-limits.
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-13 9:54 ` [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps Vladimir Sementsov-Ogievskiy
2017-02-16 13:04 ` Fam Zheng
@ 2017-02-24 13:26 ` Dr. David Alan Gilbert
2017-02-25 17:25 ` Vladimir Sementsov-Ogievskiy
1 sibling, 1 reply; 40+ messages in thread
From: Dr. David Alan Gilbert @ 2017-02-24 13:26 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, famz, stefanha,
quintela, mreitz, kwolf, peter.maydell, den, jsnow, lirans
* Vladimir Sementsov-Ogievskiy (vsementsov@virtuozzo.com) wrote:
> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
> associated with root nodes and non-root named nodes are migrated.
>
> If destination qemu is already containing a dirty bitmap with the same name
> as a migrated bitmap (for the same node), then, if their granularities are
> the same the migration will be done, otherwise the error will be generated.
>
> If destination qemu doesn't contain such bitmap it will be created.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> include/migration/block.h | 1 +
> include/migration/migration.h | 4 +
> migration/Makefile.objs | 2 +-
> migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
> migration/migration.c | 3 +
> migration/savevm.c | 2 +
> migration/trace-events | 14 +
> vl.c | 1 +
> 8 files changed, 705 insertions(+), 1 deletion(-)
> create mode 100644 migration/block-dirty-bitmap.c
>
> diff --git a/include/migration/block.h b/include/migration/block.h
> index 41a1ac8..8333c43 100644
> --- a/include/migration/block.h
> +++ b/include/migration/block.h
> @@ -14,6 +14,7 @@
> #ifndef MIGRATION_BLOCK_H
> #define MIGRATION_BLOCK_H
>
> +void dirty_bitmap_mig_init(void);
> void blk_mig_init(void);
> int blk_mig_active(void);
> uint64_t blk_mig_bytes_transferred(void);
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 46645f4..03a4993 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -371,4 +371,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
> PostcopyState postcopy_state_get(void);
> /* Set the state and return the old state */
> PostcopyState postcopy_state_set(PostcopyState new_state);
> +
> +void dirty_bitmap_mig_before_vm_start(void);
> +void init_dirty_bitmap_incoming_migration(void);
> +
> #endif
> diff --git a/migration/Makefile.objs b/migration/Makefile.objs
> index 480dd49..fa3bf6a 100644
> --- a/migration/Makefile.objs
> +++ b/migration/Makefile.objs
> @@ -9,5 +9,5 @@ common-obj-y += qjson.o
>
> common-obj-$(CONFIG_RDMA) += rdma.o
>
> -common-obj-y += block.o
> +common-obj-y += block.o block-dirty-bitmap.o
>
> diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
> new file mode 100644
> index 0000000..28e3732
> --- /dev/null
> +++ b/migration/block-dirty-bitmap.c
> @@ -0,0 +1,679 @@
> +/*
> + * Block dirty bitmap postcopy migration
> + *
> + * Copyright IBM, Corp. 2009
> + * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
> + *
> + * Authors:
> + * Liran Schour <lirans@il.ibm.com>
> + * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + * This file is derived from migration/block.c, so it's author and IBM copyright
> + * are here, although content is quite different.
> + *
> + * Contributions after 2012-01-13 are licensed under the terms of the
> + * GNU GPL, version 2 or (at your option) any later version.
> + *
> + * ***
> + *
> + * Here postcopy migration of dirty bitmaps is realized. Only named dirty
> + * bitmaps, associated with root nodes and non-root named nodes are migrated.
> + *
> + * If destination qemu is already containing a dirty bitmap with the same name
> + * as a migrated bitmap (for the same node), then, if their granularities are
> + * the same the migration will be done, otherwise the error will be generated.
> + *
> + * If destination qemu doesn't contain such bitmap it will be created.
> + *
> + * format of migration:
> + *
> + * # Header (shared for different chunk types)
> + * 1, 2 or 4 bytes: flags (see qemu_{put,put}_flags)
> + * [ 1 byte: node name size ] \ flags & DEVICE_NAME
> + * [ n bytes: node name ] /
> + * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
> + * [ n bytes: bitmap name ] /
> + *
> + * # Start of bitmap migration (flags & START)
> + * header
> + * be64: granularity
> + * 1 byte: bitmap enabled flag
> + *
> + * # Complete of bitmap migration (flags & COMPLETE)
> + * header
> + *
> + * # Data chunk of bitmap migration
> + * header
> + * be64: start sector
> + * be32: number of sectors
> + * [ be64: buffer size ] \ ! (flags & ZEROES)
> + * [ n bytes: buffer ] /
> + *
> + * The last chunk in stream should contain flags & EOS. The chunk may skip
> + * device and/or bitmap names, assuming them to be the same with the previous
> + * chunk.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "block/block.h"
> +#include "block/block_int.h"
> +#include "sysemu/block-backend.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/error-report.h"
> +#include "migration/block.h"
> +#include "migration/migration.h"
> +#include "qemu/hbitmap.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/cutils.h"
> +#include "qapi/error.h"
> +#include "trace.h"
> +#include <assert.h>
> +
> +#define CHUNK_SIZE (1 << 10)
> +
> +/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
> + * bit should be set. */
> +#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
> +#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
> +#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
> +#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
> +#define DIRTY_BITMAP_MIG_FLAG_START 0x10
> +#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
> +#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
> +
> +#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
> +
> +#define DEBUG_DIRTY_BITMAP_MIGRATION 0
> +
> +typedef struct DirtyBitmapMigBitmapState {
> + /* Written during setup phase. */
> + BlockDriverState *bs;
> + const char *node_name;
> + BdrvDirtyBitmap *bitmap;
> + uint64_t total_sectors;
> + uint64_t sectors_per_chunk;
> + QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
> +
> + /* For bulk phase. */
> + bool bulk_completed;
> + uint64_t cur_sector;
> +} DirtyBitmapMigBitmapState;
> +
> +typedef struct DirtyBitmapMigState {
> + QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
> +
> + bool bulk_completed;
> +
> + /* for send_bitmap_bits() */
> + BlockDriverState *prev_bs;
> + BdrvDirtyBitmap *prev_bitmap;
> +} DirtyBitmapMigState;
> +
> +typedef struct DirtyBitmapLoadState {
> + uint32_t flags;
> + char node_name[256];
> + char bitmap_name[256];
> + BlockDriverState *bs;
> + BdrvDirtyBitmap *bitmap;
> +} DirtyBitmapLoadState;
> +
> +static DirtyBitmapMigState dirty_bitmap_mig_state;
> +
> +typedef struct DirtyBitmapLoadBitmapState {
> + BlockDriverState *bs;
> + BdrvDirtyBitmap *bitmap;
> + bool migrated;
> +} DirtyBitmapLoadBitmapState;
> +static GSList *enabled_bitmaps;
> +QemuMutex finish_lock;
> +
> +void init_dirty_bitmap_incoming_migration(void)
> +{
> + qemu_mutex_init(&finish_lock);
> +}
> +
> +static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
> +{
> + uint8_t flags = qemu_get_byte(f);
> + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
> + flags = flags << 8 | qemu_get_byte(f);
> + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
> + flags = flags << 16 | qemu_get_be16(f);
> + }
> + }
> +
> + return flags;
> +}
> +
> +static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
> +{
> + if (!(flags & 0xffffff00)) {
> + qemu_put_byte(f, flags);
> + return;
> + }
> +
> + if (!(flags & 0xffff0000)) {
> + qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
> + return;
> + }
> +
> + qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
Is this stuff worth the hassle? It would seem easier just to send
the 32bits each time; the overhead doesn't seem to be much except
in the case where you send minimum size chunks a lot.
Dave
> +}
> +
> +static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> + uint32_t additional_flags)
> +{
> + BlockDriverState *bs = dbms->bs;
> + BdrvDirtyBitmap *bitmap = dbms->bitmap;
> + uint32_t flags = additional_flags;
> + trace_send_bitmap_header_enter();
> +
> + if (bs != dirty_bitmap_mig_state.prev_bs) {
> + dirty_bitmap_mig_state.prev_bs = bs;
> + flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
> + }
> +
> + if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
> + dirty_bitmap_mig_state.prev_bitmap = bitmap;
> + flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
> + }
> +
> + qemu_put_bitmap_flags(f, flags);
> +
> + if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
> + qemu_put_counted_string(f, dbms->node_name);
> + }
> +
> + if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> + qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
> + }
> +}
> +
> +static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> +{
> + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
> + qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
> + qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
> +}
> +
> +static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> +{
> + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
> +}
> +
> +static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> + uint64_t start_sector, uint32_t nr_sectors)
> +{
> + /* align for buffer_is_zero() */
> + uint64_t align = 4 * sizeof(long);
> + uint64_t unaligned_size =
> + bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
> + start_sector, nr_sectors);
> + uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
> + uint8_t *buf = g_malloc0(buf_size);
> + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
> +
> + bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
> + start_sector, nr_sectors);
> +
> + if (buffer_is_zero(buf, buf_size)) {
> + g_free(buf);
> + buf = NULL;
> + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
> + }
> +
> + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
> +
> + send_bitmap_header(f, dbms, flags);
> +
> + qemu_put_be64(f, start_sector);
> + qemu_put_be32(f, nr_sectors);
> +
> + /* if a block is zero we need to flush here since the network
> + * bandwidth is now a lot higher than the storage device bandwidth.
> + * thus if we queue zero blocks we slow down the migration. */
> + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
> + qemu_fflush(f);
> + } else {
> + qemu_put_be64(f, buf_size);
> + qemu_put_buffer(f, buf, buf_size);
> + }
> +
> + g_free(buf);
> +}
> +
> +
> +/* Called with iothread lock taken. */
> +
> +static void init_dirty_bitmap_migration(void)
> +{
> + BlockDriverState *bs;
> + BdrvDirtyBitmap *bitmap;
> + DirtyBitmapMigBitmapState *dbms;
> + BdrvNextIterator it;
> + uint64_t total_bytes = 0;
> +
> + dirty_bitmap_mig_state.bulk_completed = false;
> + dirty_bitmap_mig_state.prev_bs = NULL;
> + dirty_bitmap_mig_state.prev_bitmap = NULL;
> +
> + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
> + for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
> + bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
> + if (!bdrv_dirty_bitmap_name(bitmap)) {
> + continue;
> + }
> +
> + if (!bdrv_get_device_or_node_name(bs)) {
> + /* not named non-root node */
> + continue;
> + }
> +
> + dbms = g_new0(DirtyBitmapMigBitmapState, 1);
> + dbms->bs = bs;
> + dbms->node_name = bdrv_get_node_name(bs);
> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
> + dbms->node_name = bdrv_get_device_name(bs);
> + }
> + dbms->bitmap = bitmap;
> + dbms->total_sectors = bdrv_nb_sectors(bs);
> + dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
> + bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
> +
> + total_bytes +=
> + bdrv_dirty_bitmap_serialization_size(bitmap,
> + 0, dbms->total_sectors);
> +
> + QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
> + dbms, entry);
> + }
> + }
> +}
> +
> +/* Called with no lock taken. */
> +static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> +{
> + uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
> + dbms->sectors_per_chunk);
> +
> + send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
> +
> + dbms->cur_sector += nr_sectors;
> + if (dbms->cur_sector >= dbms->total_sectors) {
> + dbms->bulk_completed = true;
> + }
> +}
> +
> +/* Called with no lock taken. */
> +static void bulk_phase(QEMUFile *f, bool limit)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + while (!dbms->bulk_completed) {
> + bulk_phase_send_chunk(f, dbms);
> + if (limit && qemu_file_rate_limit(f)) {
> + return;
> + }
> + }
> + }
> +
> + dirty_bitmap_mig_state.bulk_completed = true;
> +}
> +
> +/* Called with iothread lock taken. */
> +static void dirty_bitmap_mig_cleanup(void)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> +
> + while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
> + QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
> + g_free(dbms);
> + }
> +}
> +
> +/* for SaveVMHandlers */
> +static void dirty_bitmap_migration_cleanup(void *opaque)
> +{
> + dirty_bitmap_mig_cleanup();
> +}
> +
> +static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
> +{
> + trace_dirty_bitmap_save_iterate(
> + migration_in_postcopy(migrate_get_current()));
> +
> + if (migration_in_postcopy(migrate_get_current()) &&
> + !dirty_bitmap_mig_state.bulk_completed) {
> + bulk_phase(f, true);
> + }
> +
> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> +
> + return dirty_bitmap_mig_state.bulk_completed;
> +}
> +
> +/* Called with iothread lock taken. */
> +
> +static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> + trace_dirty_bitmap_save_complete_enter();
> +
> + if (!dirty_bitmap_mig_state.bulk_completed) {
> + bulk_phase(f, false);
> + }
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + send_bitmap_complete(f, dbms);
> + }
> +
> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> +
> + trace_dirty_bitmap_save_complete_finish();
> +
> + dirty_bitmap_mig_cleanup();
> + return 0;
> +}
> +
> +static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
> + uint64_t max_size,
> + uint64_t *res_precopy_only,
> + uint64_t *res_compatible,
> + uint64_t *res_postcopy_only)
> +{
> + DirtyBitmapMigBitmapState *dbms;
> + uint64_t pending = 0;
> +
> + qemu_mutex_lock_iothread();
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
> + uint64_t sectors = dbms->bulk_completed ? 0 :
> + dbms->total_sectors - dbms->cur_sector;
> +
> + pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
> + }
> +
> + qemu_mutex_unlock_iothread();
> +
> + trace_dirty_bitmap_save_pending(pending, max_size);
> +
> + *res_postcopy_only += pending;
> +}
> +
> +/* First occurrence of this bitmap. It should be created if doesn't exist */
> +static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + Error *local_err = NULL;
> + uint32_t granularity = qemu_get_be32(f);
> + bool enabled = qemu_get_byte(f);
> +
> + if (!s->bitmap) {
> + s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
> + s->bitmap_name, &local_err);
> + if (!s->bitmap) {
> + error_report_err(local_err);
> + return -EINVAL;
> + }
> + } else {
> + uint32_t dest_granularity =
> + bdrv_dirty_bitmap_granularity(s->bitmap);
> + if (dest_granularity != granularity) {
> + fprintf(stderr,
> + "Error: "
> + "Migrated bitmap granularity (%" PRIu32 ") "
> + "doesn't match the destination bitmap '%s' "
> + "granularity (%" PRIu32 ")\n",
> + granularity,
> + bdrv_dirty_bitmap_name(s->bitmap),
> + dest_granularity);
> + return -EINVAL;
> + }
> + }
> +
> + bdrv_disable_dirty_bitmap(s->bitmap);
> + if (enabled) {
> + DirtyBitmapLoadBitmapState *b;
> +
> + bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + return -EINVAL;
> + }
> +
> + b = g_new(DirtyBitmapLoadBitmapState, 1);
> + b->bs = s->bs;
> + b->bitmap = s->bitmap;
> + b->migrated = false;
> + enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
> + }
> +
> + return 0;
> +}
> +
> +void dirty_bitmap_mig_before_vm_start(void)
> +{
> + GSList *item;
> +
> + qemu_mutex_lock(&finish_lock);
> +
> + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
> + DirtyBitmapLoadBitmapState *b = item->data;
> +
> + if (b->migrated) {
> + bdrv_enable_dirty_bitmap(b->bitmap);
> + } else {
> + bdrv_dirty_bitmap_enable_successor(b->bitmap);
> + }
> +
> + g_free(b);
> + }
> +
> + g_slist_free(enabled_bitmaps);
> + enabled_bitmaps = NULL;
> +
> + qemu_mutex_unlock(&finish_lock);
> +}
> +
> +static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + GSList *item;
> + trace_dirty_bitmap_load_complete();
> + bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
> +
> + qemu_mutex_lock(&finish_lock);
> +
> + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
> + DirtyBitmapLoadBitmapState *b = item->data;
> +
> + if (b->bitmap == s->bitmap) {
> + b->migrated = true;
> + }
> + }
> +
> + if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
> + if (enabled_bitmaps == NULL) {
> + /* in postcopy */
> + AioContext *aio_context = bdrv_get_aio_context(s->bs);
> + aio_context_acquire(aio_context);
> +
> + bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
> + bdrv_enable_dirty_bitmap(s->bitmap);
> +
> + aio_context_release(aio_context);
> + } else {
> + /* target not started, successor is empty */
> + bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
> + }
> + }
> +
> + qemu_mutex_unlock(&finish_lock);
> +}
> +
> +static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + uint64_t first_sector = qemu_get_be64(f);
> + uint32_t nr_sectors = qemu_get_be32(f);
> + trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
> +
> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
> + trace_dirty_bitmap_load_bits_zeroes();
> + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
> + nr_sectors, false);
> + } else {
> + uint8_t *buf;
> + uint64_t buf_size = qemu_get_be64(f);
> + uint64_t needed_size =
> + bdrv_dirty_bitmap_serialization_size(s->bitmap,
> + first_sector, nr_sectors);
> +
> + if (needed_size > buf_size) {
> + fprintf(stderr,
> + "Error: Migrated bitmap granularity doesn't "
> + "match the destination bitmap '%s' granularity\n",
> + bdrv_dirty_bitmap_name(s->bitmap));
> + return -EINVAL;
> + }
> +
> + buf = g_malloc(buf_size);
> + qemu_get_buffer(f, buf, buf_size);
> + bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
> + first_sector,
> + nr_sectors, false);
> + g_free(buf);
> + }
> +
> + return 0;
> +}
> +
> +static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
> +{
> + Error *local_err = NULL;
> + s->flags = qemu_get_bitmap_flags(f);
> + trace_dirty_bitmap_load_header(s->flags);
> +
> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
> + if (!qemu_get_counted_string(f, s->node_name)) {
> + fprintf(stderr, "Unable to read node name string\n");
> + return -EINVAL;
> + }
> + s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
> + if (!s->bs) {
> + error_report("%s", error_get_pretty(local_err));
> + error_free(local_err);
> + return -EINVAL;
> + }
> + } else if (!s->bs) {
> + fprintf(stderr, "Error: block device name is not set\n");
> + return -EINVAL;
> + }
> +
> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> + if (!qemu_get_counted_string(f, s->bitmap_name)) {
> + fprintf(stderr, "Unable to read node name string\n");
> + return -EINVAL;
> + }
> + s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
> +
> + /* bitmap may be NULL here, it wouldn't be an error if it is the
> + * first occurrence of the bitmap */
> + if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
> + fprintf(stderr, "Error: unknown dirty bitmap "
> + "'%s' for block device '%s'\n",
> + s->bitmap_name, s->node_name);
> + return -EINVAL;
> + }
> + } else if (!s->bitmap) {
> + fprintf(stderr, "Error: block device name is not set\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
> +{
> + static DirtyBitmapLoadState s;
> +
> + int ret = 0;
> +
> + trace_dirty_bitmap_load_enter();
> +
> + do {
> + dirty_bitmap_load_header(f, &s);
> +
> + if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
> + ret = dirty_bitmap_load_start(f, &s);
> + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
> + dirty_bitmap_load_complete(f, &s);
> + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
> + ret = dirty_bitmap_load_bits(f, &s);
> + }
> +
> + if (!ret) {
> + ret = qemu_file_get_error(f);
> + }
> +
> + if (ret) {
> + return ret;
> + }
> + } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
> +
> + trace_dirty_bitmap_load_success();
> + return 0;
> +}
> +
> +static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
> +{
> + DirtyBitmapMigBitmapState *dbms = NULL;
> + init_dirty_bitmap_migration();
> +
> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> + send_bitmap_start(f, dbms);
> + }
> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> +
> + return 0;
> +}
> +
> +static bool dirty_bitmap_is_active(void *opaque)
> +{
> + return migrate_dirty_bitmaps();
> +}
> +
> +static bool dirty_bitmap_is_active_iterate(void *opaque)
> +{
> + return dirty_bitmap_is_active(opaque) && !runstate_is_running();
> +}
> +
> +static bool dirty_bitmap_has_postcopy(void *opaque)
> +{
> + return true;
> +}
> +
> +static SaveVMHandlers savevm_dirty_bitmap_handlers = {
> + .save_live_setup = dirty_bitmap_save_setup,
> + .save_live_complete_postcopy = dirty_bitmap_save_complete,
> + .save_live_complete_precopy = dirty_bitmap_save_complete,
> + .has_postcopy = dirty_bitmap_has_postcopy,
> + .save_live_pending = dirty_bitmap_save_pending,
> + .save_live_iterate = dirty_bitmap_save_iterate,
> + .is_active_iterate = dirty_bitmap_is_active_iterate,
> + .load_state = dirty_bitmap_load,
> + .cleanup = dirty_bitmap_migration_cleanup,
> + .is_active = dirty_bitmap_is_active,
> +};
> +
> +void dirty_bitmap_mig_init(void)
> +{
> + QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
> +
> + register_savevm_live(NULL, "dirty-bitmap", 0, 1,
> + &savevm_dirty_bitmap_handlers,
> + &dirty_bitmap_mig_state);
> +}
> diff --git a/migration/migration.c b/migration/migration.c
> index 179bd04..edf5623 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -122,6 +122,9 @@ MigrationIncomingState *migration_incoming_get_current(void)
> QLIST_INIT(&mis_current.loadvm_handlers);
> qemu_mutex_init(&mis_current.rp_mutex);
> qemu_event_init(&mis_current.main_thread_load_event, false);
> +
> + init_dirty_bitmap_incoming_migration();
> +
> once = true;
> }
> return &mis_current;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 54572ef..927a680 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1651,6 +1651,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
>
> trace_loadvm_postcopy_handle_run_vmstart();
>
> + dirty_bitmap_mig_before_vm_start();
> +
> if (autostart) {
> /* Hold onto your hats, starting the CPU */
> vm_start();
> diff --git a/migration/trace-events b/migration/trace-events
> index 0212929..38fca41 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -222,3 +222,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
> colo_send_message(const char *msg) "Send '%s' message"
> colo_receive_message(const char *msg) "Receive '%s' message"
> colo_failover_set_state(const char *new_state) "new state %s"
> +
> +# migration/block-dirty-bitmap.c
> +send_bitmap_header_enter(void) ""
> +send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
> +dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
> +dirty_bitmap_save_complete_enter(void) ""
> +dirty_bitmap_save_complete_finish(void) ""
> +dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
> +dirty_bitmap_load_complete(void) ""
> +dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
> +dirty_bitmap_load_bits_zeroes(void) ""
> +dirty_bitmap_load_header(uint32_t flags) "flags %x"
> +dirty_bitmap_load_enter(void) ""
> +dirty_bitmap_load_success(void) ""
> diff --git a/vl.c b/vl.c
> index b4eaf03..f1ee9ff 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -4405,6 +4405,7 @@ int main(int argc, char **argv, char **envp)
>
> blk_mig_init();
> ram_mig_init();
> + dirty_bitmap_mig_init();
>
> /* If the currently selected machine wishes to override the units-per-bus
> * property of its default HBA interface type, do so now. */
> --
> 1.8.3.1
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-24 13:26 ` Dr. David Alan Gilbert
@ 2017-02-25 17:25 ` Vladimir Sementsov-Ogievskiy
2017-02-27 20:12 ` Dr. David Alan Gilbert
0 siblings, 1 reply; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2017-02-25 17:25 UTC (permalink / raw)
To: Dr. David Alan Gilbert
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, famz, stefanha,
quintela, mreitz, kwolf, peter.maydell, den, jsnow, lirans
24.02.2017 16:26, Dr. David Alan Gilbert wrote:
> * Vladimir Sementsov-Ogievskiy (vsementsov@virtuozzo.com) wrote:
>> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
>> associated with root nodes and non-root named nodes are migrated.
>>
>> If destination qemu already contains a dirty bitmap with the same name
>> as a migrated bitmap (for the same node), then, if their granularities are
>> the same, the migration will be done; otherwise an error will be generated.
>>
>> If destination qemu doesn't contain such bitmap it will be created.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>> include/migration/block.h | 1 +
>> include/migration/migration.h | 4 +
>> migration/Makefile.objs | 2 +-
>> migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
>> migration/migration.c | 3 +
>> migration/savevm.c | 2 +
>> migration/trace-events | 14 +
>> vl.c | 1 +
>> 8 files changed, 705 insertions(+), 1 deletion(-)
>> create mode 100644 migration/block-dirty-bitmap.c
>>
>> diff --git a/include/migration/block.h b/include/migration/block.h
>> index 41a1ac8..8333c43 100644
>> --- a/include/migration/block.h
>> +++ b/include/migration/block.h
>> @@ -14,6 +14,7 @@
>> #ifndef MIGRATION_BLOCK_H
>> #define MIGRATION_BLOCK_H
>>
>> +void dirty_bitmap_mig_init(void);
>> void blk_mig_init(void);
>> int blk_mig_active(void);
>> uint64_t blk_mig_bytes_transferred(void);
>> diff --git a/include/migration/migration.h b/include/migration/migration.h
>> index 46645f4..03a4993 100644
>> --- a/include/migration/migration.h
>> +++ b/include/migration/migration.h
>> @@ -371,4 +371,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
>> PostcopyState postcopy_state_get(void);
>> /* Set the state and return the old state */
>> PostcopyState postcopy_state_set(PostcopyState new_state);
>> +
>> +void dirty_bitmap_mig_before_vm_start(void);
>> +void init_dirty_bitmap_incoming_migration(void);
>> +
>> #endif
>> diff --git a/migration/Makefile.objs b/migration/Makefile.objs
>> index 480dd49..fa3bf6a 100644
>> --- a/migration/Makefile.objs
>> +++ b/migration/Makefile.objs
>> @@ -9,5 +9,5 @@ common-obj-y += qjson.o
>>
>> common-obj-$(CONFIG_RDMA) += rdma.o
>>
>> -common-obj-y += block.o
>> +common-obj-y += block.o block-dirty-bitmap.o
>>
>> diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
>> new file mode 100644
>> index 0000000..28e3732
>> --- /dev/null
>> +++ b/migration/block-dirty-bitmap.c
>> @@ -0,0 +1,679 @@
>> +/*
>> + * Block dirty bitmap postcopy migration
>> + *
>> + * Copyright IBM, Corp. 2009
>> + * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
>> + *
>> + * Authors:
>> + * Liran Schour <lirans@il.ibm.com>
>> + * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2. See
>> + * the COPYING file in the top-level directory.
>> + * This file is derived from migration/block.c, so its author and IBM copyright
>> + * are here, although content is quite different.
>> + *
>> + * Contributions after 2012-01-13 are licensed under the terms of the
>> + * GNU GPL, version 2 or (at your option) any later version.
>> + *
>> + * ***
>> + *
>> + * Here postcopy migration of dirty bitmaps is realized. Only named dirty
>> + * bitmaps, associated with root nodes and non-root named nodes are migrated.
>> + *
>> + * If destination qemu already contains a dirty bitmap with the same name
>> + * as a migrated bitmap (for the same node), then, if their granularities are
>> + * the same, the migration will be done; otherwise an error will be generated.
>> + *
>> + * If destination qemu doesn't contain such bitmap it will be created.
>> + *
>> + * format of migration:
>> + *
>> + * # Header (shared for different chunk types)
>> + * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
>> + * [ 1 byte: node name size ] \ flags & DEVICE_NAME
>> + * [ n bytes: node name ] /
>> + * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
>> + * [ n bytes: bitmap name ] /
>> + *
>> + * # Start of bitmap migration (flags & START)
>> + * header
>> + * be64: granularity
>> + * 1 byte: bitmap enabled flag
>> + *
>> + * # Complete of bitmap migration (flags & COMPLETE)
>> + * header
>> + *
>> + * # Data chunk of bitmap migration
>> + * header
>> + * be64: start sector
>> + * be32: number of sectors
>> + * [ be64: buffer size ] \ ! (flags & ZEROES)
>> + * [ n bytes: buffer ] /
>> + *
>> + * The last chunk in stream should contain flags & EOS. The chunk may skip
>> + * device and/or bitmap names, assuming them to be the same with the previous
>> + * chunk.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "block/block.h"
>> +#include "block/block_int.h"
>> +#include "sysemu/block-backend.h"
>> +#include "qemu/main-loop.h"
>> +#include "qemu/error-report.h"
>> +#include "migration/block.h"
>> +#include "migration/migration.h"
>> +#include "qemu/hbitmap.h"
>> +#include "sysemu/sysemu.h"
>> +#include "qemu/cutils.h"
>> +#include "qapi/error.h"
>> +#include "trace.h"
>> +#include <assert.h>
>> +
>> +#define CHUNK_SIZE (1 << 10)
>> +
>> +/* Flags occupy one, two or four bytes. In every byte except the last, the
>> + * 7-th (EXTRA_FLAGS) bit is set, indicating that another flag byte follows. */
>> +#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
>> +#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
>> +#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
>> +#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
>> +#define DIRTY_BITMAP_MIG_FLAG_START 0x10
>> +#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
>> +#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
>> +
>> +#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
>> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
>> +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
>> +
>> +#define DEBUG_DIRTY_BITMAP_MIGRATION 0
>> +
>> +typedef struct DirtyBitmapMigBitmapState {
>> + /* Written during setup phase. */
>> + BlockDriverState *bs;
>> + const char *node_name;
>> + BdrvDirtyBitmap *bitmap;
>> + uint64_t total_sectors;
>> + uint64_t sectors_per_chunk;
>> + QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
>> +
>> + /* For bulk phase. */
>> + bool bulk_completed;
>> + uint64_t cur_sector;
>> +} DirtyBitmapMigBitmapState;
>> +
>> +typedef struct DirtyBitmapMigState {
>> + QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
>> +
>> + bool bulk_completed;
>> +
>> + /* for send_bitmap_bits() */
>> + BlockDriverState *prev_bs;
>> + BdrvDirtyBitmap *prev_bitmap;
>> +} DirtyBitmapMigState;
>> +
>> +typedef struct DirtyBitmapLoadState {
>> + uint32_t flags;
>> + char node_name[256];
>> + char bitmap_name[256];
>> + BlockDriverState *bs;
>> + BdrvDirtyBitmap *bitmap;
>> +} DirtyBitmapLoadState;
>> +
>> +static DirtyBitmapMigState dirty_bitmap_mig_state;
>> +
>> +typedef struct DirtyBitmapLoadBitmapState {
>> + BlockDriverState *bs;
>> + BdrvDirtyBitmap *bitmap;
>> + bool migrated;
>> +} DirtyBitmapLoadBitmapState;
>> +static GSList *enabled_bitmaps;
>> +QemuMutex finish_lock;
>> +
>> +void init_dirty_bitmap_incoming_migration(void)
>> +{
>> + qemu_mutex_init(&finish_lock);
>> +}
>> +
>> +static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
>> +{
>> + uint8_t flags = qemu_get_byte(f);
>> + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
>> + flags = flags << 8 | qemu_get_byte(f);
>> + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
>> + flags = flags << 16 | qemu_get_be16(f);
>> + }
>> + }
>> +
>> + return flags;
>> +}
>> +
>> +static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
>> +{
>> + if (!(flags & 0xffffff00)) {
>> + qemu_put_byte(f, flags);
>> + return;
>> + }
>> +
>> + if (!(flags & 0xffff0000)) {
>> + qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
>> + return;
>> + }
>> +
>> + qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
> Is this stuff worth the hassle? It would seem easier just to send
> the 32bits each time; the overhead doesn't seem to be much except
> in the case where you send minimum size chunks a lot.
I've started with one byte of flags (I don't need more) and I've
implemented this after a proposal by John Snow:
> I might recommend reserving the last bit of the second byte to be a
> flag such as DIRTY_BITMAP_EXTRA_FLAGS that indicates the presence of
> additional byte(s) of flags, to be determined later, if we ever need
> them, but two bytes for now should be sufficient.
And I think it is not bad. Code is a bit more complex, yes, but why
should we send 4 bytes with every chunk when (very likely) we will never
use more than one?
>
> Dave
>
>> +}
>> +
>> +static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
>> + uint32_t additional_flags)
>> +{
>> + BlockDriverState *bs = dbms->bs;
>> + BdrvDirtyBitmap *bitmap = dbms->bitmap;
>> + uint32_t flags = additional_flags;
>> + trace_send_bitmap_header_enter();
>> +
>> + if (bs != dirty_bitmap_mig_state.prev_bs) {
>> + dirty_bitmap_mig_state.prev_bs = bs;
>> + flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
>> + }
>> +
>> + if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
>> + dirty_bitmap_mig_state.prev_bitmap = bitmap;
>> + flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
>> + }
>> +
>> + qemu_put_bitmap_flags(f, flags);
>> +
>> + if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
>> + qemu_put_counted_string(f, dbms->node_name);
>> + }
>> +
>> + if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
>> + qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
>> + }
>> +}
>> +
>> +static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
>> +{
>> + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
>> + qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
>> + qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
>> +}
>> +
>> +static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
>> +{
>> + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
>> +}
>> +
>> +static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
>> + uint64_t start_sector, uint32_t nr_sectors)
>> +{
>> + /* align for buffer_is_zero() */
>> + uint64_t align = 4 * sizeof(long);
>> + uint64_t unaligned_size =
>> + bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
>> + start_sector, nr_sectors);
>> + uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
>> + uint8_t *buf = g_malloc0(buf_size);
>> + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
>> +
>> + bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
>> + start_sector, nr_sectors);
>> +
>> + if (buffer_is_zero(buf, buf_size)) {
>> + g_free(buf);
>> + buf = NULL;
>> + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
>> + }
>> +
>> + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
>> +
>> + send_bitmap_header(f, dbms, flags);
>> +
>> + qemu_put_be64(f, start_sector);
>> + qemu_put_be32(f, nr_sectors);
>> +
>> + /* if a block is zero we need to flush here since the network
>> + * bandwidth is now a lot higher than the storage device bandwidth.
>> + * thus if we queue zero blocks we slow down the migration. */
>> + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
>> + qemu_fflush(f);
>> + } else {
>> + qemu_put_be64(f, buf_size);
>> + qemu_put_buffer(f, buf, buf_size);
>> + }
>> +
>> + g_free(buf);
>> +}
>> +
>> +
>> +/* Called with iothread lock taken. */
>> +
>> +static void init_dirty_bitmap_migration(void)
>> +{
>> + BlockDriverState *bs;
>> + BdrvDirtyBitmap *bitmap;
>> + DirtyBitmapMigBitmapState *dbms;
>> + BdrvNextIterator it;
>> + uint64_t total_bytes = 0;
>> +
>> + dirty_bitmap_mig_state.bulk_completed = false;
>> + dirty_bitmap_mig_state.prev_bs = NULL;
>> + dirty_bitmap_mig_state.prev_bitmap = NULL;
>> +
>> + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
>> + for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
>> + bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
>> + if (!bdrv_dirty_bitmap_name(bitmap)) {
>> + continue;
>> + }
>> +
>> + if (!bdrv_get_device_or_node_name(bs)) {
>> + /* not named non-root node */
>> + continue;
>> + }
>> +
>> + dbms = g_new0(DirtyBitmapMigBitmapState, 1);
>> + dbms->bs = bs;
>> + dbms->node_name = bdrv_get_node_name(bs);
>> + if (!dbms->node_name || dbms->node_name[0] == '\0') {
>> + dbms->node_name = bdrv_get_device_name(bs);
>> + }
>> + dbms->bitmap = bitmap;
>> + dbms->total_sectors = bdrv_nb_sectors(bs);
>> + dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
>> + bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
>> +
>> + total_bytes +=
>> + bdrv_dirty_bitmap_serialization_size(bitmap,
>> + 0, dbms->total_sectors);
>> +
>> + QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
>> + dbms, entry);
>> + }
>> + }
>> +}
>> +
>> +/* Called with no lock taken. */
>> +static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
>> +{
>> + uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
>> + dbms->sectors_per_chunk);
>> +
>> + send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
>> +
>> + dbms->cur_sector += nr_sectors;
>> + if (dbms->cur_sector >= dbms->total_sectors) {
>> + dbms->bulk_completed = true;
>> + }
>> +}
>> +
>> +/* Called with no lock taken. */
>> +static void bulk_phase(QEMUFile *f, bool limit)
>> +{
>> + DirtyBitmapMigBitmapState *dbms;
>> +
>> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
>> + while (!dbms->bulk_completed) {
>> + bulk_phase_send_chunk(f, dbms);
>> + if (limit && qemu_file_rate_limit(f)) {
>> + return;
>> + }
>> + }
>> + }
>> +
>> + dirty_bitmap_mig_state.bulk_completed = true;
>> +}
>> +
>> +/* Called with iothread lock taken. */
>> +static void dirty_bitmap_mig_cleanup(void)
>> +{
>> + DirtyBitmapMigBitmapState *dbms;
>> +
>> + while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
>> + QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
>> + g_free(dbms);
>> + }
>> +}
>> +
>> +/* for SaveVMHandlers */
>> +static void dirty_bitmap_migration_cleanup(void *opaque)
>> +{
>> + dirty_bitmap_mig_cleanup();
>> +}
>> +
>> +static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
>> +{
>> + trace_dirty_bitmap_save_iterate(
>> + migration_in_postcopy(migrate_get_current()));
>> +
>> + if (migration_in_postcopy(migrate_get_current()) &&
>> + !dirty_bitmap_mig_state.bulk_completed) {
>> + bulk_phase(f, true);
>> + }
>> +
>> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
>> +
>> + return dirty_bitmap_mig_state.bulk_completed;
>> +}
>> +
>> +/* Called with iothread lock taken. */
>> +
>> +static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
>> +{
>> + DirtyBitmapMigBitmapState *dbms;
>> + trace_dirty_bitmap_save_complete_enter();
>> +
>> + if (!dirty_bitmap_mig_state.bulk_completed) {
>> + bulk_phase(f, false);
>> + }
>> +
>> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
>> + send_bitmap_complete(f, dbms);
>> + }
>> +
>> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
>> +
>> + trace_dirty_bitmap_save_complete_finish();
>> +
>> + dirty_bitmap_mig_cleanup();
>> + return 0;
>> +}
>> +
>> +static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
>> + uint64_t max_size,
>> + uint64_t *res_precopy_only,
>> + uint64_t *res_compatible,
>> + uint64_t *res_postcopy_only)
>> +{
>> + DirtyBitmapMigBitmapState *dbms;
>> + uint64_t pending = 0;
>> +
>> + qemu_mutex_lock_iothread();
>> +
>> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
>> + uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
>> + uint64_t sectors = dbms->bulk_completed ? 0 :
>> + dbms->total_sectors - dbms->cur_sector;
>> +
>> + pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
>> + }
>> +
>> + qemu_mutex_unlock_iothread();
>> +
>> + trace_dirty_bitmap_save_pending(pending, max_size);
>> +
>> + *res_postcopy_only += pending;
>> +}
>> +
>> +/* First occurrence of this bitmap. It should be created if it doesn't exist */
>> +static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
>> +{
>> + Error *local_err = NULL;
>> + uint32_t granularity = qemu_get_be32(f);
>> + bool enabled = qemu_get_byte(f);
>> +
>> + if (!s->bitmap) {
>> + s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
>> + s->bitmap_name, &local_err);
>> + if (!s->bitmap) {
>> + error_report_err(local_err);
>> + return -EINVAL;
>> + }
>> + } else {
>> + uint32_t dest_granularity =
>> + bdrv_dirty_bitmap_granularity(s->bitmap);
>> + if (dest_granularity != granularity) {
>> + fprintf(stderr,
>> + "Error: "
>> + "Migrated bitmap granularity (%" PRIu32 ") "
>> + "doesn't match the destination bitmap '%s' "
>> + "granularity (%" PRIu32 ")\n",
>> + granularity,
>> + bdrv_dirty_bitmap_name(s->bitmap),
>> + dest_granularity);
>> + return -EINVAL;
>> + }
>> + }
>> +
>> + bdrv_disable_dirty_bitmap(s->bitmap);
>> + if (enabled) {
>> + DirtyBitmapLoadBitmapState *b;
>> +
>> + bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
>> + if (local_err) {
>> + error_report_err(local_err);
>> + return -EINVAL;
>> + }
>> +
>> + b = g_new(DirtyBitmapLoadBitmapState, 1);
>> + b->bs = s->bs;
>> + b->bitmap = s->bitmap;
>> + b->migrated = false;
>> + enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +void dirty_bitmap_mig_before_vm_start(void)
>> +{
>> + GSList *item;
>> +
>> + qemu_mutex_lock(&finish_lock);
>> +
>> + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
>> + DirtyBitmapLoadBitmapState *b = item->data;
>> +
>> + if (b->migrated) {
>> + bdrv_enable_dirty_bitmap(b->bitmap);
>> + } else {
>> + bdrv_dirty_bitmap_enable_successor(b->bitmap);
>> + }
>> +
>> + g_free(b);
>> + }
>> +
>> + g_slist_free(enabled_bitmaps);
>> + enabled_bitmaps = NULL;
>> +
>> + qemu_mutex_unlock(&finish_lock);
>> +}
>> +
>> +static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
>> +{
>> + GSList *item;
>> + trace_dirty_bitmap_load_complete();
>> + bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
>> +
>> + qemu_mutex_lock(&finish_lock);
>> +
>> + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
>> + DirtyBitmapLoadBitmapState *b = item->data;
>> +
>> + if (b->bitmap == s->bitmap) {
>> + b->migrated = true;
>> + }
>> + }
>> +
>> + if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
>> + if (enabled_bitmaps == NULL) {
>> + /* in postcopy */
>> + AioContext *aio_context = bdrv_get_aio_context(s->bs);
>> + aio_context_acquire(aio_context);
>> +
>> + bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
>> + bdrv_enable_dirty_bitmap(s->bitmap);
>> +
>> + aio_context_release(aio_context);
>> + } else {
>> + /* target not started, successor is empty */
>> + bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
>> + }
>> + }
>> +
>> + qemu_mutex_unlock(&finish_lock);
>> +}
>> +
>> +static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
>> +{
>> + uint64_t first_sector = qemu_get_be64(f);
>> + uint32_t nr_sectors = qemu_get_be32(f);
>> + trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
>> +
>> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
>> + trace_dirty_bitmap_load_bits_zeroes();
>> + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
>> + nr_sectors, false);
>> + } else {
>> + uint8_t *buf;
>> + uint64_t buf_size = qemu_get_be64(f);
>> + uint64_t needed_size =
>> + bdrv_dirty_bitmap_serialization_size(s->bitmap,
>> + first_sector, nr_sectors);
>> +
>> + if (needed_size > buf_size) {
>> + fprintf(stderr,
>> + "Error: Migrated bitmap granularity doesn't "
>> + "match the destination bitmap '%s' granularity\n",
>> + bdrv_dirty_bitmap_name(s->bitmap));
>> + return -EINVAL;
>> + }
>> +
>> + buf = g_malloc(buf_size);
>> + qemu_get_buffer(f, buf, buf_size);
>> + bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
>> + first_sector,
>> + nr_sectors, false);
>> + g_free(buf);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
>> +{
>> + Error *local_err = NULL;
>> + s->flags = qemu_get_bitmap_flags(f);
>> + trace_dirty_bitmap_load_header(s->flags);
>> +
>> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
>> + if (!qemu_get_counted_string(f, s->node_name)) {
>> + fprintf(stderr, "Unable to read node name string\n");
>> + return -EINVAL;
>> + }
>> + s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
>> + if (!s->bs) {
>> + error_report("%s", error_get_pretty(local_err));
>> + error_free(local_err);
>> + return -EINVAL;
>> + }
>> + } else if (!s->bs) {
>> + fprintf(stderr, "Error: block device name is not set\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
>> + if (!qemu_get_counted_string(f, s->bitmap_name)) {
>> + fprintf(stderr, "Unable to read node name string\n");
>> + return -EINVAL;
>> + }
>> + s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
>> +
>> + /* bitmap may be NULL here, it wouldn't be an error if it is the
>> + * first occurrence of the bitmap */
>> + if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
>> + fprintf(stderr, "Error: unknown dirty bitmap "
>> + "'%s' for block device '%s'\n",
>> + s->bitmap_name, s->node_name);
>> + return -EINVAL;
>> + }
>> + } else if (!s->bitmap) {
>> + fprintf(stderr, "Error: block device name is not set\n");
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
>> +{
>> + static DirtyBitmapLoadState s;
>> +
>> + int ret = 0;
>> +
>> + trace_dirty_bitmap_load_enter();
>> +
>> + do {
>> + dirty_bitmap_load_header(f, &s);
>> +
>> + if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
>> + ret = dirty_bitmap_load_start(f, &s);
>> + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
>> + dirty_bitmap_load_complete(f, &s);
>> + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
>> + ret = dirty_bitmap_load_bits(f, &s);
>> + }
>> +
>> + if (!ret) {
>> + ret = qemu_file_get_error(f);
>> + }
>> +
>> + if (ret) {
>> + return ret;
>> + }
>> + } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
>> +
>> + trace_dirty_bitmap_load_success();
>> + return 0;
>> +}
>> +
>> +static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
>> +{
>> + DirtyBitmapMigBitmapState *dbms = NULL;
>> + init_dirty_bitmap_migration();
>> +
>> + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
>> + send_bitmap_start(f, dbms);
>> + }
>> + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
>> +
>> + return 0;
>> +}
>> +
>> +static bool dirty_bitmap_is_active(void *opaque)
>> +{
>> + return migrate_dirty_bitmaps();
>> +}
>> +
>> +static bool dirty_bitmap_is_active_iterate(void *opaque)
>> +{
>> + return dirty_bitmap_is_active(opaque) && !runstate_is_running();
>> +}
>> +
>> +static bool dirty_bitmap_has_postcopy(void *opaque)
>> +{
>> + return true;
>> +}
>> +
>> +static SaveVMHandlers savevm_dirty_bitmap_handlers = {
>> + .save_live_setup = dirty_bitmap_save_setup,
>> + .save_live_complete_postcopy = dirty_bitmap_save_complete,
>> + .save_live_complete_precopy = dirty_bitmap_save_complete,
>> + .has_postcopy = dirty_bitmap_has_postcopy,
>> + .save_live_pending = dirty_bitmap_save_pending,
>> + .save_live_iterate = dirty_bitmap_save_iterate,
>> + .is_active_iterate = dirty_bitmap_is_active_iterate,
>> + .load_state = dirty_bitmap_load,
>> + .cleanup = dirty_bitmap_migration_cleanup,
>> + .is_active = dirty_bitmap_is_active,
>> +};
>> +
>> +void dirty_bitmap_mig_init(void)
>> +{
>> + QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
>> +
>> + register_savevm_live(NULL, "dirty-bitmap", 0, 1,
>> + &savevm_dirty_bitmap_handlers,
>> + &dirty_bitmap_mig_state);
>> +}
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 179bd04..edf5623 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -122,6 +122,9 @@ MigrationIncomingState *migration_incoming_get_current(void)
>> QLIST_INIT(&mis_current.loadvm_handlers);
>> qemu_mutex_init(&mis_current.rp_mutex);
>> qemu_event_init(&mis_current.main_thread_load_event, false);
>> +
>> + init_dirty_bitmap_incoming_migration();
>> +
>> once = true;
>> }
>> return &mis_current;
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index 54572ef..927a680 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1651,6 +1651,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
>>
>> trace_loadvm_postcopy_handle_run_vmstart();
>>
>> + dirty_bitmap_mig_before_vm_start();
>> +
>> if (autostart) {
>> /* Hold onto your hats, starting the CPU */
>> vm_start();
>> diff --git a/migration/trace-events b/migration/trace-events
>> index 0212929..38fca41 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -222,3 +222,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
>> colo_send_message(const char *msg) "Send '%s' message"
>> colo_receive_message(const char *msg) "Receive '%s' message"
>> colo_failover_set_state(const char *new_state) "new state %s"
>> +
>> +# migration/block-dirty-bitmap.c
>> +send_bitmap_header_enter(void) ""
>> +send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
>> +dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
>> +dirty_bitmap_save_complete_enter(void) ""
>> +dirty_bitmap_save_complete_finish(void) ""
>> +dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
>> +dirty_bitmap_load_complete(void) ""
>> +dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
>> +dirty_bitmap_load_bits_zeroes(void) ""
>> +dirty_bitmap_load_header(uint32_t flags) "flags %x"
>> +dirty_bitmap_load_enter(void) ""
>> +dirty_bitmap_load_success(void) ""
>> diff --git a/vl.c b/vl.c
>> index b4eaf03..f1ee9ff 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -4405,6 +4405,7 @@ int main(int argc, char **argv, char **envp)
>>
>> blk_mig_init();
>> ram_mig_init();
>> + dirty_bitmap_mig_init();
>>
>> /* If the currently selected machine wishes to override the units-per-bus
>> * property of its default HBA interface type, do so now. */
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2017-02-25 17:25 ` Vladimir Sementsov-Ogievskiy
@ 2017-02-27 20:12 ` Dr. David Alan Gilbert
0 siblings, 0 replies; 40+ messages in thread
From: Dr. David Alan Gilbert @ 2017-02-27 20:12 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, famz, stefanha,
quintela, mreitz, kwolf, peter.maydell, den, jsnow, lirans
* Vladimir Sementsov-Ogievskiy (vsementsov@virtuozzo.com) wrote:
> 24.02.2017 16:26, Dr. David Alan Gilbert wrote:
> > * Vladimir Sementsov-Ogievskiy (vsementsov@virtuozzo.com) wrote:
> > > Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
> > > associated with root nodes and non-root named nodes are migrated.
> > >
> > > If destination qemu is already containing a dirty bitmap with the same name
> > > as a migrated bitmap (for the same node), then, if their granularities are
> > > the same the migration will be done, otherwise the error will be generated.
> > >
> > > If destination qemu doesn't contain such bitmap it will be created.
> > >
> > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> > > ---
> > > include/migration/block.h | 1 +
> > > include/migration/migration.h | 4 +
> > > migration/Makefile.objs | 2 +-
> > > migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
> > > migration/migration.c | 3 +
> > > migration/savevm.c | 2 +
> > > migration/trace-events | 14 +
> > > vl.c | 1 +
> > > 8 files changed, 705 insertions(+), 1 deletion(-)
> > > create mode 100644 migration/block-dirty-bitmap.c
> > >
> > > diff --git a/include/migration/block.h b/include/migration/block.h
> > > index 41a1ac8..8333c43 100644
> > > --- a/include/migration/block.h
> > > +++ b/include/migration/block.h
> > > @@ -14,6 +14,7 @@
> > > #ifndef MIGRATION_BLOCK_H
> > > #define MIGRATION_BLOCK_H
> > > +void dirty_bitmap_mig_init(void);
> > > void blk_mig_init(void);
> > > int blk_mig_active(void);
> > > uint64_t blk_mig_bytes_transferred(void);
> > > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > > index 46645f4..03a4993 100644
> > > --- a/include/migration/migration.h
> > > +++ b/include/migration/migration.h
> > > @@ -371,4 +371,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
> > > PostcopyState postcopy_state_get(void);
> > > /* Set the state and return the old state */
> > > PostcopyState postcopy_state_set(PostcopyState new_state);
> > > +
> > > +void dirty_bitmap_mig_before_vm_start(void);
> > > +void init_dirty_bitmap_incoming_migration(void);
> > > +
> > > #endif
> > > diff --git a/migration/Makefile.objs b/migration/Makefile.objs
> > > index 480dd49..fa3bf6a 100644
> > > --- a/migration/Makefile.objs
> > > +++ b/migration/Makefile.objs
> > > @@ -9,5 +9,5 @@ common-obj-y += qjson.o
> > > common-obj-$(CONFIG_RDMA) += rdma.o
> > > -common-obj-y += block.o
> > > +common-obj-y += block.o block-dirty-bitmap.o
> > > diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
> > > new file mode 100644
> > > index 0000000..28e3732
> > > --- /dev/null
> > > +++ b/migration/block-dirty-bitmap.c
> > > @@ -0,0 +1,679 @@
> > > +/*
> > > + * Block dirty bitmap postcopy migration
> > > + *
> > > + * Copyright IBM, Corp. 2009
> > > + * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
> > > + *
> > > + * Authors:
> > > + * Liran Schour <lirans@il.ibm.com>
> > > + * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2. See
> > > + * the COPYING file in the top-level directory.
> > > + * This file is derived from migration/block.c, so its author and IBM copyright
> > > + * are here, although content is quite different.
> > > + *
> > > + * Contributions after 2012-01-13 are licensed under the terms of the
> > > + * GNU GPL, version 2 or (at your option) any later version.
> > > + *
> > > + * ***
> > > + *
> > > + * Here postcopy migration of dirty bitmaps is realized. Only named dirty
> > > + * bitmaps, associated with root nodes and non-root named nodes are migrated.
> > > + *
> > > + * If destination qemu is already containing a dirty bitmap with the same name
> > > + * as a migrated bitmap (for the same node), then, if their granularities are
> > > + * the same the migration will be done, otherwise the error will be generated.
> > > + *
> > > + * If destination qemu doesn't contain such bitmap it will be created.
> > > + *
> > > + * format of migration:
> > > + *
> > > + * # Header (shared for different chunk types)
> > > + * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
> > > + * [ 1 byte: node name size ] \ flags & DEVICE_NAME
> > > + * [ n bytes: node name ] /
> > > + * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
> > > + * [ n bytes: bitmap name ] /
> > > + *
> > > + * # Start of bitmap migration (flags & START)
> > > + * header
> > > + * be64: granularity
> > > + * 1 byte: bitmap enabled flag
> > > + *
> > > + * # Complete of bitmap migration (flags & COMPLETE)
> > > + * header
> > > + *
> > > + * # Data chunk of bitmap migration
> > > + * header
> > > + * be64: start sector
> > > + * be32: number of sectors
> > > + * [ be64: buffer size ] \ ! (flags & ZEROES)
> > > + * [ n bytes: buffer ] /
> > > + *
> > > + * The last chunk in stream should contain flags & EOS. The chunk may skip
> > > + * device and/or bitmap names, assuming them to be the same with the previous
> > > + * chunk.
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "block/block.h"
> > > +#include "block/block_int.h"
> > > +#include "sysemu/block-backend.h"
> > > +#include "qemu/main-loop.h"
> > > +#include "qemu/error-report.h"
> > > +#include "migration/block.h"
> > > +#include "migration/migration.h"
> > > +#include "qemu/hbitmap.h"
> > > +#include "sysemu/sysemu.h"
> > > +#include "qemu/cutils.h"
> > > +#include "qapi/error.h"
> > > +#include "trace.h"
> > > +#include <assert.h>
> > > +
> > > +#define CHUNK_SIZE (1 << 10)
> > > +
> > > +/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
> > > + * bit should be set. */
> > > +#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
> > > +#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
> > > +#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
> > > +#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
> > > +#define DIRTY_BITMAP_MIG_FLAG_START 0x10
> > > +#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
> > > +#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
> > > +
> > > +#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
> > > +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
> > > +#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
> > > +
> > > +#define DEBUG_DIRTY_BITMAP_MIGRATION 0
> > > +
> > > +typedef struct DirtyBitmapMigBitmapState {
> > > + /* Written during setup phase. */
> > > + BlockDriverState *bs;
> > > + const char *node_name;
> > > + BdrvDirtyBitmap *bitmap;
> > > + uint64_t total_sectors;
> > > + uint64_t sectors_per_chunk;
> > > + QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
> > > +
> > > + /* For bulk phase. */
> > > + bool bulk_completed;
> > > + uint64_t cur_sector;
> > > +} DirtyBitmapMigBitmapState;
> > > +
> > > +typedef struct DirtyBitmapMigState {
> > > + QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
> > > +
> > > + bool bulk_completed;
> > > +
> > > + /* for send_bitmap_bits() */
> > > + BlockDriverState *prev_bs;
> > > + BdrvDirtyBitmap *prev_bitmap;
> > > +} DirtyBitmapMigState;
> > > +
> > > +typedef struct DirtyBitmapLoadState {
> > > + uint32_t flags;
> > > + char node_name[256];
> > > + char bitmap_name[256];
> > > + BlockDriverState *bs;
> > > + BdrvDirtyBitmap *bitmap;
> > > +} DirtyBitmapLoadState;
> > > +
> > > +static DirtyBitmapMigState dirty_bitmap_mig_state;
> > > +
> > > +typedef struct DirtyBitmapLoadBitmapState {
> > > + BlockDriverState *bs;
> > > + BdrvDirtyBitmap *bitmap;
> > > + bool migrated;
> > > +} DirtyBitmapLoadBitmapState;
> > > +static GSList *enabled_bitmaps;
> > > +QemuMutex finish_lock;
> > > +
> > > +void init_dirty_bitmap_incoming_migration(void)
> > > +{
> > > + qemu_mutex_init(&finish_lock);
> > > +}
> > > +
> > > +static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
> > > +{
> > > + uint8_t flags = qemu_get_byte(f);
> > > + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
> > > + flags = flags << 8 | qemu_get_byte(f);
> > > + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
> > > + flags = flags << 16 | qemu_get_be16(f);
> > > + }
> > > + }
> > > +
> > > + return flags;
> > > +}
> > > +
> > > +static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
> > > +{
> > > + if (!(flags & 0xffffff00)) {
> > > + qemu_put_byte(f, flags);
> > > + return;
> > > + }
> > > +
> > > + if (!(flags & 0xffff0000)) {
> > > + qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
> > > + return;
> > > + }
> > > +
> > > + qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
> > Is this stuff worth the hassle? It would seem easier just to send
> > the 32bits each time; the overhead doesn't seem to be much except
> > in the case where you send minimum size chunks a lot.
>
> I've started with one byte of flags (I don't need more) and I've implemented
> this after proposal by John Snow:
> > I might recommend reserving the last bit of the second byte to be a flag
> > such as DIRTY_BITMAP_EXTRA_FLAGS that indicates the presence of
> > additional byte(s) of flags, to be determined later, if we ever need
> > them, but two bytes for now should be sufficient.
>
> And I think it is not bad. Code is a bit more complex, yes, but why should
> we send 4 bytes with every chunk when (very likely) we will never use more
> than one?
Yes if it's very rare and a useful % of the size then that makes sense; just
don't worry about squeezing every byte out of it if it's a tiny %.
And yes, keeping one flag set aside for 'and there's more' is good.
Dave
> >
> > Dave
> >
> > > +}
> > > +
> > > +static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> > > + uint32_t additional_flags)
> > > +{
> > > + BlockDriverState *bs = dbms->bs;
> > > + BdrvDirtyBitmap *bitmap = dbms->bitmap;
> > > + uint32_t flags = additional_flags;
> > > + trace_send_bitmap_header_enter();
> > > +
> > > + if (bs != dirty_bitmap_mig_state.prev_bs) {
> > > + dirty_bitmap_mig_state.prev_bs = bs;
> > > + flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
> > > + }
> > > +
> > > + if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
> > > + dirty_bitmap_mig_state.prev_bitmap = bitmap;
> > > + flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
> > > + }
> > > +
> > > + qemu_put_bitmap_flags(f, flags);
> > > +
> > > + if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
> > > + qemu_put_counted_string(f, dbms->node_name);
> > > + }
> > > +
> > > + if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> > > + qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
> > > + }
> > > +}
> > > +
> > > +static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> > > +{
> > > + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
> > > + qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
> > > + qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
> > > +}
> > > +
> > > +static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> > > +{
> > > + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
> > > +}
> > > +
> > > +static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
> > > + uint64_t start_sector, uint32_t nr_sectors)
> > > +{
> > > + /* align for buffer_is_zero() */
> > > + uint64_t align = 4 * sizeof(long);
> > > + uint64_t unaligned_size =
> > > + bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
> > > + start_sector, nr_sectors);
> > > + uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
> > > + uint8_t *buf = g_malloc0(buf_size);
> > > + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
> > > +
> > > + bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
> > > + start_sector, nr_sectors);
> > > +
> > > + if (buffer_is_zero(buf, buf_size)) {
> > > + g_free(buf);
> > > + buf = NULL;
> > > + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
> > > + }
> > > +
> > > + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
> > > +
> > > + send_bitmap_header(f, dbms, flags);
> > > +
> > > + qemu_put_be64(f, start_sector);
> > > + qemu_put_be32(f, nr_sectors);
> > > +
> > > + /* if a block is zero we need to flush here since the network
> > > + * bandwidth is now a lot higher than the storage device bandwidth.
> > > + * thus if we queue zero blocks we slow down the migration. */
> > > + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
> > > + qemu_fflush(f);
> > > + } else {
> > > + qemu_put_be64(f, buf_size);
> > > + qemu_put_buffer(f, buf, buf_size);
> > > + }
> > > +
> > > + g_free(buf);
> > > +}
> > > +
> > > +
> > > +/* Called with iothread lock taken. */
> > > +
> > > +static void init_dirty_bitmap_migration(void)
> > > +{
> > > + BlockDriverState *bs;
> > > + BdrvDirtyBitmap *bitmap;
> > > + DirtyBitmapMigBitmapState *dbms;
> > > + BdrvNextIterator it;
> > > + uint64_t total_bytes = 0;
> > > +
> > > + dirty_bitmap_mig_state.bulk_completed = false;
> > > + dirty_bitmap_mig_state.prev_bs = NULL;
> > > + dirty_bitmap_mig_state.prev_bitmap = NULL;
> > > +
> > > + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
> > > + for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
> > > + bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
> > > + if (!bdrv_dirty_bitmap_name(bitmap)) {
> > > + continue;
> > > + }
> > > +
> > > + if (!bdrv_get_device_or_node_name(bs)) {
> > > + /* not named non-root node */
> > > + continue;
> > > + }
> > > +
> > > + dbms = g_new0(DirtyBitmapMigBitmapState, 1);
> > > + dbms->bs = bs;
> > > + dbms->node_name = bdrv_get_node_name(bs);
> > > + if (!dbms->node_name || dbms->node_name[0] == '\0') {
> > > + dbms->node_name = bdrv_get_device_name(bs);
> > > + }
> > > + dbms->bitmap = bitmap;
> > > + dbms->total_sectors = bdrv_nb_sectors(bs);
> > > + dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
> > > + bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
> > > +
> > > + total_bytes +=
> > > + bdrv_dirty_bitmap_serialization_size(bitmap,
> > > + 0, dbms->total_sectors);
> > > +
> > > + QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
> > > + dbms, entry);
> > > + }
> > > + }
> > > +}
> > > +
> > > +/* Called with no lock taken. */
> > > +static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
> > > +{
> > > + uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
> > > + dbms->sectors_per_chunk);
> > > +
> > > + send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
> > > +
> > > + dbms->cur_sector += nr_sectors;
> > > + if (dbms->cur_sector >= dbms->total_sectors) {
> > > + dbms->bulk_completed = true;
> > > + }
> > > +}
> > > +
> > > +/* Called with no lock taken. */
> > > +static void bulk_phase(QEMUFile *f, bool limit)
> > > +{
> > > + DirtyBitmapMigBitmapState *dbms;
> > > +
> > > + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> > > + while (!dbms->bulk_completed) {
> > > + bulk_phase_send_chunk(f, dbms);
> > > + if (limit && qemu_file_rate_limit(f)) {
> > > + return;
> > > + }
> > > + }
> > > + }
> > > +
> > > + dirty_bitmap_mig_state.bulk_completed = true;
> > > +}
> > > +
> > > +/* Called with iothread lock taken. */
> > > +static void dirty_bitmap_mig_cleanup(void)
> > > +{
> > > + DirtyBitmapMigBitmapState *dbms;
> > > +
> > > + while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
> > > + QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
> > > + g_free(dbms);
> > > + }
> > > +}
> > > +
> > > +/* for SaveVMHandlers */
> > > +static void dirty_bitmap_migration_cleanup(void *opaque)
> > > +{
> > > + dirty_bitmap_mig_cleanup();
> > > +}
> > > +
> > > +static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
> > > +{
> > > + trace_dirty_bitmap_save_iterate(
> > > + migration_in_postcopy(migrate_get_current()));
> > > +
> > > + if (migration_in_postcopy(migrate_get_current()) &&
> > > + !dirty_bitmap_mig_state.bulk_completed) {
> > > + bulk_phase(f, true);
> > > + }
> > > +
> > > + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> > > +
> > > + return dirty_bitmap_mig_state.bulk_completed;
> > > +}
> > > +
> > > +/* Called with iothread lock taken. */
> > > +
> > > +static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
> > > +{
> > > + DirtyBitmapMigBitmapState *dbms;
> > > + trace_dirty_bitmap_save_complete_enter();
> > > +
> > > + if (!dirty_bitmap_mig_state.bulk_completed) {
> > > + bulk_phase(f, false);
> > > + }
> > > +
> > > + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> > > + send_bitmap_complete(f, dbms);
> > > + }
> > > +
> > > + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> > > +
> > > + trace_dirty_bitmap_save_complete_finish();
> > > +
> > > + dirty_bitmap_mig_cleanup();
> > > + return 0;
> > > +}
> > > +
> > > +static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
> > > + uint64_t max_size,
> > > + uint64_t *res_precopy_only,
> > > + uint64_t *res_compatible,
> > > + uint64_t *res_postcopy_only)
> > > +{
> > > + DirtyBitmapMigBitmapState *dbms;
> > > + uint64_t pending = 0;
> > > +
> > > + qemu_mutex_lock_iothread();
> > > +
> > > + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> > > + uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
> > > + uint64_t sectors = dbms->bulk_completed ? 0 :
> > > + dbms->total_sectors - dbms->cur_sector;
> > > +
> > > + pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
> > > + }
> > > +
> > > + qemu_mutex_unlock_iothread();
> > > +
> > > + trace_dirty_bitmap_save_pending(pending, max_size);
> > > +
> > > + *res_postcopy_only += pending;
> > > +}
> > > +
> > > +/* First occurrence of this bitmap. It should be created if doesn't exist */
> > > +static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
> > > +{
> > > + Error *local_err = NULL;
> > > + uint32_t granularity = qemu_get_be32(f);
> > > + bool enabled = qemu_get_byte(f);
> > > +
> > > + if (!s->bitmap) {
> > > + s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
> > > + s->bitmap_name, &local_err);
> > > + if (!s->bitmap) {
> > > + error_report_err(local_err);
> > > + return -EINVAL;
> > > + }
> > > + } else {
> > > + uint32_t dest_granularity =
> > > + bdrv_dirty_bitmap_granularity(s->bitmap);
> > > + if (dest_granularity != granularity) {
> > > + fprintf(stderr,
> > > + "Error: "
> > > + "Migrated bitmap granularity (%" PRIu32 ") "
> > > + "doesn't match the destination bitmap '%s' "
> > > + "granularity (%" PRIu32 ")\n",
> > > + granularity,
> > > + bdrv_dirty_bitmap_name(s->bitmap),
> > > + dest_granularity);
> > > + return -EINVAL;
> > > + }
> > > + }
> > > +
> > > + bdrv_disable_dirty_bitmap(s->bitmap);
> > > + if (enabled) {
> > > + DirtyBitmapLoadBitmapState *b;
> > > +
> > > + bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
> > > + if (local_err) {
> > > + error_report_err(local_err);
> > > + return -EINVAL;
> > > + }
> > > +
> > > + b = g_new(DirtyBitmapLoadBitmapState, 1);
> > > + b->bs = s->bs;
> > > + b->bitmap = s->bitmap;
> > > + b->migrated = false;
> > > + enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +void dirty_bitmap_mig_before_vm_start(void)
> > > +{
> > > + GSList *item;
> > > +
> > > + qemu_mutex_lock(&finish_lock);
> > > +
> > > + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
> > > + DirtyBitmapLoadBitmapState *b = item->data;
> > > +
> > > + if (b->migrated) {
> > > + bdrv_enable_dirty_bitmap(b->bitmap);
> > > + } else {
> > > + bdrv_dirty_bitmap_enable_successor(b->bitmap);
> > > + }
> > > +
> > > + g_free(b);
> > > + }
> > > +
> > > + g_slist_free(enabled_bitmaps);
> > > + enabled_bitmaps = NULL;
> > > +
> > > + qemu_mutex_unlock(&finish_lock);
> > > +}
> > > +
> > > +static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
> > > +{
> > > + GSList *item;
> > > + trace_dirty_bitmap_load_complete();
> > > + bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
> > > +
> > > + qemu_mutex_lock(&finish_lock);
> > > +
> > > + for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
> > > + DirtyBitmapLoadBitmapState *b = item->data;
> > > +
> > > + if (b->bitmap == s->bitmap) {
> > > + b->migrated = true;
> > > + }
> > > + }
> > > +
> > > + if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
> > > + if (enabled_bitmaps == NULL) {
> > > + /* in postcopy */
> > > + AioContext *aio_context = bdrv_get_aio_context(s->bs);
> > > + aio_context_acquire(aio_context);
> > > +
> > > + bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
> > > + bdrv_enable_dirty_bitmap(s->bitmap);
> > > +
> > > + aio_context_release(aio_context);
> > > + } else {
> > > + /* target not started, successor is empty */
> > > + bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
> > > + }
> > > + }
> > > +
> > > + qemu_mutex_unlock(&finish_lock);
> > > +}
> > > +
> > > +static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
> > > +{
> > > + uint64_t first_sector = qemu_get_be64(f);
> > > + uint32_t nr_sectors = qemu_get_be32(f);
> > > + trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
> > > +
> > > + if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
> > > + trace_dirty_bitmap_load_bits_zeroes();
> > > + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
> > > + nr_sectors, false);
> > > + } else {
> > > + uint8_t *buf;
> > > + uint64_t buf_size = qemu_get_be64(f);
> > > + uint64_t needed_size =
> > > + bdrv_dirty_bitmap_serialization_size(s->bitmap,
> > > + first_sector, nr_sectors);
> > > +
> > > + if (needed_size > buf_size) {
> > > + fprintf(stderr,
> > > + "Error: Migrated bitmap granularity doesn't "
> > > + "match the destination bitmap '%s' granularity\n",
> > > + bdrv_dirty_bitmap_name(s->bitmap));
> > > + return -EINVAL;
> > > + }
> > > +
> > > + buf = g_malloc(buf_size);
> > > + qemu_get_buffer(f, buf, buf_size);
> > > + bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
> > > + first_sector,
> > > + nr_sectors, false);
> > > + g_free(buf);
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
> > > +{
> > > + Error *local_err = NULL;
> > > + s->flags = qemu_get_bitmap_flags(f);
> > > + trace_dirty_bitmap_load_header(s->flags);
> > > +
> > > + if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
> > > + if (!qemu_get_counted_string(f, s->node_name)) {
> > > + fprintf(stderr, "Unable to read node name string\n");
> > > + return -EINVAL;
> > > + }
> > > + s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
> > > + if (!s->bs) {
> > > + error_report("%s", error_get_pretty(local_err));
> > > + error_free(local_err);
> > > + return -EINVAL;
> > > + }
> > > + } else if (!s->bs) {
> > > + fprintf(stderr, "Error: block device name is not set\n");
> > > + return -EINVAL;
> > > + }
> > > +
> > > + if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> > > + if (!qemu_get_counted_string(f, s->bitmap_name)) {
> > > + fprintf(stderr, "Unable to read node name string\n");
> > > + return -EINVAL;
> > > + }
> > > + s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
> > > +
> > > + /* bitmap may be NULL here, it wouldn't be an error if it is the
> > > + * first occurrence of the bitmap */
> > > + if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
> > > + fprintf(stderr, "Error: unknown dirty bitmap "
> > > + "'%s' for block device '%s'\n",
> > > + s->bitmap_name, s->node_name);
> > > + return -EINVAL;
> > > + }
> > > + } else if (!s->bitmap) {
> > > + fprintf(stderr, "Error: block device name is not set\n");
> > > + return -EINVAL;
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
> > > +{
> > > + static DirtyBitmapLoadState s;
> > > +
> > > + int ret = 0;
> > > +
> > > + trace_dirty_bitmap_load_enter();
> > > +
> > > + do {
> > > + dirty_bitmap_load_header(f, &s);
> > > +
> > > + if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
> > > + ret = dirty_bitmap_load_start(f, &s);
> > > + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
> > > + dirty_bitmap_load_complete(f, &s);
> > > + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
> > > + ret = dirty_bitmap_load_bits(f, &s);
> > > + }
> > > +
> > > + if (!ret) {
> > > + ret = qemu_file_get_error(f);
> > > + }
> > > +
> > > + if (ret) {
> > > + return ret;
> > > + }
> > > + } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
> > > +
> > > + trace_dirty_bitmap_load_success();
> > > + return 0;
> > > +}
> > > +
> > > +static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
> > > +{
> > > + DirtyBitmapMigBitmapState *dbms = NULL;
> > > + init_dirty_bitmap_migration();
> > > +
> > > + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
> > > + send_bitmap_start(f, dbms);
> > > + }
> > > + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static bool dirty_bitmap_is_active(void *opaque)
> > > +{
> > > + return migrate_dirty_bitmaps();
> > > +}
> > > +
> > > +static bool dirty_bitmap_is_active_iterate(void *opaque)
> > > +{
> > > + return dirty_bitmap_is_active(opaque) && !runstate_is_running();
> > > +}
> > > +
> > > +static bool dirty_bitmap_has_postcopy(void *opaque)
> > > +{
> > > + return true;
> > > +}
> > > +
> > > +static SaveVMHandlers savevm_dirty_bitmap_handlers = {
> > > + .save_live_setup = dirty_bitmap_save_setup,
> > > + .save_live_complete_postcopy = dirty_bitmap_save_complete,
> > > + .save_live_complete_precopy = dirty_bitmap_save_complete,
> > > + .has_postcopy = dirty_bitmap_has_postcopy,
> > > + .save_live_pending = dirty_bitmap_save_pending,
> > > + .save_live_iterate = dirty_bitmap_save_iterate,
> > > + .is_active_iterate = dirty_bitmap_is_active_iterate,
> > > + .load_state = dirty_bitmap_load,
> > > + .cleanup = dirty_bitmap_migration_cleanup,
> > > + .is_active = dirty_bitmap_is_active,
> > > +};
> > > +
> > > +void dirty_bitmap_mig_init(void)
> > > +{
> > > + QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
> > > +
> > > + register_savevm_live(NULL, "dirty-bitmap", 0, 1,
> > > + &savevm_dirty_bitmap_handlers,
> > > + &dirty_bitmap_mig_state);
> > > +}
> > > diff --git a/migration/migration.c b/migration/migration.c
> > > index 179bd04..edf5623 100644
> > > --- a/migration/migration.c
> > > +++ b/migration/migration.c
> > > @@ -122,6 +122,9 @@ MigrationIncomingState *migration_incoming_get_current(void)
> > > QLIST_INIT(&mis_current.loadvm_handlers);
> > > qemu_mutex_init(&mis_current.rp_mutex);
> > > qemu_event_init(&mis_current.main_thread_load_event, false);
> > > +
> > > + init_dirty_bitmap_incoming_migration();
> > > +
> > > once = true;
> > > }
> > > return &mis_current;
> > > diff --git a/migration/savevm.c b/migration/savevm.c
> > > index 54572ef..927a680 100644
> > > --- a/migration/savevm.c
> > > +++ b/migration/savevm.c
> > > @@ -1651,6 +1651,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
> > > trace_loadvm_postcopy_handle_run_vmstart();
> > > + dirty_bitmap_mig_before_vm_start();
> > > +
> > > if (autostart) {
> > > /* Hold onto your hats, starting the CPU */
> > > vm_start();
> > > diff --git a/migration/trace-events b/migration/trace-events
> > > index 0212929..38fca41 100644
> > > --- a/migration/trace-events
> > > +++ b/migration/trace-events
> > > @@ -222,3 +222,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
> > > colo_send_message(const char *msg) "Send '%s' message"
> > > colo_receive_message(const char *msg) "Receive '%s' message"
> > > colo_failover_set_state(const char *new_state) "new state %s"
> > > +
> > > +# migration/block-dirty-bitmap.c
> > > +send_bitmap_header_enter(void) ""
> > > +send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
> > > +dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
> > > +dirty_bitmap_save_complete_enter(void) ""
> > > +dirty_bitmap_save_complete_finish(void) ""
> > > +dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
> > > +dirty_bitmap_load_complete(void) ""
> > > +dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
> > > +dirty_bitmap_load_bits_zeroes(void) ""
> > > +dirty_bitmap_load_header(uint32_t flags) "flags %x"
> > > +dirty_bitmap_load_enter(void) ""
> > > +dirty_bitmap_load_success(void) ""
> > > diff --git a/vl.c b/vl.c
> > > index b4eaf03..f1ee9ff 100644
> > > --- a/vl.c
> > > +++ b/vl.c
> > > @@ -4405,6 +4405,7 @@ int main(int argc, char **argv, char **envp)
> > > blk_mig_init();
> > > ram_mig_init();
> > > + dirty_bitmap_mig_init();
> > > /* If the currently selected machine wishes to override the units-per-bus
> > > * property of its default HBA interface type, do so now. */
> > > --
> > > 1.8.3.1
> > >
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
>
> --
> Best regards,
> Vladimir
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2016-11-22 17:54 [Qemu-devel] [PATCH v4 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
@ 2016-11-22 17:54 ` Vladimir Sementsov-Ogievskiy
2017-01-24 9:40 ` Juan Quintela
2017-02-01 23:02 ` Max Reitz
0 siblings, 2 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2016-11-22 17:54 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.
If destination qemu is already containing a dirty bitmap with the same name
as a migrated bitmap (for the same node), then, if their granularities are
the same the migration will be done, otherwise the error will be generated.
If destination qemu doesn't contain such bitmap it will be created.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
include/migration/block.h | 1 +
include/migration/migration.h | 4 +
migration/Makefile.objs | 2 +-
migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
migration/migration.c | 3 +
migration/savevm.c | 2 +
migration/trace-events | 14 +
vl.c | 1 +
8 files changed, 705 insertions(+), 1 deletion(-)
create mode 100644 migration/block-dirty-bitmap.c
diff --git a/include/migration/block.h b/include/migration/block.h
index 41a1ac8..8333c43 100644
--- a/include/migration/block.h
+++ b/include/migration/block.h
@@ -14,6 +14,7 @@
#ifndef MIGRATION_BLOCK_H
#define MIGRATION_BLOCK_H
+void dirty_bitmap_mig_init(void);
void blk_mig_init(void);
int blk_mig_active(void);
uint64_t blk_mig_bytes_transferred(void);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 6cbb9c6..af6e0b7 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -361,4 +361,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);
+
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
#endif
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 3f3e237..3031636 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -10,5 +10,5 @@ common-obj-y += qjson.o
common-obj-$(CONFIG_RDMA) += rdma.o
-common-obj-y += block.o
+common-obj-y += block.o block-dirty-bitmap.o
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 0000000..28e3732
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,679 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
+ *
+ * Authors:
+ * Liran Schour <lirans@il.ibm.com>
+ * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM copyright
+ * are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * ***
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only named dirty
+ * bitmaps, associated with root nodes and non-root named nodes are migrated.
+ *
+ * If destination qemu is already containing a dirty bitmap with the same name
+ * as a migrated bitmap (for the same node), then, if their granularities are
+ * the same the migration will be done, otherwise the error will be generated.
+ *
+ * If destination qemu doesn't contain such bitmap it will be created.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
+ * [ 1 byte: node name size ] \ flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be32: granularity
+ * 1 byte: bitmap enabled flag
+ *
+ * # Complete of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer ] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/block.h"
+#include "migration/migration.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include <assert.h>
+
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
+ * bit should be set. */
+#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
+#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
+#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
+#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
+#define DIRTY_BITMAP_MIG_FLAG_START 0x10
+#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
+#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
+
+#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
+
+#define DEBUG_DIRTY_BITMAP_MIGRATION 0
+
+typedef struct DirtyBitmapMigBitmapState {
+ /* Written during setup phase. */
+ BlockDriverState *bs;
+ const char *node_name;
+ BdrvDirtyBitmap *bitmap;
+ uint64_t total_sectors;
+ uint64_t sectors_per_chunk;
+ QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
+
+ /* For bulk phase. */
+ bool bulk_completed;
+ uint64_t cur_sector;
+} DirtyBitmapMigBitmapState;
+
+typedef struct DirtyBitmapMigState {
+ QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
+
+ bool bulk_completed;
+
+ /* for send_bitmap_bits() */
+ BlockDriverState *prev_bs;
+ BdrvDirtyBitmap *prev_bitmap;
+} DirtyBitmapMigState;
+
+typedef struct DirtyBitmapLoadState {
+ uint32_t flags;
+ char node_name[256];
+ char bitmap_name[256];
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+} DirtyBitmapLoadState;
+
+static DirtyBitmapMigState dirty_bitmap_mig_state;
+
+typedef struct DirtyBitmapLoadBitmapState {
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ bool migrated;
+} DirtyBitmapLoadBitmapState;
+static GSList *enabled_bitmaps;
+QemuMutex finish_lock;
+
+void init_dirty_bitmap_incoming_migration(void)
+{
+ qemu_mutex_init(&finish_lock);
+}
+
+static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
+{
+ uint8_t flags = qemu_get_byte(f);
+ if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+ flags = flags << 8 | qemu_get_byte(f);
+ if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+ flags = flags << 16 | qemu_get_be16(f);
+ }
+ }
+
+ return flags;
+}
+
+static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
+{
+ if (!(flags & 0xffffff00)) {
+ qemu_put_byte(f, flags);
+ return;
+ }
+
+ if (!(flags & 0xffff0000)) {
+ qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
+ return;
+ }
+
+ qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
+}
+
+static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint32_t additional_flags)
+{
+ BlockDriverState *bs = dbms->bs;
+ BdrvDirtyBitmap *bitmap = dbms->bitmap;
+ uint32_t flags = additional_flags;
+ trace_send_bitmap_header_enter();
+
+ if (bs != dirty_bitmap_mig_state.prev_bs) {
+ dirty_bitmap_mig_state.prev_bs = bs;
+ flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
+ }
+
+ if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
+ dirty_bitmap_mig_state.prev_bitmap = bitmap;
+ flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
+ }
+
+ qemu_put_bitmap_flags(f, flags);
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ qemu_put_counted_string(f, dbms->node_name);
+ }
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
+ }
+}
+
+static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
+ qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
+ qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
+}
+
+static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
+}
+
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint64_t start_sector, uint32_t nr_sectors)
+{
+ /* align for buffer_is_zero() */
+ uint64_t align = 4 * sizeof(long);
+ uint64_t unaligned_size =
+ bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
+ start_sector, nr_sectors);
+ uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
+ uint8_t *buf = g_malloc0(buf_size);
+ uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+ bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
+ start_sector, nr_sectors);
+
+ if (buffer_is_zero(buf, buf_size)) {
+ g_free(buf);
+ buf = NULL;
+ flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+ }
+
+ trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+ send_bitmap_header(f, dbms, flags);
+
+ qemu_put_be64(f, start_sector);
+ qemu_put_be32(f, nr_sectors);
+
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration. */
+ if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ qemu_fflush(f);
+ } else {
+ qemu_put_be64(f, buf_size);
+ qemu_put_buffer(f, buf, buf_size);
+ }
+
+ g_free(buf);
+}
+
+
+/* Called with iothread lock taken. */
+
+static void init_dirty_bitmap_migration(void)
+{
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ DirtyBitmapMigBitmapState *dbms;
+ BdrvNextIterator it;
+ uint64_t total_bytes = 0;
+
+ dirty_bitmap_mig_state.bulk_completed = false;
+ dirty_bitmap_mig_state.prev_bs = NULL;
+ dirty_bitmap_mig_state.prev_bitmap = NULL;
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
+ bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
+ if (!bdrv_dirty_bitmap_name(bitmap)) {
+ continue;
+ }
+
+ if (!bdrv_get_device_or_node_name(bs)) {
+ /* not named non-root node */
+ continue;
+ }
+
+ dbms = g_new0(DirtyBitmapMigBitmapState, 1);
+ dbms->bs = bs;
+ dbms->node_name = bdrv_get_node_name(bs);
+ if (!dbms->node_name || dbms->node_name[0] == '\0') {
+ dbms->node_name = bdrv_get_device_name(bs);
+ }
+ dbms->bitmap = bitmap;
+ dbms->total_sectors = bdrv_nb_sectors(bs);
+ dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+ bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+ total_bytes +=
+ bdrv_dirty_bitmap_serialization_size(bitmap,
+ 0, dbms->total_sectors);
+
+ QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
+ dbms, entry);
+ }
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
+ dbms->sectors_per_chunk);
+
+ send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
+
+ dbms->cur_sector += nr_sectors;
+ if (dbms->cur_sector >= dbms->total_sectors) {
+ dbms->bulk_completed = true;
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase(QEMUFile *f, bool limit)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ while (!dbms->bulk_completed) {
+ bulk_phase_send_chunk(f, dbms);
+ if (limit && qemu_file_rate_limit(f)) {
+ return;
+ }
+ }
+ }
+
+ dirty_bitmap_mig_state.bulk_completed = true;
+}
+
+/* Called with iothread lock taken. */
+static void dirty_bitmap_mig_cleanup(void)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
+ QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
+ g_free(dbms);
+ }
+}
+
+/* for SaveVMHandlers */
+static void dirty_bitmap_migration_cleanup(void *opaque)
+{
+ dirty_bitmap_mig_cleanup();
+}
+
+static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
+{
+ trace_dirty_bitmap_save_iterate(
+ migration_in_postcopy(migrate_get_current()));
+
+ if (migration_in_postcopy(migrate_get_current()) &&
+ !dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, true);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return dirty_bitmap_mig_state.bulk_completed;
+}
+
+/* Called with iothread lock taken. */
+
+static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ trace_dirty_bitmap_save_complete_enter();
+
+ if (!dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, false);
+ }
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_complete(f, dbms);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ trace_dirty_bitmap_save_complete_finish();
+
+ dirty_bitmap_mig_cleanup();
+ return 0;
+}
+
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ uint64_t pending = 0;
+
+ qemu_mutex_lock_iothread();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
+ uint64_t sectors = dbms->bulk_completed ? 0 :
+ dbms->total_sectors - dbms->cur_sector;
+
+ pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
+ }
+
+ qemu_mutex_unlock_iothread();
+
+ trace_dirty_bitmap_save_pending(pending, max_size);
+
+ *res_postcopy_only += pending;
+}
+
+/* First occurrence of this bitmap. It should be created if doesn't exist */
+static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ uint32_t granularity = qemu_get_be32(f);
+ bool enabled = qemu_get_byte(f);
+
+ if (!s->bitmap) {
+ s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
+ s->bitmap_name, &local_err);
+ if (!s->bitmap) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+ } else {
+ uint32_t dest_granularity =
+ bdrv_dirty_bitmap_granularity(s->bitmap);
+ if (dest_granularity != granularity) {
+ fprintf(stderr,
+ "Error: "
+ "Migrated bitmap granularity (%" PRIu32 ") "
+ "doesn't match the destination bitmap '%s' "
+ "granularity (%" PRIu32 ")\n",
+ granularity,
+ bdrv_dirty_bitmap_name(s->bitmap),
+ dest_granularity);
+ return -EINVAL;
+ }
+ }
+
+ bdrv_disable_dirty_bitmap(s->bitmap);
+ if (enabled) {
+ DirtyBitmapLoadBitmapState *b;
+
+ bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ b = g_new(DirtyBitmapLoadBitmapState, 1);
+ b->bs = s->bs;
+ b->bitmap = s->bitmap;
+ b->migrated = false;
+ enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
+ }
+
+ return 0;
+}
+
+void dirty_bitmap_mig_before_vm_start(void)
+{
+ GSList *item;
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->migrated) {
+ bdrv_enable_dirty_bitmap(b->bitmap);
+ } else {
+ bdrv_dirty_bitmap_enable_successor(b->bitmap);
+ }
+
+ g_free(b);
+ }
+
+ g_slist_free(enabled_bitmaps);
+ enabled_bitmaps = NULL;
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ GSList *item;
+ trace_dirty_bitmap_load_complete();
+ bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->bitmap == s->bitmap) {
+ b->migrated = true;
+ }
+ }
+
+ if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
+ if (enabled_bitmaps == NULL) {
+ /* in postcopy */
+ AioContext *aio_context = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
+ bdrv_enable_dirty_bitmap(s->bitmap);
+
+ aio_context_release(aio_context);
+ } else {
+ /* target not started, successor is empty */
+ bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
+ }
+ }
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ uint64_t first_sector = qemu_get_be64(f);
+ uint32_t nr_sectors = qemu_get_be32(f);
+ trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ trace_dirty_bitmap_load_bits_zeroes();
+ bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
+ nr_sectors, false);
+ } else {
+ uint8_t *buf;
+ uint64_t buf_size = qemu_get_be64(f);
+ uint64_t needed_size =
+ bdrv_dirty_bitmap_serialization_size(s->bitmap,
+ first_sector, nr_sectors);
+
+ if (needed_size > buf_size) {
+ fprintf(stderr,
+ "Error: Migrated bitmap granularity doesn't "
+ "match the destination bitmap '%s' granularity\n",
+ bdrv_dirty_bitmap_name(s->bitmap));
+ return -EINVAL;
+ }
+
+ buf = g_malloc(buf_size);
+ qemu_get_buffer(f, buf, buf_size);
+ bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
+ first_sector,
+ nr_sectors, false);
+ g_free(buf);
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ s->flags = qemu_get_bitmap_flags(f);
+ trace_dirty_bitmap_load_header(s->flags);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ if (!qemu_get_counted_string(f, s->node_name)) {
+ fprintf(stderr, "Unable to read node name string\n");
+ return -EINVAL;
+ }
+ s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+ if (!s->bs) {
+ error_report("%s", error_get_pretty(local_err));
+ error_free(local_err);
+ return -EINVAL;
+ }
+ } else if (!s->bs) {
+ fprintf(stderr, "Error: block device name is not set\n");
+ return -EINVAL;
+ }
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ if (!qemu_get_counted_string(f, s->bitmap_name)) {
+ fprintf(stderr, "Unable to read node name string\n");
+ return -EINVAL;
+ }
+ s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+ /* bitmap may be NULL here, it wouldn't be an error if it is the
+ * first occurrence of the bitmap */
+ if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+ fprintf(stderr, "Error: unknown dirty bitmap "
+ "'%s' for block device '%s'\n",
+ s->bitmap_name, s->node_name);
+ return -EINVAL;
+ }
+ } else if (!s->bitmap) {
+ fprintf(stderr, "Error: block device name is not set\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
+{
+ static DirtyBitmapLoadState s;
+
+ int ret = 0;
+
+ trace_dirty_bitmap_load_enter();
+
+ do {
+ dirty_bitmap_load_header(f, &s);
+
+ if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
+ ret = dirty_bitmap_load_start(f, &s);
+ } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
+ dirty_bitmap_load_complete(f, &s);
+ } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
+ ret = dirty_bitmap_load_bits(f, &s);
+ }
+
+ if (!ret) {
+ ret = qemu_file_get_error(f);
+ }
+
+ if (ret) {
+ return ret;
+ }
+ } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
+
+ trace_dirty_bitmap_load_success();
+ return 0;
+}
+
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms = NULL;
+ init_dirty_bitmap_migration();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_start(f, dbms);
+ }
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return 0;
+}
+
+static bool dirty_bitmap_is_active(void *opaque)
+{
+ return migrate_dirty_bitmaps();
+}
+
+static bool dirty_bitmap_is_active_iterate(void *opaque)
+{
+ return dirty_bitmap_is_active(opaque) && !runstate_is_running();
+}
+
+static bool dirty_bitmap_has_postcopy(void *opaque)
+{
+ return true;
+}
+
+static SaveVMHandlers savevm_dirty_bitmap_handlers = {
+ .save_live_setup = dirty_bitmap_save_setup,
+ .save_live_complete_postcopy = dirty_bitmap_save_complete,
+ .save_live_complete_precopy = dirty_bitmap_save_complete,
+ .has_postcopy = dirty_bitmap_has_postcopy,
+ .save_live_pending = dirty_bitmap_save_pending,
+ .save_live_iterate = dirty_bitmap_save_iterate,
+ .is_active_iterate = dirty_bitmap_is_active_iterate,
+ .load_state = dirty_bitmap_load,
+ .cleanup = dirty_bitmap_migration_cleanup,
+ .is_active = dirty_bitmap_is_active,
+};
+
+void dirty_bitmap_mig_init(void)
+{
+ QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
+
+ register_savevm_live(NULL, "dirty-bitmap", 0, 1,
+ &savevm_dirty_bitmap_handlers,
+ &dirty_bitmap_mig_state);
+}
diff --git a/migration/migration.c b/migration/migration.c
index 080d487..97fde30 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -387,6 +387,9 @@ static void process_incoming_migration_co(void *opaque)
int ret;
mis = migration_incoming_state_new(f);
+
+ init_dirty_bitmap_incoming_migration();
+
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
diff --git a/migration/savevm.c b/migration/savevm.c
index 4eb1640..4f6db95 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1592,6 +1592,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_vmstart();
+ dirty_bitmap_mig_before_vm_start();
+
if (autostart) {
/* Hold onto your hats, starting the CPU */
vm_start();
diff --git a/migration/trace-events b/migration/trace-events
index c3f726a..6e4d8e4 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -213,3 +213,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"
+
+# migration/block-dirty-bitmap.c
+send_bitmap_header_enter(void) ""
+send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
+dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
+dirty_bitmap_save_complete_enter(void) ""
+dirty_bitmap_save_complete_finish(void) ""
+dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
+dirty_bitmap_load_complete(void) ""
+dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
+dirty_bitmap_load_bits_zeroes(void) ""
+dirty_bitmap_load_header(uint32_t flags) "flags %x"
+dirty_bitmap_load_enter(void) ""
+dirty_bitmap_load_success(void) ""
diff --git a/vl.c b/vl.c
index d77dd86..928c7c5 100644
--- a/vl.c
+++ b/vl.c
@@ -4466,6 +4466,7 @@ int main(int argc, char **argv, char **envp)
blk_mig_init();
ram_mig_init();
+ dirty_bitmap_mig_init();
/* If the currently selected machine wishes to override the units-per-bus
* property of its default HBA interface type, do so now. */
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2016-11-22 17:54 ` [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps Vladimir Sementsov-Ogievskiy
@ 2017-01-24 9:40 ` Juan Quintela
2017-02-01 23:02 ` Max Reitz
1 sibling, 0 replies; 40+ messages in thread
From: Juan Quintela @ 2017-01-24 9:40 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: qemu-block, qemu-devel, pbonzini, armbru, eblake, famz, stefanha,
amit.shah, mreitz, kwolf, peter.maydell, dgilbert, den, jsnow,
lirans
Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:
> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
> associated with root nodes and non-root named nodes are migrated.
>
> If destination qemu is already containing a dirty bitmap with the same name
> as a migrated bitmap (for the same node), then, if their granularities are
> the same the migration will be done, otherwise the error will be generated.
>
> If destination qemu doesn't contain such bitmap it will be created.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
> new file mode 100644
> index 0000000..28e3732
I only did a fast look at this file, nothing obvious is wrong, but it
was quick O:-)
So
Reviewed-by: Juan Quintela <quintela@redhat.com>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2016-11-22 17:54 ` [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps Vladimir Sementsov-Ogievskiy
2017-01-24 9:40 ` Juan Quintela
@ 2017-02-01 23:02 ` Max Reitz
1 sibling, 0 replies; 40+ messages in thread
From: Max Reitz @ 2017-02-01 23:02 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
kwolf, peter.maydell, dgilbert, den, jsnow, lirans
[-- Attachment #1: Type: text/plain, Size: 1350 bytes --]
On 22.11.2016 18:54, Vladimir Sementsov-Ogievskiy wrote:
> Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
> associated with root nodes and non-root named nodes are migrated.
>
> If destination qemu is already containing a dirty bitmap with the same name
> as a migrated bitmap (for the same node), then, if their granularities are
> the same the migration will be done, otherwise the error will be generated.
>
> If destination qemu doesn't contain such bitmap it will be created.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> include/migration/block.h | 1 +
> include/migration/migration.h | 4 +
> migration/Makefile.objs | 2 +-
> migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
> migration/migration.c | 3 +
> migration/savevm.c | 2 +
> migration/trace-events | 14 +
> vl.c | 1 +
> 8 files changed, 705 insertions(+), 1 deletion(-)
> create mode 100644 migration/block-dirty-bitmap.c
From someone who doesn't know a thing about migration, a rather cursory:
Reviewed-by: Max Reitz <mreitz@redhat.com>
But at least that probably means the set of code covered by this R-b is
disjoint from the set of code covered by Juan's R-b. :-)
Max
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 512 bytes --]
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Qemu-devel] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps
2016-11-21 15:29 [Qemu-devel] [PATCH v3 00/17] Dirty bitmaps postcopy migration Vladimir Sementsov-Ogievskiy
@ 2016-11-21 15:29 ` Vladimir Sementsov-Ogievskiy
0 siblings, 0 replies; 40+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2016-11-21 15:29 UTC (permalink / raw)
To: qemu-block, qemu-devel
Cc: pbonzini, armbru, eblake, famz, stefanha, amit.shah, quintela,
mreitz, kwolf, peter.maydell, dgilbert, den, jsnow, vsementsov,
lirans
Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.
If destination qemu is already containing a dirty bitmap with the same name
as a migrated bitmap (for the same node), then, if their granularities are
the same the migration will be done, otherwise the error will be generated.
If destination qemu doesn't contain such bitmap it will be created.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
include/migration/block.h | 1 +
include/migration/migration.h | 4 +
migration/Makefile.objs | 2 +-
migration/block-dirty-bitmap.c | 679 +++++++++++++++++++++++++++++++++++++++++
migration/migration.c | 3 +
migration/savevm.c | 2 +
migration/trace-events | 14 +
vl.c | 1 +
8 files changed, 705 insertions(+), 1 deletion(-)
create mode 100644 migration/block-dirty-bitmap.c
diff --git a/include/migration/block.h b/include/migration/block.h
index 41a1ac8..8333c43 100644
--- a/include/migration/block.h
+++ b/include/migration/block.h
@@ -14,6 +14,7 @@
#ifndef MIGRATION_BLOCK_H
#define MIGRATION_BLOCK_H
+void dirty_bitmap_mig_init(void);
void blk_mig_init(void);
int blk_mig_active(void);
uint64_t blk_mig_bytes_transferred(void);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 6cbb9c6..af6e0b7 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -361,4 +361,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);
+
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
#endif
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 3f3e237..3031636 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -10,5 +10,5 @@ common-obj-y += qjson.o
common-obj-$(CONFIG_RDMA) += rdma.o
-common-obj-y += block.o
+common-obj-y += block.o block-dirty-bitmap.o
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 0000000..28e3732
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,679 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (C) 2016 Parallels IP Holdings GmbH. All rights reserved.
+ *
+ * Authors:
+ * Liran Schour <lirans@il.ibm.com>
+ * Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM
+ * copyright are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * ***
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only named dirty
+ * bitmaps, associated with root nodes and non-root named nodes are migrated.
+ *
+ * If destination qemu already contains a dirty bitmap with the same name
+ * as a migrated bitmap (for the same node), then, if their granularities are
+ * the same, the migration will be done, otherwise an error will be generated.
+ *
+ * If destination qemu doesn't contain such bitmap it will be created.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
+ * [ 1 byte: node name size ] \ flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be32: granularity
+ * 1 byte: bitmap enabled flag
+ *
+ * # Complete of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer ] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/block.h"
+#include "migration/migration.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include <assert.h>
+
+/* Payload size of one bulk-phase data chunk, in bytes of serialized bitmap. */
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy from one to four bytes. In all but one the 7-th (EXTRA_FLAGS)
+ * bit should be set. */
+#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
+#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
+#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
+#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
+#define DIRTY_BITMAP_MIG_FLAG_START 0x10
+#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
+#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40
+
+/* Size markers: EXTRA_FLAGS bit(s) set in each leading byte tell the reader
+ * that more flag bytes follow (see qemu_{get,put}_bitmap_flags). */
+#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16 0x8000
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32 0x8080
+
+#define DEBUG_DIRTY_BITMAP_MIGRATION 0
+
+/* Source-side state of one bitmap being migrated. */
+typedef struct DirtyBitmapMigBitmapState {
+ /* Written during setup phase. */
+ BlockDriverState *bs;
+ const char *node_name;
+ BdrvDirtyBitmap *bitmap;
+ uint64_t total_sectors;
+ uint64_t sectors_per_chunk;
+ QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry;
+
+ /* For bulk phase. */
+ bool bulk_completed;
+ uint64_t cur_sector;
+} DirtyBitmapMigBitmapState;
+
+/* Source-side global state: the list of bitmaps to send plus the
+ * prev_bs/prev_bitmap pair used by send_bitmap_header() to elide
+ * repeated names from consecutive chunks. */
+typedef struct DirtyBitmapMigState {
+ QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list;
+
+ bool bulk_completed;
+
+ /* for send_bitmap_bits() */
+ BlockDriverState *prev_bs;
+ BdrvDirtyBitmap *prev_bitmap;
+} DirtyBitmapMigState;
+
+/* Incoming-side parser state; node/bitmap names are sticky across chunks. */
+typedef struct DirtyBitmapLoadState {
+ uint32_t flags;
+ char node_name[256];
+ char bitmap_name[256];
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+} DirtyBitmapLoadState;
+
+static DirtyBitmapMigState dirty_bitmap_mig_state;
+
+/* Incoming-side record of a bitmap that was enabled on the source; tracks
+ * whether its data has fully arrived (see dirty_bitmap_mig_before_vm_start). */
+typedef struct DirtyBitmapLoadBitmapState {
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ bool migrated;
+} DirtyBitmapLoadBitmapState;
+static GSList *enabled_bitmaps;
+/* Protects enabled_bitmaps and the migrated flags.
+ * NOTE(review): non-static global - consider making it static. */
+QemuMutex finish_lock;
+
+/* Incoming side: initialize the lock protecting enabled_bitmaps. Called
+ * once from process_incoming_migration_co() before any chunk is loaded. */
+void init_dirty_bitmap_incoming_migration(void)
+{
+    qemu_mutex_init(&finish_lock);
+}
+
+/* Read the variable-size flags field (1, 2 or 4 bytes): each leading byte
+ * with the EXTRA_FLAGS bit set announces that more bytes follow.
+ * Counterpart of qemu_put_bitmap_flags(). */
+static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
+{
+    /* Accumulate in 32 bits. With a uint8_t accumulator, as originally
+     * written, "flags = flags << 8 | ..." is truncated back to 8 bits on
+     * assignment, corrupting the 16- and 32-bit flag encodings. */
+    uint32_t flags = qemu_get_byte(f);
+
+    if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+        flags = flags << 8 | qemu_get_byte(f);
+        if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+            flags = flags << 16 | qemu_get_be16(f);
+        }
+    }
+
+    return flags;
+}
+
+/* Write flags using the smallest of the 1-, 2- or 4-byte encodings that
+ * fits, tagging multi-byte forms with the SIZE_16/SIZE_32 markers so the
+ * reader knows how many bytes follow. Counterpart of qemu_get_bitmap_flags. */
+static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
+{
+    /* fits in one byte */
+    if (!(flags & 0xffffff00)) {
+        qemu_put_byte(f, flags);
+        return;
+    }
+
+    /* fits in two bytes */
+    if (!(flags & 0xffff0000)) {
+        qemu_put_be16(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_16);
+        return;
+    }
+
+    qemu_put_be32(f, flags | DIRTY_BITMAP_MIG_FLAGS_SIZE_32);
+}
+
+/* Emit a chunk header: flags plus, only when they changed since the
+ * previous chunk (tracked via prev_bs/prev_bitmap in dirty_bitmap_mig_state),
+ * the node and bitmap names. Keeps consecutive chunks for one bitmap compact. */
+static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+                               uint32_t additional_flags)
+{
+    BlockDriverState *bs = dbms->bs;
+    BdrvDirtyBitmap *bitmap = dbms->bitmap;
+    uint32_t flags = additional_flags;
+    trace_send_bitmap_header_enter();
+
+    if (bs != dirty_bitmap_mig_state.prev_bs) {
+        dirty_bitmap_mig_state.prev_bs = bs;
+        flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
+    }
+
+    if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
+        dirty_bitmap_mig_state.prev_bitmap = bitmap;
+        flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
+    }
+
+    qemu_put_bitmap_flags(f, flags);
+
+    if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+        qemu_put_counted_string(f, dbms->node_name);
+    }
+
+    if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+        qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
+    }
+}
+
+/* Emit the START chunk for a bitmap: header, granularity (be32), and a
+ * one-byte enabled flag. */
+static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+    send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
+    qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
+    qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
+}
+
+/* Emit the COMPLETE chunk: just a header carrying the COMPLETE flag. */
+static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+    send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
+}
+
+/* Emit a BITS chunk: the serialized range [start_sector, +nr_sectors) of the
+ * bitmap. An all-zero payload is replaced by the ZEROES flag with no data. */
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+                             uint64_t start_sector, uint32_t nr_sectors)
+{
+    /* align for buffer_is_zero() */
+    uint64_t align = 4 * sizeof(long);
+    uint64_t unaligned_size =
+        bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
+                                             start_sector, nr_sectors);
+    /* round up to 'align'; g_malloc0 guarantees padding bytes are zero */
+    uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
+    uint8_t *buf = g_malloc0(buf_size);
+    uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+    bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
+                                     start_sector, nr_sectors);
+
+    if (buffer_is_zero(buf, buf_size)) {
+        g_free(buf);
+        buf = NULL;
+        flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+    }
+
+    trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+    send_bitmap_header(f, dbms, flags);
+
+    qemu_put_be64(f, start_sector);
+    qemu_put_be32(f, nr_sectors);
+
+    /* if a block is zero we need to flush here since the network
+     * bandwidth is now a lot higher than the storage device bandwidth.
+     * thus if we queue zero blocks we slow down the migration. */
+    if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+        qemu_fflush(f);
+    } else {
+        qemu_put_be64(f, buf_size);
+        qemu_put_buffer(f, buf, buf_size);
+    }
+
+    /* g_free(NULL) is a no-op in the ZEROES case */
+    g_free(buf);
+}
+
+
+/* Called with iothread lock taken. */
+
+/* Source-side setup: collect every named dirty bitmap of every named node
+ * into dirty_bitmap_mig_state.dbms_list and reset bulk-phase bookkeeping. */
+static void init_dirty_bitmap_migration(void)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+    DirtyBitmapMigBitmapState *dbms;
+    BdrvNextIterator it;
+    /* NOTE(review): total_bytes is accumulated below but never used. */
+    uint64_t total_bytes = 0;
+
+    dirty_bitmap_mig_state.bulk_completed = false;
+    dirty_bitmap_mig_state.prev_bs = NULL;
+    dirty_bitmap_mig_state.prev_bitmap = NULL;
+
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
+             bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
+            /* only named bitmaps are migrated */
+            if (!bdrv_dirty_bitmap_name(bitmap)) {
+                continue;
+            }
+
+            /* NOTE(review): verify this check - it looks like
+             * bdrv_get_device_or_node_name() may return an empty string
+             * rather than NULL for anonymous nodes. */
+            if (!bdrv_get_device_or_node_name(bs)) {
+                /* not named non-root node */
+                continue;
+            }
+
+            dbms = g_new0(DirtyBitmapMigBitmapState, 1);
+            dbms->bs = bs;
+            /* prefer the node name; fall back to the device name */
+            dbms->node_name = bdrv_get_node_name(bs);
+            if (!dbms->node_name || dbms->node_name[0] == '\0') {
+                dbms->node_name = bdrv_get_device_name(bs);
+            }
+            dbms->bitmap = bitmap;
+            dbms->total_sectors = bdrv_nb_sectors(bs);
+            /* CHUNK_SIZE bytes of serialized bitmap hold CHUNK_SIZE * 8
+             * bits, each covering 'granularity' bytes of disk */
+            dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+                bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+            total_bytes +=
+                bdrv_dirty_bitmap_serialization_size(bitmap,
+                                                     0, dbms->total_sectors);
+
+            QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
+                                 dbms, entry);
+        }
+    }
+}
+
+/* Called with no lock taken. */
+/* Send the next chunk (at most sectors_per_chunk sectors) of one bitmap,
+ * advance its cursor, and mark it done when the end is reached. */
+static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+    uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
+                              dbms->sectors_per_chunk);
+
+    send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
+
+    dbms->cur_sector += nr_sectors;
+    if (dbms->cur_sector >= dbms->total_sectors) {
+        dbms->bulk_completed = true;
+    }
+}
+
+/* Called with no lock taken. */
+/* Send bulk data for all bitmaps. With 'limit' set, stop as soon as the
+ * stream's rate limit is hit; the global bulk_completed flag is only set
+ * once every bitmap has been fully sent. */
+static void bulk_phase(QEMUFile *f, bool limit)
+{
+    DirtyBitmapMigBitmapState *dbms;
+
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        while (!dbms->bulk_completed) {
+            bulk_phase_send_chunk(f, dbms);
+            if (limit && qemu_file_rate_limit(f)) {
+                return;
+            }
+        }
+    }
+
+    dirty_bitmap_mig_state.bulk_completed = true;
+}
+
+/* Called with iothread lock taken. */
+/* Free every per-bitmap entry of the source-side list. */
+static void dirty_bitmap_mig_cleanup(void)
+{
+    DirtyBitmapMigBitmapState *dbms;
+
+    while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
+        QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
+        g_free(dbms);
+    }
+}
+
+/* for SaveVMHandlers */
+/* SaveVMHandlers.cleanup: thin wrapper matching the handler signature. */
+static void dirty_bitmap_migration_cleanup(void *opaque)
+{
+    dirty_bitmap_mig_cleanup();
+}
+
+/* SaveVMHandlers.save_live_iterate: bitmap data is only transferred once we
+ * are in postcopy; before that each call just emits an EOS marker.
+ * Returns nonzero when the bulk phase has finished. */
+static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
+{
+    trace_dirty_bitmap_save_iterate(
+        migration_in_postcopy(migrate_get_current()));
+
+    if (migration_in_postcopy(migrate_get_current()) &&
+        !dirty_bitmap_mig_state.bulk_completed) {
+        /* rate-limited: may stop mid-bitmap and resume next call */
+        bulk_phase(f, true);
+    }
+
+    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    return dirty_bitmap_mig_state.bulk_completed;
+}
+
+/* Called with iothread lock taken. */
+
+/* SaveVMHandlers.save_live_complete_{precopy,postcopy}: flush any remaining
+ * bulk data without rate limiting, send a COMPLETE chunk per bitmap plus a
+ * final EOS, then free the source-side state. */
+static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
+{
+    DirtyBitmapMigBitmapState *dbms;
+    trace_dirty_bitmap_save_complete_enter();
+
+    if (!dirty_bitmap_mig_state.bulk_completed) {
+        bulk_phase(f, false);
+    }
+
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        send_bitmap_complete(f, dbms);
+    }
+
+    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    trace_dirty_bitmap_save_complete_finish();
+
+    dirty_bitmap_mig_cleanup();
+    return 0;
+}
+
+/* SaveVMHandlers.save_live_pending: estimate the amount of data still to
+ * send. Everything is reported as postcopy-only, since bits are only
+ * transferred during postcopy (see dirty_bitmap_save_iterate). */
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+                                      uint64_t max_size,
+                                      uint64_t *res_precopy_only,
+                                      uint64_t *res_compatible,
+                                      uint64_t *res_postcopy_only)
+{
+    DirtyBitmapMigBitmapState *dbms;
+    uint64_t pending = 0;
+
+    qemu_mutex_lock_iothread();
+
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
+        uint64_t sectors = dbms->bulk_completed ? 0 :
+                           dbms->total_sectors - dbms->cur_sector;
+
+        /* number of granularity-sized units left, rounded up;
+         * NOTE(review): this counts bits, used here as a rough estimate */
+        pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
+    }
+
+    qemu_mutex_unlock_iothread();
+
+    trace_dirty_bitmap_save_pending(pending, max_size);
+
+    *res_postcopy_only += pending;
+}
+
+/* First occurrence of this bitmap. It should be created if doesn't exist */
+/* Handle a START chunk: create the bitmap on the destination (or validate
+ * granularity against a pre-existing one), keep it disabled while bits
+ * arrive, and, if it was enabled on the source, give it a successor to
+ * track new writes and record it in enabled_bitmaps.
+ * Returns 0 on success, -EINVAL on error. */
+static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    Error *local_err = NULL;
+    uint32_t granularity = qemu_get_be32(f);
+    bool enabled = qemu_get_byte(f);
+
+    if (!s->bitmap) {
+        /* no bitmap with this name yet: create it */
+        s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
+                                             s->bitmap_name, &local_err);
+        if (!s->bitmap) {
+            error_report_err(local_err);
+            return -EINVAL;
+        }
+    } else {
+        /* pre-existing bitmap: granularities must match */
+        uint32_t dest_granularity =
+            bdrv_dirty_bitmap_granularity(s->bitmap);
+        if (dest_granularity != granularity) {
+            fprintf(stderr,
+                    "Error: "
+                    "Migrated bitmap granularity (%" PRIu32 ") "
+                    "doesn't match the destination bitmap '%s' "
+                    "granularity (%" PRIu32 ")\n",
+                    granularity,
+                    bdrv_dirty_bitmap_name(s->bitmap),
+                    dest_granularity);
+            return -EINVAL;
+        }
+    }
+
+    /* keep disabled until all its bits have been received */
+    bdrv_disable_dirty_bitmap(s->bitmap);
+    if (enabled) {
+        DirtyBitmapLoadBitmapState *b;
+
+        /* the successor collects writes made while migration is ongoing */
+        bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -EINVAL;
+        }
+
+        b = g_new(DirtyBitmapLoadBitmapState, 1);
+        b->bs = s->bs;
+        b->bitmap = s->bitmap;
+        b->migrated = false;
+        enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
+    }
+
+    return 0;
+}
+
+/* Incoming side: called just before the VM starts (postcopy run BH).
+ * For each bitmap that was enabled on the source: if its data already fully
+ * arrived, enable it directly; otherwise enable its successor so new guest
+ * writes are tracked while the remaining bits are still migrating. */
+void dirty_bitmap_mig_before_vm_start(void)
+{
+    GSList *item;
+
+    qemu_mutex_lock(&finish_lock);
+
+    for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+        DirtyBitmapLoadBitmapState *b = item->data;
+
+        if (b->migrated) {
+            bdrv_enable_dirty_bitmap(b->bitmap);
+        } else {
+            bdrv_dirty_bitmap_enable_successor(b->bitmap);
+        }
+
+        g_free(b);
+    }
+
+    /* an empty list signals "postcopy running" to dirty_bitmap_load_complete */
+    g_slist_free(enabled_bitmaps);
+    enabled_bitmaps = NULL;
+
+    qemu_mutex_unlock(&finish_lock);
+}
+
+/* Handle a COMPLETE chunk: finish deserialization of s->bitmap, mark its
+ * enabled_bitmaps entry migrated, and, if the bitmap is frozen (has a
+ * successor), either reclaim and enable it (VM already running in postcopy)
+ * or drop the still-empty successor (VM not started yet). */
+static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    GSList *item;
+    trace_dirty_bitmap_load_complete();
+    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
+
+    qemu_mutex_lock(&finish_lock);
+
+    for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+        DirtyBitmapLoadBitmapState *b = item->data;
+
+        if (b->bitmap == s->bitmap) {
+            b->migrated = true;
+        }
+    }
+
+    if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
+        /* NULL list means dirty_bitmap_mig_before_vm_start already ran */
+        if (enabled_bitmaps == NULL) {
+            /* in postcopy */
+            AioContext *aio_context = bdrv_get_aio_context(s->bs);
+            aio_context_acquire(aio_context);
+
+            bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
+            bdrv_enable_dirty_bitmap(s->bitmap);
+
+            aio_context_release(aio_context);
+        } else {
+            /* target not started, successor is empty */
+            bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
+        }
+    }
+
+    qemu_mutex_unlock(&finish_lock);
+}
+
+/* Handle a BITS chunk: deserialize one range of bits into s->bitmap, either
+ * from an explicit buffer or as all-zeroes (ZEROES flag).
+ * Returns 0 on success, -EINVAL on a malformed chunk. */
+static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    uint64_t first_sector = qemu_get_be64(f);
+    uint32_t nr_sectors = qemu_get_be32(f);
+    trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+        trace_dirty_bitmap_load_bits_zeroes();
+        bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
+                                             nr_sectors, false);
+    } else {
+        uint8_t *buf;
+        uint64_t buf_size = qemu_get_be64(f);
+        uint64_t needed_size =
+            bdrv_dirty_bitmap_serialization_size(s->bitmap,
+                                                 first_sector, nr_sectors);
+
+        /* sender pads up, so received size must be at least needed_size;
+         * NOTE(review): the message blames granularity, but any size
+         * mismatch lands here - consider a more general wording */
+        if (needed_size > buf_size) {
+            fprintf(stderr,
+                    "Error: Migrated bitmap granularity doesn't "
+                    "match the destination bitmap '%s' granularity\n",
+                    bdrv_dirty_bitmap_name(s->bitmap));
+            return -EINVAL;
+        }
+
+        buf = g_malloc(buf_size);
+        qemu_get_buffer(f, buf, buf_size);
+        bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
+                                           first_sector,
+                                           nr_sectors, false);
+        g_free(buf);
+    }
+
+    return 0;
+}
+
+/* Parse one chunk header: flags plus optional node and bitmap names. Names
+ * are sticky - chunks that omit them reuse s->bs / s->bitmap from the
+ * previous chunk. Returns 0 on success, -EINVAL on malformed input. */
+static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    Error *local_err = NULL;
+    s->flags = qemu_get_bitmap_flags(f);
+    trace_dirty_bitmap_load_header(s->flags);
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+        if (!qemu_get_counted_string(f, s->node_name)) {
+            fprintf(stderr, "Unable to read node name string\n");
+            return -EINVAL;
+        }
+        s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+        if (!s->bs) {
+            error_report("%s", error_get_pretty(local_err));
+            error_free(local_err);
+            return -EINVAL;
+        }
+    } else if (!s->bs) {
+        fprintf(stderr, "Error: block device name is not set\n");
+        return -EINVAL;
+    }
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+        if (!qemu_get_counted_string(f, s->bitmap_name)) {
+            /* fixed copy-paste: this branch reads the bitmap name */
+            fprintf(stderr, "Unable to read bitmap name string\n");
+            return -EINVAL;
+        }
+        s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+        /* bitmap may be NULL here, it wouldn't be an error if it is the
+         * first occurrence of the bitmap */
+        if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+            fprintf(stderr, "Error: unknown dirty bitmap "
+                    "'%s' for block device '%s'\n",
+                    s->bitmap_name, s->node_name);
+            return -EINVAL;
+        }
+    } else if (!s->bitmap) {
+        /* fixed copy-paste: the missing piece here is the bitmap name */
+        fprintf(stderr, "Error: bitmap name is not set\n");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/* SaveVMHandlers.load_state: consume chunks until EOS, dispatching on the
+ * header flags. Returns 0 on success or a negative errno. */
+static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
+{
+    static DirtyBitmapLoadState s;
+
+    int ret = 0;
+
+    trace_dirty_bitmap_load_enter();
+
+    do {
+        /* Bug fix: the header result was previously discarded, so a
+         * malformed header (unknown node/bitmap, unreadable name) was
+         * followed by processing the chunk with a stale or NULL
+         * s.bs / s.bitmap. Bail out on header errors instead. */
+        ret = dirty_bitmap_load_header(f, &s);
+        if (ret) {
+            return ret;
+        }
+
+        if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
+            ret = dirty_bitmap_load_start(f, &s);
+        } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
+            dirty_bitmap_load_complete(f, &s);
+        } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
+            ret = dirty_bitmap_load_bits(f, &s);
+        }
+
+        if (!ret) {
+            ret = qemu_file_get_error(f);
+        }
+
+        if (ret) {
+            return ret;
+        }
+    } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
+
+    trace_dirty_bitmap_load_success();
+    return 0;
+}
+
+/* SaveVMHandlers.save_live_setup: build the bitmap list, send one START
+ * chunk per bitmap, then an EOS marker. */
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+{
+    DirtyBitmapMigBitmapState *dbms = NULL;
+    init_dirty_bitmap_migration();
+
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        send_bitmap_start(f, dbms);
+    }
+    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    return 0;
+}
+
+/* SaveVMHandlers.is_active: participate only when the dirty-bitmaps
+ * migration capability is enabled. */
+static bool dirty_bitmap_is_active(void *opaque)
+{
+    return migrate_dirty_bitmaps();
+}
+
+/* Iterate only while the VM is not running; while it runs, the data is
+ * deferred to postcopy (see dirty_bitmap_save_iterate). */
+static bool dirty_bitmap_is_active_iterate(void *opaque)
+{
+    return dirty_bitmap_is_active(opaque) && !runstate_is_running();
+}
+
+/* This state is always postcopy-able (see the has_postcopy handler). */
+static bool dirty_bitmap_has_postcopy(void *opaque)
+{
+    return true;
+}
+
+/* Handler table registered by dirty_bitmap_mig_init(). Note that precopy
+ * and postcopy completion deliberately share one implementation. */
+static SaveVMHandlers savevm_dirty_bitmap_handlers = {
+    .save_live_setup = dirty_bitmap_save_setup,
+    .save_live_complete_postcopy = dirty_bitmap_save_complete,
+    .save_live_complete_precopy = dirty_bitmap_save_complete,
+    .has_postcopy = dirty_bitmap_has_postcopy,
+    .save_live_pending = dirty_bitmap_save_pending,
+    .save_live_iterate = dirty_bitmap_save_iterate,
+    .is_active_iterate = dirty_bitmap_is_active_iterate,
+    .load_state = dirty_bitmap_load,
+    .cleanup = dirty_bitmap_migration_cleanup,
+    .is_active = dirty_bitmap_is_active,
+};
+
+/* Called from main() at startup: register the "dirty-bitmap" live savevm
+ * section (instance 0, version 1). */
+void dirty_bitmap_mig_init(void)
+{
+    QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
+
+    register_savevm_live(NULL, "dirty-bitmap", 0, 1,
+                         &savevm_dirty_bitmap_handlers,
+                         &dirty_bitmap_mig_state);
+}
diff --git a/migration/migration.c b/migration/migration.c
index 9543dbb..0921aae 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -387,6 +387,9 @@ static void process_incoming_migration_co(void *opaque)
int ret;
mis = migration_incoming_state_new(f);
+
+ init_dirty_bitmap_incoming_migration();
+
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
diff --git a/migration/savevm.c b/migration/savevm.c
index 4eb1640..4f6db95 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1592,6 +1592,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_vmstart();
+ dirty_bitmap_mig_before_vm_start();
+
if (autostart) {
/* Hold onto your hats, starting the CPU */
vm_start();
diff --git a/migration/trace-events b/migration/trace-events
index c3f726a..6e4d8e4 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -213,3 +213,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"
+
+# migration/block-dirty-bitmap.c
+send_bitmap_header_enter(void) ""
+send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags: %x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64 "\n"
+dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
+dirty_bitmap_save_complete_enter(void) ""
+dirty_bitmap_save_complete_finish(void) ""
+dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
+dirty_bitmap_load_complete(void) ""
+dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
+dirty_bitmap_load_bits_zeroes(void) ""
+dirty_bitmap_load_header(uint32_t flags) "flags %x"
+dirty_bitmap_load_enter(void) ""
+dirty_bitmap_load_success(void) ""
diff --git a/vl.c b/vl.c
index 319f641..9f90dbf 100644
--- a/vl.c
+++ b/vl.c
@@ -4466,6 +4466,7 @@ int main(int argc, char **argv, char **envp)
blk_mig_init();
ram_mig_init();
+ dirty_bitmap_mig_init();
/* If the currently selected machine wishes to override the units-per-bus
* property of its default HBA interface type, do so now. */
--
1.8.3.1
^ permalink raw reply related [flat|nested] 40+ messages in thread