If the secondary has to fail over during checkpointing, it is still in the old state (i.e. a different state than the primary). Thus we can't expose the primary state until after the checkpoint is sent. This fixes sporadic resets of client connections during failover. Signed-off-by: Lukas Straub --- migration/colo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/migration/colo.c b/migration/colo.c index a69782efc5..a3fc21e86e 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -430,12 +430,6 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } - qemu_event_reset(&s->colo_checkpoint_event); - colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); - if (local_err) { - goto out; - } - /* Disable block migration */ migrate_set_block_enabled(false, &local_err); qemu_mutex_lock_iothread(); @@ -494,6 +488,12 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + qemu_event_reset(&s->colo_checkpoint_event); + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); + if (local_err) { + goto out; + } + colo_receive_check_message(s->rp_state.from_dst_file, COLO_MESSAGE_VMSTATE_LOADED, &local_err); if (local_err) { -- 2.20.1