* [PATCH] migration: do not exit on incoming failure
@ 2024-04-17 22:13 Vladimir Sementsov-Ogievskiy
2024-04-18 14:27 ` Fabiano Rosas
2024-04-18 14:37 ` Daniel P. Berrangé
0 siblings, 2 replies; 9+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2024-04-17 22:13 UTC (permalink / raw)
To: peterx, farosas
Cc: vsementsov, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
We do set MIGRATION_FAILED state, but don't give a chance to
orchestrator to query migration state and get the error.
Let's report an error through QAPI like we do on outgoing migration.
migration-test is updated correspondingly.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
Doubt: is exiting on failure a contract? Will this commit break
something in Libvirt? Finally, could we just change the logic, or I need
and additional migration-parameter for new behavior?
migration/migration.c | 22 +++++++---------------
tests/qtest/migration-helpers.c | 13 ++++++++++---
tests/qtest/migration-helpers.h | 3 ++-
tests/qtest/migration-test.c | 14 +++++++-------
4 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 86bf76e925..3c203e767d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyState ps;
int ret;
+ Error *local_err = NULL;
assert(mis->from_src_file);
if (compress_threads_load_setup(mis->from_src_file)) {
- error_report("Failed to setup decompress threads");
+ error_setg(&local_err, "Failed to setup decompress threads");
goto fail;
}
@@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
}
if (ret < 0) {
- MigrationState *s = migrate_get_current();
-
- if (migrate_has_error(s)) {
- WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
- error_report_err(s->error);
- }
- }
- error_report("load of migration failed: %s", strerror(-ret));
+ error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
goto fail;
}
if (colo_incoming_co() < 0) {
+ error_setg(&local_err, "colo incoming failed");
goto fail;
}
migration_bh_schedule(process_incoming_migration_bh, mis);
return;
fail:
+ migrate_set_error(migrate_get_current(), local_err);
+ error_report_err(local_err);
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
- qemu_fclose(mis->from_src_file);
-
- multifd_recv_cleanup();
- compress_threads_load_cleanup();
-
- exit(EXIT_FAILURE);
+ migration_incoming_state_destroy();
}
/**
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index e451dbdbed..91c13bd566 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
wait_for_migration_status(who, "completed", NULL);
}
-void wait_for_migration_fail(QTestState *from, bool allow_active)
+void wait_for_migration_fail(QTestState *from, bool allow_active,
+ bool is_incoming)
{
g_test_timer_start();
QDict *rsp_return;
@@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
/* Is the machine currently running? */
rsp_return = qtest_qmp_assert_success_ref(from,
"{ 'execute': 'query-status' }");
- g_assert(qdict_haskey(rsp_return, "running"));
- g_assert(qdict_get_bool(rsp_return, "running"));
+ if (is_incoming) {
+ if (qdict_haskey(rsp_return, "running")) {
+ g_assert(!qdict_get_bool(rsp_return, "running"));
+ }
+ } else {
+ g_assert(qdict_haskey(rsp_return, "running"));
+ g_assert(qdict_get_bool(rsp_return, "running"));
+ }
qobject_unref(rsp_return);
}
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 3bf7ded1b9..7bd07059ae 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
void wait_for_migration_complete(QTestState *who);
-void wait_for_migration_fail(QTestState *from, bool allow_active);
+void wait_for_migration_fail(QTestState *from, bool allow_active,
+ bool is_incoming);
char *find_common_machine_version(const char *mtype, const char *var1,
const char *var2);
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 1d2cee87ea..e00b755f05 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1670,7 +1670,7 @@ static void test_baddest(void)
return;
}
migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
- wait_for_migration_fail(from, false);
+ wait_for_migration_fail(from, false, false);
test_migrate_end(from, to, false);
}
@@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
if (args->result != MIG_TEST_SUCCEED) {
bool allow_active = args->result == MIG_TEST_FAIL;
- wait_for_migration_fail(from, allow_active);
+ wait_for_migration_fail(from, allow_active, false);
if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
- qtest_set_expected_status(to, EXIT_FAILURE);
+ wait_for_migration_fail(to, true, true);
}
} else {
if (args->live) {
@@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
migrate_qmp(from, uri, "{}");
if (should_fail) {
- qtest_set_expected_status(to, EXIT_FAILURE);
- wait_for_migration_fail(from, true);
+ wait_for_migration_fail(to, true, true);
+ wait_for_migration_fail(from, true, false);
} else {
wait_for_migration_complete(from);
}
@@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
migrate_cancel(from);
/* Make sure QEMU process "to" exited */
- qtest_set_expected_status(to, EXIT_FAILURE);
- qtest_wait_qemu(to);
+ wait_for_migration_fail(to, true, true);
+ qtest_quit(to);
args = (MigrateStart){
.only_target = true,
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-17 22:13 [PATCH] migration: do not exit on incoming failure Vladimir Sementsov-Ogievskiy
@ 2024-04-18 14:27 ` Fabiano Rosas
2024-04-18 15:38 ` Vladimir Sementsov-Ogievskiy
2024-04-18 14:37 ` Daniel P. Berrangé
1 sibling, 1 reply; 9+ messages in thread
From: Fabiano Rosas @ 2024-04-18 14:27 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy, peterx
Cc: vsementsov, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> We do set MIGRATION_FAILED state, but don't give a chance to
> orchestrator to query migration state and get the error.
>
> Let's report an error through QAPI like we do on outgoing migration.
>
> migration-test is updated correspondingly.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>
> Doubt: is exiting on failure a contract? Will this commit break
> something in Libvirt? Finally, could we just change the logic, or I need
> and additional migration-parameter for new behavior?
It seems we depend on the non-zero value:
4aead69241 ("migration: reflect incoming failure to shell")
Author: Eric Blake <eblake@redhat.com>
Date: Tue Apr 16 15:50:41 2013 -0600
migration: reflect incoming failure to shell
Management apps like libvirt don't know to pay attention to
stderr unless there is a non-zero exit status.
* migration.c (process_incoming_migration_co): Exit with non-zero
status on failure.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1366149041-626-1-git-send-email-eblake@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
One idea would be to plumb the s->error somehow through
migration_shutdown() and allow qemu_cleanup() to change the status
value.
> migration/migration.c | 22 +++++++---------------
> tests/qtest/migration-helpers.c | 13 ++++++++++---
> tests/qtest/migration-helpers.h | 3 ++-
> tests/qtest/migration-test.c | 14 +++++++-------
> 4 files changed, 26 insertions(+), 26 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 86bf76e925..3c203e767d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
> MigrationIncomingState *mis = migration_incoming_get_current();
> PostcopyState ps;
> int ret;
> + Error *local_err = NULL;
>
> assert(mis->from_src_file);
>
> if (compress_threads_load_setup(mis->from_src_file)) {
> - error_report("Failed to setup decompress threads");
> + error_setg(&local_err, "Failed to setup decompress threads");
> goto fail;
> }
>
> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
> }
>
> if (ret < 0) {
> - MigrationState *s = migrate_get_current();
> -
> - if (migrate_has_error(s)) {
> - WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
> - error_report_err(s->error);
> - }
> - }
> - error_report("load of migration failed: %s", strerror(-ret));
> + error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
> goto fail;
> }
>
> if (colo_incoming_co() < 0) {
> + error_setg(&local_err, "colo incoming failed");
> goto fail;
> }
>
> migration_bh_schedule(process_incoming_migration_bh, mis);
> return;
> fail:
> + migrate_set_error(migrate_get_current(), local_err);
> + error_report_err(local_err);
This will report an different error from the QMP one if s->error happens
to be already set. Either use s->error here or prepend the "load of
migration..." error to the s->error above.
> migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> MIGRATION_STATUS_FAILED);
> - qemu_fclose(mis->from_src_file);
> -
> - multifd_recv_cleanup();
> - compress_threads_load_cleanup();
> -
> - exit(EXIT_FAILURE);
> + migration_incoming_state_destroy();
> }
>
> /**
> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
> index e451dbdbed..91c13bd566 100644
> --- a/tests/qtest/migration-helpers.c
> +++ b/tests/qtest/migration-helpers.c
> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
> wait_for_migration_status(who, "completed", NULL);
> }
>
> -void wait_for_migration_fail(QTestState *from, bool allow_active)
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> + bool is_incoming)
> {
> g_test_timer_start();
> QDict *rsp_return;
> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
> /* Is the machine currently running? */
> rsp_return = qtest_qmp_assert_success_ref(from,
> "{ 'execute': 'query-status' }");
> - g_assert(qdict_haskey(rsp_return, "running"));
> - g_assert(qdict_get_bool(rsp_return, "running"));
> + if (is_incoming) {
> + if (qdict_haskey(rsp_return, "running")) {
> + g_assert(!qdict_get_bool(rsp_return, "running"));
> + }
> + } else {
> + g_assert(qdict_haskey(rsp_return, "running"));
> + g_assert(qdict_get_bool(rsp_return, "running"));
> + }
> qobject_unref(rsp_return);
> }
>
> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
> index 3bf7ded1b9..7bd07059ae 100644
> --- a/tests/qtest/migration-helpers.h
> +++ b/tests/qtest/migration-helpers.h
> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>
> void wait_for_migration_complete(QTestState *who);
>
> -void wait_for_migration_fail(QTestState *from, bool allow_active);
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> + bool is_incoming);
>
> char *find_common_machine_version(const char *mtype, const char *var1,
> const char *var2);
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 1d2cee87ea..e00b755f05 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
> return;
> }
> migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
> - wait_for_migration_fail(from, false);
> + wait_for_migration_fail(from, false, false);
> test_migrate_end(from, to, false);
> }
>
> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>
> if (args->result != MIG_TEST_SUCCEED) {
> bool allow_active = args->result == MIG_TEST_FAIL;
> - wait_for_migration_fail(from, allow_active);
> + wait_for_migration_fail(from, allow_active, false);
>
> if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
> - qtest_set_expected_status(to, EXIT_FAILURE);
> + wait_for_migration_fail(to, true, true);
> }
> } else {
> if (args->live) {
> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
> migrate_qmp(from, uri, "{}");
>
> if (should_fail) {
> - qtest_set_expected_status(to, EXIT_FAILURE);
> - wait_for_migration_fail(from, true);
> + wait_for_migration_fail(to, true, true);
> + wait_for_migration_fail(from, true, false);
> } else {
> wait_for_migration_complete(from);
> }
> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
> migrate_cancel(from);
>
> /* Make sure QEMU process "to" exited */
> - qtest_set_expected_status(to, EXIT_FAILURE);
> - qtest_wait_qemu(to);
> + wait_for_migration_fail(to, true, true);
> + qtest_quit(to);
>
> args = (MigrateStart){
> .only_target = true,
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-17 22:13 [PATCH] migration: do not exit on incoming failure Vladimir Sementsov-Ogievskiy
2024-04-18 14:27 ` Fabiano Rosas
@ 2024-04-18 14:37 ` Daniel P. Berrangé
2024-04-18 15:40 ` Vladimir Sementsov-Ogievskiy
1 sibling, 1 reply; 9+ messages in thread
From: Daniel P. Berrangé @ 2024-04-18 14:37 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: peterx, farosas, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> We do set MIGRATION_FAILED state, but don't give a chance to
> orchestrator to query migration state and get the error.
>
> Let's report an error through QAPI like we do on outgoing migration.
>
> migration-test is updated correspondingly.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>
> Doubt: is exiting on failure a contract? Will this commit break
> something in Libvirt? Finally, could we just change the logic, or I need
> and additional migration-parameter for new behavior?
There's a decent risk that this could break apps, whether
libvirt or something else, especially if the app is just
launching QEMU with '-incoming URI', rather than using
'-incoming defer' and then explicitly using QMP to start the
incoming migration.
I'd say that with '-incoming defer' we should *not* exit on
migration error, because that arg implies the app explicitly
wants to be using QMP to control migration.
With the legacy '-incoming URI' it is probably best to keep
exit on error, as that's comparatively more likely to be used
in adhoc scenarios where the app/user is ignoring QMP on the
dst side.
None the less, I think we need to check how libvirt behaves
with this patch to be sure of no surprises.
>
> migration/migration.c | 22 +++++++---------------
> tests/qtest/migration-helpers.c | 13 ++++++++++---
> tests/qtest/migration-helpers.h | 3 ++-
> tests/qtest/migration-test.c | 14 +++++++-------
> 4 files changed, 26 insertions(+), 26 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 86bf76e925..3c203e767d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
> MigrationIncomingState *mis = migration_incoming_get_current();
> PostcopyState ps;
> int ret;
> + Error *local_err = NULL;
>
> assert(mis->from_src_file);
>
> if (compress_threads_load_setup(mis->from_src_file)) {
> - error_report("Failed to setup decompress threads");
> + error_setg(&local_err, "Failed to setup decompress threads");
> goto fail;
> }
>
> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
> }
>
> if (ret < 0) {
> - MigrationState *s = migrate_get_current();
> -
> - if (migrate_has_error(s)) {
> - WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
> - error_report_err(s->error);
> - }
> - }
> - error_report("load of migration failed: %s", strerror(-ret));
> + error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
> goto fail;
> }
>
> if (colo_incoming_co() < 0) {
> + error_setg(&local_err, "colo incoming failed");
> goto fail;
> }
>
> migration_bh_schedule(process_incoming_migration_bh, mis);
> return;
> fail:
> + migrate_set_error(migrate_get_current(), local_err);
> + error_report_err(local_err);
> migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> MIGRATION_STATUS_FAILED);
> - qemu_fclose(mis->from_src_file);
> -
> - multifd_recv_cleanup();
> - compress_threads_load_cleanup();
> -
> - exit(EXIT_FAILURE);
> + migration_incoming_state_destroy();
> }
>
> /**
> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
> index e451dbdbed..91c13bd566 100644
> --- a/tests/qtest/migration-helpers.c
> +++ b/tests/qtest/migration-helpers.c
> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
> wait_for_migration_status(who, "completed", NULL);
> }
>
> -void wait_for_migration_fail(QTestState *from, bool allow_active)
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> + bool is_incoming)
> {
> g_test_timer_start();
> QDict *rsp_return;
> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
> /* Is the machine currently running? */
> rsp_return = qtest_qmp_assert_success_ref(from,
> "{ 'execute': 'query-status' }");
> - g_assert(qdict_haskey(rsp_return, "running"));
> - g_assert(qdict_get_bool(rsp_return, "running"));
> + if (is_incoming) {
> + if (qdict_haskey(rsp_return, "running")) {
> + g_assert(!qdict_get_bool(rsp_return, "running"));
> + }
> + } else {
> + g_assert(qdict_haskey(rsp_return, "running"));
> + g_assert(qdict_get_bool(rsp_return, "running"));
> + }
> qobject_unref(rsp_return);
> }
>
> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
> index 3bf7ded1b9..7bd07059ae 100644
> --- a/tests/qtest/migration-helpers.h
> +++ b/tests/qtest/migration-helpers.h
> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>
> void wait_for_migration_complete(QTestState *who);
>
> -void wait_for_migration_fail(QTestState *from, bool allow_active);
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> + bool is_incoming);
>
> char *find_common_machine_version(const char *mtype, const char *var1,
> const char *var2);
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 1d2cee87ea..e00b755f05 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
> return;
> }
> migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
> - wait_for_migration_fail(from, false);
> + wait_for_migration_fail(from, false, false);
> test_migrate_end(from, to, false);
> }
>
> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>
> if (args->result != MIG_TEST_SUCCEED) {
> bool allow_active = args->result == MIG_TEST_FAIL;
> - wait_for_migration_fail(from, allow_active);
> + wait_for_migration_fail(from, allow_active, false);
>
> if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
> - qtest_set_expected_status(to, EXIT_FAILURE);
> + wait_for_migration_fail(to, true, true);
> }
> } else {
> if (args->live) {
> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
> migrate_qmp(from, uri, "{}");
>
> if (should_fail) {
> - qtest_set_expected_status(to, EXIT_FAILURE);
> - wait_for_migration_fail(from, true);
> + wait_for_migration_fail(to, true, true);
> + wait_for_migration_fail(from, true, false);
> } else {
> wait_for_migration_complete(from);
> }
> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
> migrate_cancel(from);
>
> /* Make sure QEMU process "to" exited */
> - qtest_set_expected_status(to, EXIT_FAILURE);
> - qtest_wait_qemu(to);
> + wait_for_migration_fail(to, true, true);
> + qtest_quit(to);
>
> args = (MigrateStart){
> .only_target = true,
> --
> 2.34.1
>
>
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-18 14:27 ` Fabiano Rosas
@ 2024-04-18 15:38 ` Vladimir Sementsov-Ogievskiy
0 siblings, 0 replies; 9+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2024-04-18 15:38 UTC (permalink / raw)
To: Fabiano Rosas, peterx
Cc: yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
On 18.04.24 17:27, Fabiano Rosas wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>
>> We do set MIGRATION_FAILED state, but don't give a chance to
>> orchestrator to query migration state and get the error.
>>
>> Let's report an error through QAPI like we do on outgoing migration.
>>
>> migration-test is updated correspondingly.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>
>> Doubt: is exiting on failure a contract? Will this commit break
>> something in Libvirt? Finally, could we just change the logic, or I need
>> and additional migration-parameter for new behavior?
>
> It seems we depend on the non-zero value:
>
> 4aead69241 ("migration: reflect incoming failure to shell")
> Author: Eric Blake <eblake@redhat.com>
> Date: Tue Apr 16 15:50:41 2013 -0600
>
> migration: reflect incoming failure to shell
>
> Management apps like libvirt don't know to pay attention to
> stderr unless there is a non-zero exit status.
>
> * migration.c (process_incoming_migration_co): Exit with non-zero
> status on failure.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> Message-id: 1366149041-626-1-git-send-email-eblake@redhat.com
> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
>
> One idea would be to plumb the s->error somehow through
> migration_shutdown() and allow qemu_cleanup() to change the status
> value.
The idea is not to exit at all, and wait for 'quit' QMP command to exit. But I agree with Daniel that new behavior is good only for -incoming defer.
>
>> migration/migration.c | 22 +++++++---------------
>> tests/qtest/migration-helpers.c | 13 ++++++++++---
>> tests/qtest/migration-helpers.h | 3 ++-
>> tests/qtest/migration-test.c | 14 +++++++-------
>> 4 files changed, 26 insertions(+), 26 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 86bf76e925..3c203e767d 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
>> MigrationIncomingState *mis = migration_incoming_get_current();
>> PostcopyState ps;
>> int ret;
>> + Error *local_err = NULL;
>>
>> assert(mis->from_src_file);
>>
>> if (compress_threads_load_setup(mis->from_src_file)) {
>> - error_report("Failed to setup decompress threads");
>> + error_setg(&local_err, "Failed to setup decompress threads");
>> goto fail;
>> }
>>
>> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
>> }
>>
>> if (ret < 0) {
>> - MigrationState *s = migrate_get_current();
>> -
>> - if (migrate_has_error(s)) {
>> - WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>> - error_report_err(s->error);
>> - }
>> - }
>> - error_report("load of migration failed: %s", strerror(-ret));
>> + error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
>> goto fail;
>> }
>>
>> if (colo_incoming_co() < 0) {
>> + error_setg(&local_err, "colo incoming failed");
>> goto fail;
>> }
>>
>> migration_bh_schedule(process_incoming_migration_bh, mis);
>> return;
>> fail:
>> + migrate_set_error(migrate_get_current(), local_err);
>> + error_report_err(local_err);
>
> This will report an different error from the QMP one if s->error happens
> to be already set. Either use s->error here or prepend the "load of
> migration..." error to the s->error above.
I had another idea: first, modify migrate_set_error so that it always prints the error. This way we should not care to print it here.
>
>> migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>> MIGRATION_STATUS_FAILED);
>> - qemu_fclose(mis->from_src_file);
>> -
>> - multifd_recv_cleanup();
>> - compress_threads_load_cleanup();
>> -
>> - exit(EXIT_FAILURE);
>> + migration_incoming_state_destroy();
>> }
>>
>> /**
>> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
>> index e451dbdbed..91c13bd566 100644
>> --- a/tests/qtest/migration-helpers.c
>> +++ b/tests/qtest/migration-helpers.c
>> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
>> wait_for_migration_status(who, "completed", NULL);
>> }
>>
>> -void wait_for_migration_fail(QTestState *from, bool allow_active)
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> + bool is_incoming)
>> {
>> g_test_timer_start();
>> QDict *rsp_return;
>> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
>> /* Is the machine currently running? */
>> rsp_return = qtest_qmp_assert_success_ref(from,
>> "{ 'execute': 'query-status' }");
>> - g_assert(qdict_haskey(rsp_return, "running"));
>> - g_assert(qdict_get_bool(rsp_return, "running"));
>> + if (is_incoming) {
>> + if (qdict_haskey(rsp_return, "running")) {
>> + g_assert(!qdict_get_bool(rsp_return, "running"));
>> + }
>> + } else {
>> + g_assert(qdict_haskey(rsp_return, "running"));
>> + g_assert(qdict_get_bool(rsp_return, "running"));
>> + }
>> qobject_unref(rsp_return);
>> }
>>
>> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
>> index 3bf7ded1b9..7bd07059ae 100644
>> --- a/tests/qtest/migration-helpers.h
>> +++ b/tests/qtest/migration-helpers.h
>> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>>
>> void wait_for_migration_complete(QTestState *who);
>>
>> -void wait_for_migration_fail(QTestState *from, bool allow_active);
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> + bool is_incoming);
>>
>> char *find_common_machine_version(const char *mtype, const char *var1,
>> const char *var2);
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 1d2cee87ea..e00b755f05 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
>> return;
>> }
>> migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
>> - wait_for_migration_fail(from, false);
>> + wait_for_migration_fail(from, false, false);
>> test_migrate_end(from, to, false);
>> }
>>
>> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>>
>> if (args->result != MIG_TEST_SUCCEED) {
>> bool allow_active = args->result == MIG_TEST_FAIL;
>> - wait_for_migration_fail(from, allow_active);
>> + wait_for_migration_fail(from, allow_active, false);
>>
>> if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
>> - qtest_set_expected_status(to, EXIT_FAILURE);
>> + wait_for_migration_fail(to, true, true);
>> }
>> } else {
>> if (args->live) {
>> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
>> migrate_qmp(from, uri, "{}");
>>
>> if (should_fail) {
>> - qtest_set_expected_status(to, EXIT_FAILURE);
>> - wait_for_migration_fail(from, true);
>> + wait_for_migration_fail(to, true, true);
>> + wait_for_migration_fail(from, true, false);
>> } else {
>> wait_for_migration_complete(from);
>> }
>> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
>> migrate_cancel(from);
>>
>> /* Make sure QEMU process "to" exited */
>> - qtest_set_expected_status(to, EXIT_FAILURE);
>> - qtest_wait_qemu(to);
>> + wait_for_migration_fail(to, true, true);
>> + qtest_quit(to);
>>
>> args = (MigrateStart){
>> .only_target = true,
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-18 14:37 ` Daniel P. Berrangé
@ 2024-04-18 15:40 ` Vladimir Sementsov-Ogievskiy
2024-04-18 15:43 ` Daniel P. Berrangé
0 siblings, 1 reply; 9+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2024-04-18 15:40 UTC (permalink / raw)
To: Daniel P. Berrangé
Cc: peterx, farosas, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
On 18.04.24 17:37, Daniel P. Berrangé wrote:
> On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> We do set MIGRATION_FAILED state, but don't give a chance to
>> orchestrator to query migration state and get the error.
>>
>> Let's report an error through QAPI like we do on outgoing migration.
>>
>> migration-test is updated correspondingly.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>
>> Doubt: is exiting on failure a contract? Will this commit break
>> something in Libvirt? Finally, could we just change the logic, or I need
>> and additional migration-parameter for new behavior?
>
> There's a decent risk that this could break apps, whether
> libvirt or something else, especially if the app is just
> launching QEMU with '-incoming URI', rather than using
> '-incoming defer' and then explicitly using QMP to start the
> incoming migration.
>
> I'd say that with '-incoming defer' we should *not* exit on
> migration error, because that arg implies the app explicitly
> wants to be using QMP to control migration.
>
> With the legacy '-incoming URI' it is probably best to keep
> exit on error, as that's comparatively more likely to be used
> in adhoc scenarios where the app/user is ignoring QMP on the
> dst side.
>
> None the less, I think we need to check how libvirt behaves
> with this patch to be sure of no surprises.
>
Sounds reasonable, thanks! I'll rework it to behave the new way only with "-incoming defer", and check how libvirt behave with it.
>>
>> migration/migration.c | 22 +++++++---------------
>> tests/qtest/migration-helpers.c | 13 ++++++++++---
>> tests/qtest/migration-helpers.h | 3 ++-
>> tests/qtest/migration-test.c | 14 +++++++-------
>> 4 files changed, 26 insertions(+), 26 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 86bf76e925..3c203e767d 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
>> MigrationIncomingState *mis = migration_incoming_get_current();
>> PostcopyState ps;
>> int ret;
>> + Error *local_err = NULL;
>>
>> assert(mis->from_src_file);
>>
>> if (compress_threads_load_setup(mis->from_src_file)) {
>> - error_report("Failed to setup decompress threads");
>> + error_setg(&local_err, "Failed to setup decompress threads");
>> goto fail;
>> }
>>
>> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
>> }
>>
>> if (ret < 0) {
>> - MigrationState *s = migrate_get_current();
>> -
>> - if (migrate_has_error(s)) {
>> - WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>> - error_report_err(s->error);
>> - }
>> - }
>> - error_report("load of migration failed: %s", strerror(-ret));
>> + error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
>> goto fail;
>> }
>>
>> if (colo_incoming_co() < 0) {
>> + error_setg(&local_err, "colo incoming failed");
>> goto fail;
>> }
>>
>> migration_bh_schedule(process_incoming_migration_bh, mis);
>> return;
>> fail:
>> + migrate_set_error(migrate_get_current(), local_err);
>> + error_report_err(local_err);
>> migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>> MIGRATION_STATUS_FAILED);
>> - qemu_fclose(mis->from_src_file);
>> -
>> - multifd_recv_cleanup();
>> - compress_threads_load_cleanup();
>> -
>> - exit(EXIT_FAILURE);
>> + migration_incoming_state_destroy();
>> }
>>
>> /**
>> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
>> index e451dbdbed..91c13bd566 100644
>> --- a/tests/qtest/migration-helpers.c
>> +++ b/tests/qtest/migration-helpers.c
>> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
>> wait_for_migration_status(who, "completed", NULL);
>> }
>>
>> -void wait_for_migration_fail(QTestState *from, bool allow_active)
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> + bool is_incoming)
>> {
>> g_test_timer_start();
>> QDict *rsp_return;
>> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
>> /* Is the machine currently running? */
>> rsp_return = qtest_qmp_assert_success_ref(from,
>> "{ 'execute': 'query-status' }");
>> - g_assert(qdict_haskey(rsp_return, "running"));
>> - g_assert(qdict_get_bool(rsp_return, "running"));
>> + if (is_incoming) {
>> + if (qdict_haskey(rsp_return, "running")) {
>> + g_assert(!qdict_get_bool(rsp_return, "running"));
>> + }
>> + } else {
>> + g_assert(qdict_haskey(rsp_return, "running"));
>> + g_assert(qdict_get_bool(rsp_return, "running"));
>> + }
>> qobject_unref(rsp_return);
>> }
>>
>> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
>> index 3bf7ded1b9..7bd07059ae 100644
>> --- a/tests/qtest/migration-helpers.h
>> +++ b/tests/qtest/migration-helpers.h
>> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>>
>> void wait_for_migration_complete(QTestState *who);
>>
>> -void wait_for_migration_fail(QTestState *from, bool allow_active);
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> + bool is_incoming);
>>
>> char *find_common_machine_version(const char *mtype, const char *var1,
>> const char *var2);
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 1d2cee87ea..e00b755f05 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
>> return;
>> }
>> migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
>> - wait_for_migration_fail(from, false);
>> + wait_for_migration_fail(from, false, false);
>> test_migrate_end(from, to, false);
>> }
>>
>> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>>
>> if (args->result != MIG_TEST_SUCCEED) {
>> bool allow_active = args->result == MIG_TEST_FAIL;
>> - wait_for_migration_fail(from, allow_active);
>> + wait_for_migration_fail(from, allow_active, false);
>>
>> if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
>> - qtest_set_expected_status(to, EXIT_FAILURE);
>> + wait_for_migration_fail(to, true, true);
>> }
>> } else {
>> if (args->live) {
>> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
>> migrate_qmp(from, uri, "{}");
>>
>> if (should_fail) {
>> - qtest_set_expected_status(to, EXIT_FAILURE);
>> - wait_for_migration_fail(from, true);
>> + wait_for_migration_fail(to, true, true);
>> + wait_for_migration_fail(from, true, false);
>> } else {
>> wait_for_migration_complete(from);
>> }
>> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
>> migrate_cancel(from);
>>
>> /* Make sure QEMU process "to" exited */
>> - qtest_set_expected_status(to, EXIT_FAILURE);
>> - qtest_wait_qemu(to);
>> + wait_for_migration_fail(to, true, true);
>> + qtest_quit(to);
>>
>> args = (MigrateStart){
>> .only_target = true,
>> --
>> 2.34.1
>>
>>
>
> With regards,
> Daniel
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-18 15:40 ` Vladimir Sementsov-Ogievskiy
@ 2024-04-18 15:43 ` Daniel P. Berrangé
2024-04-18 15:47 ` Vladimir Sementsov-Ogievskiy
0 siblings, 1 reply; 9+ messages in thread
From: Daniel P. Berrangé @ 2024-04-18 15:43 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: peterx, farosas, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 18.04.24 17:37, Daniel P. Berrangé wrote:
> > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > We do set MIGRATION_FAILED state, but don't give a chance to
> > > orchestrator to query migration state and get the error.
> > >
> > > Let's report an error through QAPI like we do on outgoing migration.
> > >
> > > migration-test is updated correspondingly.
> > >
> > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > > ---
> > >
> > > Doubt: is exiting on failure a contract? Will this commit break
> > > something in Libvirt? Finally, could we just change the logic, or I need
> > > and additional migration-parameter for new behavior?
> >
> > There's a decent risk that this could break apps, whether
> > libvirt or something else, especially if the app is just
> > launching QEMU with '-incoming URI', rather than using
> > '-incoming defer' and then explicitly using QMP to start the
> > incoming migration.
> >
> > I'd say that with '-incoming defer' we should *not* exit on
> > migration error, because that arg implies the app explicitly
> > wants to be using QMP to control migration.
> >
> > With the legacy '-incoming URI' it is probably best to keep
> > exit on error, as that's comparatively more likely to be used
> > in adhoc scenarios where the app/user is ignoring QMP on the
> > dst side.
> >
> > None the less, I think we need to check how libvirt behaves
> > with this patch to be sure of no surprises.
> >
>
> Sounds reasonable, thanks! I'll rework it to behave the new
> way only with "-incoming defer", and check how libvirt behave with it.
If there are problems and/or we want to be super safe wrt
backcompat, we could add a new '-incoming managed' as
being equivalent to '-incoming defer' but without the
implicit exit.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-18 15:43 ` Daniel P. Berrangé
@ 2024-04-18 15:47 ` Vladimir Sementsov-Ogievskiy
2024-04-18 16:43 ` Peter Xu
0 siblings, 1 reply; 9+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2024-04-18 15:47 UTC (permalink / raw)
To: Daniel P. Berrangé
Cc: peterx, farosas, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
On 18.04.24 18:43, Daniel P. Berrangé wrote:
> On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> On 18.04.24 17:37, Daniel P. Berrangé wrote:
>>> On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>>> We do set MIGRATION_FAILED state, but don't give a chance to
>>>> orchestrator to query migration state and get the error.
>>>>
>>>> Let's report an error through QAPI like we do on outgoing migration.
>>>>
>>>> migration-test is updated correspondingly.
>>>>
>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>>>> ---
>>>>
>>>> Doubt: is exiting on failure a contract? Will this commit break
>>>> something in Libvirt? Finally, could we just change the logic, or I need
>>>> and additional migration-parameter for new behavior?
>>>
>>> There's a decent risk that this could break apps, whether
>>> libvirt or something else, especially if the app is just
>>> launching QEMU with '-incoming URI', rather than using
>>> '-incoming defer' and then explicitly using QMP to start the
>>> incoming migration.
>>>
>>> I'd say that with '-incoming defer' we should *not* exit on
>>> migration error, because that arg implies the app explicitly
>>> wants to be using QMP to control migration.
>>>
>>> With the legacy '-incoming URI' it is probably best to keep
>>> exit on error, as that's comparatively more likely to be used
>>> in adhoc scenarios where the app/user is ignoring QMP on the
>>> dst side.
>>>
>>> None the less, I think we need to check how libvirt behaves
>>> with this patch to be sure of no surprises.
>>>
>>
>> Sounds reasonable, thanks! I'll rework it to behave the new
>> way only with "-incoming defer", and check how libvirt behave with it.
>
> If there are problems and/or we want to be super safe wrt
> backcompat, we could add a new '-incoming managed' as
> being equivalent to '-incoming defer' but without the
> implicit exit.
>
Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally.
And we can in the same time deprecate "-incoming defer" in favor of new behavior.
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-18 15:47 ` Vladimir Sementsov-Ogievskiy
@ 2024-04-18 16:43 ` Peter Xu
2024-04-18 16:57 ` Daniel P. Berrangé
0 siblings, 1 reply; 9+ messages in thread
From: Peter Xu @ 2024-04-18 16:43 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: Daniel P. Berrangé,
farosas, yc-core, thuth, lvivier, pbonzini, qemu-devel, pkrempa
On Thu, Apr 18, 2024 at 06:47:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 18.04.24 18:43, Daniel P. Berrangé wrote:
> > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > On 18.04.24 17:37, Daniel P. Berrangé wrote:
> > > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > > We do set MIGRATION_FAILED state, but don't give a chance to
> > > > > orchestrator to query migration state and get the error.
> > > > >
> > > > > Let's report an error through QAPI like we do on outgoing migration.
> > > > >
> > > > > migration-test is updated correspondingly.
> > > > >
> > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > > > > ---
> > > > >
> > > > > Doubt: is exiting on failure a contract? Will this commit break
> > > > > something in Libvirt? Finally, could we just change the logic, or I need
> > > > > and additional migration-parameter for new behavior?
> > > >
> > > > There's a decent risk that this could break apps, whether
> > > > libvirt or something else, especially if the app is just
> > > > launching QEMU with '-incoming URI', rather than using
> > > > '-incoming defer' and then explicitly using QMP to start the
> > > > incoming migration.
> > > >
> > > > I'd say that with '-incoming defer' we should *not* exit on
> > > > migration error, because that arg implies the app explicitly
> > > > wants to be using QMP to control migration.
> > > >
> > > > With the legacy '-incoming URI' it is probably best to keep
> > > > exit on error, as that's comparatively more likely to be used
> > > > in adhoc scenarios where the app/user is ignoring QMP on the
> > > > dst side.
> > > >
> > > > None the less, I think we need to check how libvirt behaves
> > > > with this patch to be sure of no surprises.
> > > >
> > >
> > > Sounds reasonable, thanks! I'll rework it to behave the new
> > > way only with "-incoming defer", and check how libvirt behave with it.
> >
> > If there are problems and/or we want to be super safe wrt
> > backcompat, we could add a new '-incoming managed' as
> > being equivalent to '-incoming defer' but without the
> > implicit exit.
> >
>
> Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally.
> And we can in the same time deprecate "-incoming defer" in favor of new behavior.
Or just make it a new migration parameter? Then we keep all existing
interfaces untouched, no risk of breaking anyone, and then it'll also apply
to anyone who uses things like -incoming tcp but still wants to keep the
qemu instance alive?
Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] migration: do not exit on incoming failure
2024-04-18 16:43 ` Peter Xu
@ 2024-04-18 16:57 ` Daniel P. Berrangé
0 siblings, 0 replies; 9+ messages in thread
From: Daniel P. Berrangé @ 2024-04-18 16:57 UTC (permalink / raw)
To: Peter Xu
Cc: Vladimir Sementsov-Ogievskiy, farosas, yc-core, thuth, lvivier,
pbonzini, qemu-devel, pkrempa
On Thu, Apr 18, 2024 at 12:43:42PM -0400, Peter Xu wrote:
> On Thu, Apr 18, 2024 at 06:47:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > On 18.04.24 18:43, Daniel P. Berrangé wrote:
> > > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > On 18.04.24 17:37, Daniel P. Berrangé wrote:
> > > > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > We do set MIGRATION_FAILED state, but don't give a chance to
> > > > > > orchestrator to query migration state and get the error.
> > > > > >
> > > > > > Let's report an error through QAPI like we do on outgoing migration.
> > > > > >
> > > > > > migration-test is updated correspondingly.
> > > > > >
> > > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > > > > > ---
> > > > > >
> > > > > > Doubt: is exiting on failure a contract? Will this commit break
> > > > > > something in Libvirt? Finally, could we just change the logic, or I need
> > > > > > and additional migration-parameter for new behavior?
> > > > >
> > > > > There's a decent risk that this could break apps, whether
> > > > > libvirt or something else, especially if the app is just
> > > > > launching QEMU with '-incoming URI', rather than using
> > > > > '-incoming defer' and then explicitly using QMP to start the
> > > > > incoming migration.
> > > > >
> > > > > I'd say that with '-incoming defer' we should *not* exit on
> > > > > migration error, because that arg implies the app explicitly
> > > > > wants to be using QMP to control migration.
> > > > >
> > > > > With the legacy '-incoming URI' it is probably best to keep
> > > > > exit on error, as that's comparatively more likely to be used
> > > > > in adhoc scenarios where the app/user is ignoring QMP on the
> > > > > dst side.
> > > > >
> > > > > None the less, I think we need to check how libvirt behaves
> > > > > with this patch to be sure of no surprises.
> > > > >
> > > >
> > > > Sounds reasonable, thanks! I'll rework it to behave the new
> > > > way only with "-incoming defer", and check how libvirt behave with it.
> > >
> > > If there are problems and/or we want to be super safe wrt
> > > backcompat, we could add a new '-incoming managed' as
> > > being equivalent to '-incoming defer' but without the
> > > implicit exit.
> > >
> >
> > Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally.
> > And we can in the same time deprecate "-incoming defer" in favor of new behavior.
>
> Or just make it a new migration parameter? Then we keep all existing
> interfaces untouched, no risk of breaking anyone, and then it'll also apply
> to anyone who uses things like -incoming tcp but still wants to keep the
> qemu instance alive?
True, or even more simply, an argument to the 'migrate-incoming' command
diff --git a/qapi/migration.json b/qapi/migration.json
index 8c65b90328..6882aef328 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1889,7 +1889,8 @@
##
{ 'command': 'migrate-incoming',
'data': {'*uri': 'str',
- '*channels': [ 'MigrationChannel' ] } }
+ '*channels': [ 'MigrationChannel' ],
+ '*exit-on-error': 'bool' } }
##
# @xen-save-devices-state:
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-04-18 16:58 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-17 22:13 [PATCH] migration: do not exit on incoming failure Vladimir Sementsov-Ogievskiy
2024-04-18 14:27 ` Fabiano Rosas
2024-04-18 15:38 ` Vladimir Sementsov-Ogievskiy
2024-04-18 14:37 ` Daniel P. Berrangé
2024-04-18 15:40 ` Vladimir Sementsov-Ogievskiy
2024-04-18 15:43 ` Daniel P. Berrangé
2024-04-18 15:47 ` Vladimir Sementsov-Ogievskiy
2024-04-18 16:43 ` Peter Xu
2024-04-18 16:57 ` Daniel P. Berrangé
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.