qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Yury Kotov <yury-kotov@yandex-team.ru>
Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com>,
	Stefan Weil <sw@weilnetz.de>, Juan Quintela <quintela@redhat.com>,
	"open list:Overall" <qemu-devel@nongnu.org>,
	"yc-core@yandex-team.ru" <yc-core@yandex-team.ru>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>
Subject: Re: [Qemu-devel] [PATCH v3 3/3] tests/migration: Add a test for auto converge
Date: Tue, 23 Jul 2019 11:23:53 +0100	[thread overview]
Message-ID: <20190723102353.GG2719@work-vm> (raw)
In-Reply-To: <483241563872053@myt5-bd00a25f9194.qloud-c.yandex.net>

* Yury Kotov (yury-kotov@yandex-team.ru) wrote:
> 22.07.2019, 20:35, "Dr. David Alan Gilbert" <dgilbert@redhat.com>:
> > * Yury Kotov (yury-kotov@yandex-team.ru) wrote:
> >>  Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> >
> > This looks OK to me, but have you tried it on a really really overloaded
> > host?
> > I worry that you might skip some of the percentage steps or not hit the
> > bandwidth on the small overloaded VMs we get in CI.
> 
> No, I haven't tried. I saw patchew's logs and you're absolutely right - such a
> problem exists. Now I'm looking for a way to fix it.

Yes, we've hit that type of thing before; make sure it's happy with 1
CPU that's already got 2 or 3 other things on it.

But as much as possible, try to make it survive even if it's really slow.

Dave

> >
> > Dave
> >
> >>  ---
> >>   tests/migration-test.c | 119 +++++++++++++++++++++++++++++++++++++----
> >>   1 file changed, 108 insertions(+), 11 deletions(-)
> >>
> >>  diff --git a/tests/migration-test.c b/tests/migration-test.c
> >>  index a4feb9545d..bb69517fc8 100644
> >>  --- a/tests/migration-test.c
> >>  +++ b/tests/migration-test.c
> >>  @@ -241,6 +241,17 @@ static int64_t read_ram_property_int(QTestState *who, const char *property)
> >>       return result;
> >>   }
> >>
> >>  +static int64_t read_migrate_property_int(QTestState *who, const char *property)
> >>  +{
> >>  + QDict *rsp_return;
> >>  + int64_t result;
> >>  +
> >>  + rsp_return = migrate_query(who);
> >>  + result = qdict_get_try_int(rsp_return, property, 0);
> >>  + qobject_unref(rsp_return);
> >>  + return result;
> >>  +}
> >>  +
> >>   static uint64_t get_migration_pass(QTestState *who)
> >>   {
> >>       return read_ram_property_int(who, "dirty-sync-count");
> >>  @@ -255,20 +266,22 @@ static void read_blocktime(QTestState *who)
> >>       qobject_unref(rsp_return);
> >>   }
> >>
> >>  +static bool check_migration_status(QTestState *who, const char *status)
> >>  +{
> >>  + bool completed;
> >>  + char *current_status;
> >>  +
> >>  + current_status = migrate_query_status(who);
> >>  + completed = strcmp(current_status, status) == 0;
> >>  + g_assert_cmpstr(current_status, !=, "failed");
> >>  + g_free(current_status);
> >>  + return completed;
> >>  +}
> >>  +
> >>   static void wait_for_migration_status(QTestState *who,
> >>                                         const char *goal)
> >>   {
> >>  - while (true) {
> >>  - bool completed;
> >>  - char *status;
> >>  -
> >>  - status = migrate_query_status(who);
> >>  - completed = strcmp(status, goal) == 0;
> >>  - g_assert_cmpstr(status, !=, "failed");
> >>  - g_free(status);
> >>  - if (completed) {
> >>  - return;
> >>  - }
> >>  + while (!check_migration_status(who, goal)) {
> >>           usleep(1000);
> >>       }
> >>   }
> >>  @@ -1121,6 +1134,89 @@ static void test_migrate_fd_proto(void)
> >>       test_migrate_end(from, to, true);
> >>   }
> >>
> >>  +static void test_migrate_auto_converge(void)
> >>  +{
> >>  + char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
> >>  + QTestState *from, *to;
> >>  + int i;
> >>  + int64_t remaining, downtime;
> >>  +
> >>  + /*
> >>  + * We want the test to be fast enough, but stable.
> >>  + * Throttle percentages are chosen to cover all cases (init, increment, max)
> >>  + */
> >>  + static const int64_t expected_pcts[] = { 0, 1, 51, 98 };
> >>  + const int64_t max_bandwidth = 200000000; /* ~200Mb/s */
> >>  + const int64_t downtime_limit = 50; /* 50ms */
> >>  + /*
> >>  + * We migrate through unix-socket (> 500Mb/s).
> >>  + * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
> >>  + * So, we can predict expected_threshold
> >>  + */
> >>  + const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
> >>  +
> >>  + if (test_migrate_start(&from, &to, uri, false, false)) {
> >>  + return;
> >>  + }
> >>  +
> >>  + migrate_set_capability(from, "auto-converge", true);
> >>  + migrate_set_parameter_int(from, "cpu-throttle-initial", expected_pcts[1]);
> >>  + migrate_set_parameter_int(from, "cpu-throttle-increment",
> >>  + expected_pcts[2] - expected_pcts[1]);
> >>  + migrate_set_parameter_int(from, "max-cpu-throttle", expected_pcts[3]);
> >>  +
> >>  + migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth);
> >>  + migrate_set_parameter_int(from, "downtime-limit", downtime_limit);
> >>  +
> >>  + /* To check remaining size after precopy */
> >>  + migrate_set_capability(from, "pause-before-switchover", true);
> >>  +
> >>  + /* Wait for the first serial output from the source */
> >>  + wait_for_serial("src_serial");
> >>  +
> >>  + migrate(from, uri, "{}");
> >>  +
> >>  + for (i = 0; i < ARRAY_SIZE(expected_pcts); i++) {
> >>  + int64_t pct;
> >>  + pct = read_migrate_property_int(from, "cpu-throttle-percentage");
> >>  + g_assert_cmpint(pct, ==, expected_pcts[i]);
> >>  + while (pct == expected_pcts[i] && !got_stop) {
> >>  + usleep(1000);
> >>  + pct = read_migrate_property_int(from, "cpu-throttle-percentage");
> >>  + }
> >>  + /* We break out of this loop only in paused state */
> >>  + if (got_stop || i + 1 == ARRAY_SIZE(expected_pcts)) {
> >>  + /* Check unexpected throttle percentage change */
> >>  + g_assert_true(got_stop);
> >>  + /* Check unexpected converge */
> >>  + g_assert_cmpint(i + 1, ==, ARRAY_SIZE(expected_pcts));
> >>  + g_assert_true(check_migration_status(from, "pre-switchover"));
> >>  + }
> >>  + }
> >>  +
> >>  + remaining = read_ram_property_int(from, "remaining");
> >>  + g_assert_cmpint(remaining, <, expected_threshold);
> >>  +
> >>  + wait_command(from, "{ 'execute': 'migrate-continue' , 'arguments':"
> >>  + " { 'state': 'pre-switchover' } }");
> >>  +
> >>  + qtest_qmp_eventwait(to, "RESUME");
> >>  +
> >>  + wait_for_serial("dest_serial");
> >>  + wait_for_migration_complete(from);
> >>  +
> >>  + downtime = read_migrate_property_int(from, "downtime");
> >>  + /*
> >>  + * Actual downtime may be greater than downtime limit,
> >>  + * but the difference should be small enough (~20ms)
> >>  + */
> >>  + g_assert_cmpint(downtime, <, downtime_limit + 20);
> >>  +
> >>  + g_free(uri);
> >>  +
> >>  + test_migrate_end(from, to, true);
> >>  +}
> >>  +
> >>   int main(int argc, char **argv)
> >>   {
> >>       char template[] = "/tmp/migration-test-XXXXXX";
> >>  @@ -1176,6 +1272,7 @@ int main(int argc, char **argv)
> >>       /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */
> >>       qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix);
> >>       qtest_add_func("/migration/fd_proto", test_migrate_fd_proto);
> >>  + qtest_add_func("/migration/auto_converge", test_migrate_auto_converge);
> >>
> >>       ret = g_test_run();
> >>
> >>  --
> >>  2.22.0
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 
> Regards,
> Yury
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


  reply	other threads:[~2019-07-23 10:24 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-18  9:17 [Qemu-devel] [PATCH v3 0/3] High downtime with 95+ throttle pct Yury Kotov
2019-07-18  9:17 ` [Qemu-devel] [PATCH v3 1/3] qemu-thread: Add qemu_cond_timedwait Yury Kotov
2019-07-18  9:17 ` [Qemu-devel] [PATCH v3 2/3] cpus: Fix throttling during vm_stop Yury Kotov
2019-07-18  9:17 ` [Qemu-devel] [PATCH v3 3/3] tests/migration: Add a test for auto converge Yury Kotov
2019-07-22 17:35   ` Dr. David Alan Gilbert
2019-07-23  8:54     ` Yury Kotov
2019-07-23 10:23       ` Dr. David Alan Gilbert [this message]
2019-07-18 15:33 ` [Qemu-devel] [PATCH v3 0/3] High downtime with 95+ throttle pct no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190723102353.GG2719@work-vm \
    --to=dgilbert@redhat.com \
    --cc=crosthwaite.peter@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=rth@twiddle.net \
    --cc=sw@weilnetz.de \
    --cc=yc-core@yandex-team.ru \
    --cc=yury-kotov@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).