qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, david@redhat.com, zhukeqian1@huawei.com,
	maozhongyi@cmss.chinamobile.com, marcandre.lureau@redhat.com,
	pannengyuan@huawei.com, f4bug@amsat.org, wei.w.wang@intel.com,
	yi.y.sun@intel.com, quintela@redhat.com
Subject: [PULL 07/12] migration/throttle: Add cpu-throttle-tailslow migration parameter
Date: Thu,  7 May 2020 18:02:06 +0100	[thread overview]
Message-ID: <20200507170211.238283-8-dgilbert@redhat.com> (raw)
In-Reply-To: <20200507170211.238283-1-dgilbert@redhat.com>

From: Keqian Zhu <zhukeqian1@huawei.com>

At the tail stage of throttling, the Guest is very sensitive to
CPU percentage while the @cpu-throttle-increment is excessive
usually at tail stage.

If this parameter is true, we will compute the ideal CPU percentage
used by the Guest, which may exactly make the dirty rate match the
dirty rate threshold. Then we will choose a smaller throttle increment
between the one specified by @cpu-throttle-increment and the one
generated by ideal CPU percentage.

Therefore, it is compatible to traditional throttling, meanwhile
the throttle increment won't be excessive at tail stage. This may
make migration time longer, and is disabled by default.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Message-Id: <20200413101508.54793-1-zhukeqian1@huawei.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/migration.c | 13 ++++++++++++
 migration/ram.c       | 25 +++++++++++++++++-----
 monitor/hmp-cmds.c    |  8 ++++++++
 qapi/migration.json   | 48 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 79f16f6625..f5dbffc442 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
     params->has_cpu_throttle_increment = true;
     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
+    params->has_cpu_throttle_tailslow = true;
+    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
     params->has_tls_creds = true;
     params->tls_creds = g_strdup(s->parameters.tls_creds);
     params->has_tls_hostname = true;
@@ -1327,6 +1329,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
         dest->cpu_throttle_increment = params->cpu_throttle_increment;
     }
 
+    if (params->has_cpu_throttle_tailslow) {
+        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
+    }
+
     if (params->has_tls_creds) {
         assert(params->tls_creds->type == QTYPE_QSTRING);
         dest->tls_creds = g_strdup(params->tls_creds->u.s);
@@ -1415,6 +1421,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
     }
 
+    if (params->has_cpu_throttle_tailslow) {
+        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
+    }
+
     if (params->has_tls_creds) {
         g_free(s->parameters.tls_creds);
         assert(params->tls_creds->type == QTYPE_QSTRING);
@@ -3597,6 +3607,8 @@ static Property migration_properties[] = {
     DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                       parameters.cpu_throttle_increment,
                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
+    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
+                      parameters.cpu_throttle_tailslow, false),
     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                       parameters.max_bandwidth, MAX_THROTTLE),
     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
@@ -3703,6 +3715,7 @@ static void migration_instance_init(Object *obj)
     params->has_throttle_trigger_threshold = true;
     params->has_cpu_throttle_initial = true;
     params->has_cpu_throttle_increment = true;
+    params->has_cpu_throttle_tailslow = true;
     params->has_max_bandwidth = true;
     params->has_downtime_limit = true;
     params->has_x_checkpoint_delay = true;
diff --git a/migration/ram.c b/migration/ram.c
index 53166fc279..52fc032b83 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -616,20 +616,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile *f,  RAMBlock *block,
  * able to complete migration. Some workloads dirty memory way too
  * fast and will not effectively converge, even with auto-converge.
  */
-static void mig_throttle_guest_down(void)
+static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
+                                    uint64_t bytes_dirty_threshold)
 {
     MigrationState *s = migrate_get_current();
     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
-    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
+    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
+    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
     int pct_max = s->parameters.max_cpu_throttle;
 
+    uint64_t throttle_now = cpu_throttle_get_percentage();
+    uint64_t cpu_now, cpu_ideal, throttle_inc;
+
     /* We have not started throttling yet. Let's start it. */
     if (!cpu_throttle_active()) {
         cpu_throttle_set(pct_initial);
     } else {
         /* Throttling already on, just increase the rate */
-        cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
-                         pct_max));
+        if (!pct_tailslow) {
+            throttle_inc = pct_increment;
+        } else {
+            /* Compute the ideal CPU percentage used by Guest, which may
+             * make the dirty rate match the dirty rate threshold. */
+            cpu_now = 100 - throttle_now;
+            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
+                        bytes_dirty_period);
+            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
+        }
+        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
     }
 }
 
@@ -919,7 +933,8 @@ static void migration_trigger_throttle(RAMState *rs)
             (++rs->dirty_rate_high_cnt >= 2)) {
             trace_migration_throttle();
             rs->dirty_rate_high_cnt = 0;
-            mig_throttle_guest_down();
+            mig_throttle_guest_down(bytes_dirty_period,
+                                    bytes_dirty_threshold);
         }
     }
 }
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 1552dee489..ade1f85e0c 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -420,6 +420,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT),
             params->cpu_throttle_increment);
+        assert(params->has_cpu_throttle_tailslow);
+        monitor_printf(mon, "%s: %s\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW),
+            params->cpu_throttle_tailslow ? "on" : "off");
         assert(params->has_max_cpu_throttle);
         monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_MAX_CPU_THROTTLE),
@@ -1275,6 +1279,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
         p->has_cpu_throttle_increment = true;
         visit_type_int(v, param, &p->cpu_throttle_increment, &err);
         break;
+    case MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW:
+        p->has_cpu_throttle_tailslow = true;
+        visit_type_bool(v, param, &p->cpu_throttle_tailslow, &err);
+        break;
     case MIGRATION_PARAMETER_MAX_CPU_THROTTLE:
         p->has_max_cpu_throttle = true;
         visit_type_int(v, param, &p->max_cpu_throttle, &err);
diff --git a/qapi/migration.json b/qapi/migration.json
index eca2981d0a..ee6c5a0cae 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -552,6 +552,21 @@
 #                          auto-converge detects that migration is not making
 #                          progress. The default value is 10. (Since 2.7)
 #
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
+#                         At the tail stage of throttling, the Guest is very
+#                         sensitive to CPU percentage while the @cpu-throttle
+#                         -increment is excessive usually at tail stage.
+#                         If this parameter is true, we will compute the ideal
+#                         CPU percentage used by the Guest, which may exactly make
+#                         the dirty rate match the dirty rate threshold. Then we
+#                         will choose a smaller throttle increment between the
+#                         one specified by @cpu-throttle-increment and the one
+#                         generated by ideal CPU percentage.
+#                         Therefore, it is compatible to traditional throttling,
+#                         meanwhile the throttle increment won't be excessive
+#                         at tail stage.
+#                         The default value is false. (Since 5.1)
+#
 # @tls-creds: ID of the 'tls-creds' object that provides credentials for
 #             establishing a TLS connection over the migration data channel.
 #             On the outgoing side of the migration, the credentials must
@@ -631,6 +646,7 @@
            'compress-level', 'compress-threads', 'decompress-threads',
            'compress-wait-thread', 'throttle-trigger-threshold',
            'cpu-throttle-initial', 'cpu-throttle-increment',
+           'cpu-throttle-tailslow',
            'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
            'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
            'multifd-channels',
@@ -676,6 +692,21 @@
 #                          auto-converge detects that migration is not making
 #                          progress. The default value is 10. (Since 2.7)
 #
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
+#                         At the tail stage of throttling, the Guest is very
+#                         sensitive to CPU percentage while the @cpu-throttle
+#                         -increment is excessive usually at tail stage.
+#                         If this parameter is true, we will compute the ideal
+#                         CPU percentage used by the Guest, which may exactly make
+#                         the dirty rate match the dirty rate threshold. Then we
+#                         will choose a smaller throttle increment between the
+#                         one specified by @cpu-throttle-increment and the one
+#                         generated by ideal CPU percentage.
+#                         Therefore, it is compatible to traditional throttling,
+#                         meanwhile the throttle increment won't be excessive
+#                         at tail stage.
+#                         The default value is false. (Since 5.1)
+#
 # @tls-creds: ID of the 'tls-creds' object that provides credentials
 #             for establishing a TLS connection over the migration data
 #             channel. On the outgoing side of the migration, the credentials
@@ -763,6 +794,7 @@
             '*throttle-trigger-threshold': 'int',
             '*cpu-throttle-initial': 'int',
             '*cpu-throttle-increment': 'int',
+            '*cpu-throttle-tailslow': 'bool',
             '*tls-creds': 'StrOrNull',
             '*tls-hostname': 'StrOrNull',
             '*tls-authz': 'StrOrNull',
@@ -834,6 +866,21 @@
 #                          auto-converge detects that migration is not making
 #                          progress. (Since 2.7)
 #
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
+#                         At the tail stage of throttling, the Guest is very
+#                         sensitive to CPU percentage while the @cpu-throttle
+#                         -increment is excessive usually at tail stage.
+#                         If this parameter is true, we will compute the ideal
+#                         CPU percentage used by the Guest, which may exactly make
+#                         the dirty rate match the dirty rate threshold. Then we
+#                         will choose a smaller throttle increment between the
+#                         one specified by @cpu-throttle-increment and the one
+#                         generated by ideal CPU percentage.
+#                         Therefore, it is compatible to traditional throttling,
+#                         meanwhile the throttle increment won't be excessive
+#                         at tail stage.
+#                         The default value is false. (Since 5.1)
+#
 # @tls-creds: ID of the 'tls-creds' object that provides credentials
 #             for establishing a TLS connection over the migration data
 #             channel. On the outgoing side of the migration, the credentials
@@ -921,6 +968,7 @@
             '*throttle-trigger-threshold': 'uint8',
             '*cpu-throttle-initial': 'uint8',
             '*cpu-throttle-increment': 'uint8',
+            '*cpu-throttle-tailslow': 'bool',
             '*tls-creds': 'str',
             '*tls-hostname': 'str',
             '*tls-authz': 'str',
-- 
2.26.2



  parent reply	other threads:[~2020-05-07 17:08 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-07 17:01 [PULL 00/12] migration queue Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 01/12] migration: fix bad indentation in error_report() Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 02/12] migration/migration: improve error reporting for migrate parameters Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 03/12] monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed() Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 04/12] migration: move the units of migrate parameters from milliseconds to ms Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 05/12] docs/devel/migration: start a debugging section Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 06/12] migration/colo: Add missing error-propagation code Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` Dr. David Alan Gilbert (git) [this message]
2020-05-07 17:02 ` [PULL 08/12] migration/ram: Consolidate variable reset after placement in ram_load_postcopy() Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 09/12] migration/rdma: fix a memleak on error path in rdma_start_incoming_migration Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 10/12] migration/xbzrle: add encoding rate Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 11/12] migration/multifd: fix memleaks in multifd_new_send_channel_async Dr. David Alan Gilbert (git)
2020-05-07 17:02 ` [PULL 12/12] migration/multifd: Do error_free after migrate_set_error to avoid memleaks Dr. David Alan Gilbert (git)
2020-05-07 18:49 ` [PULL 00/12] migration queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200507170211.238283-8-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=david@redhat.com \
    --cc=f4bug@amsat.org \
    --cc=maozhongyi@cmss.chinamobile.com \
    --cc=marcandre.lureau@redhat.com \
    --cc=pannengyuan@huawei.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=wei.w.wang@intel.com \
    --cc=yi.y.sun@intel.com \
    --cc=zhukeqian1@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).