* [Qemu-devel] [PATCH v8 3/3] Force auto-convergence of live migration
From: Chegu Vinod @ 2013-06-24  9:47 UTC (permalink / raw)
  To: eblake, anthony, quintela, owasserm, qemu-devel, pbonzini; +Cc: chegu_vinod

If a user chooses to turn on the auto-converge migration capability,
these changes detect the lack of convergence and throttle down the
guest, i.e. force the VCPUs out of the guest for some duration
and let the migration thread catch up and help the migration converge.

Verified the convergence using the following:
 - Java Warehouse workload running on a 20VCPU/256G guest (~80% busy)
 - OLTP-like workload running on an 80VCPU/512G guest (~80% busy)
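
As a rough sketch of how such a run is set up from the monitor (these are not
the exact commands used for the tests above; the speed argument and the
destination URI below are placeholders):

 (qemu) migrate_set_capability auto-converge on
 (qemu) migrate_set_speed 20G
 (qemu) migrate_set_downtime 4
 (qemu) migrate -d tcp:dest-host:4444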

Sample results with the Java warehouse workload are shown below (migrate speed
set to 20Gb and migrate downtime set to 4 seconds).

 (qemu) info migrate
 capabilities: xbzrle: off auto-converge: off  <----
 Migration status: active
 total time: 1487503 milliseconds
 expected downtime: 519 milliseconds
 transferred ram: 383749347 kbytes
 remaining ram: 2753372 kbytes
 total ram: 268444224 kbytes
 duplicate: 65461532 pages
 skipped: 64901568 pages
 normal: 95750218 pages
 normal bytes: 383000872 kbytes
 dirty pages rate: 67551 pages

 ---

 (qemu) info migrate
 capabilities: xbzrle: off auto-converge: on   <----
 Migration status: completed
 total time: 241161 milliseconds
 downtime: 6373 milliseconds
 transferred ram: 28235307 kbytes
 remaining ram: 0 kbytes
 total ram: 268444224 kbytes
 duplicate: 64946416 pages
 skipped: 64903523 pages
 normal: 7044971 pages
 normal bytes: 28179884 kbytes

Signed-off-by: Chegu Vinod <chegu_vinod@hp.com>
---
 arch_init.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 79 insertions(+), 0 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index a8b91ee..e7ca3b1 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -104,6 +104,9 @@ int graphic_depth = 15;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
 
 /***********************************************************/
 /* ram save/restore */
@@ -378,8 +381,14 @@ static void migration_bitmap_sync(void)
     uint64_t num_dirty_pages_init = migration_dirty_pages;
     MigrationState *s = migrate_get_current();
     static int64_t start_time;
+    static int64_t bytes_xfer_prev;
     static int64_t num_dirty_pages_period;
     int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
 
     if (!start_time) {
         start_time = qemu_get_clock_ms(rt_clock);
@@ -404,6 +413,23 @@ static void migration_bitmap_sync(void)
 
     /* more than 1 second = 1000 millisecons */
     if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check to see if the dirtied bytes is 50% more than the approx.
+               amount of bytes that just got transferred since the last time we
+               were in this routine. If that happens >N times (for now N==4)
+               we turn on the throttle down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+               (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
+               (dirty_rate_high_cnt++ > 4)) {
+                    trace_migration_throttle();
+                    mig_throttle_on = true;
+                    dirty_rate_high_cnt = 0;
+             }
+             bytes_xfer_prev = bytes_xfer_now;
+        } else {
+             mig_throttle_on = false;
+        }
         s->dirty_pages_rate = num_dirty_pages_period * 1000
             / (end_time - start_time);
         s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
@@ -566,6 +592,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap = bitmap_new(ram_pages);
     bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -628,6 +656,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         total_sent += bytes_sent;
         acct_info.iterations++;
+        check_guest_throttling();
         /* we want to check in the 1st loop, just in case it was the 1st time
            and we had to sync the dirty bitmap.
            qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -1097,3 +1126,53 @@ TargetInfo *qmp_query_target(Error **errp)
 
     return info;
 }
+
+/* Stub function that's gets run on the vcpu when its brought out of the
+   VM to run inside qemu via async_run_on_cpu()*/
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catchup.
+   Workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+}
+
+static void mig_throttle_guest_down(void)
+{
+    qemu_mutex_lock_iothread();
+    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t        t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0)  {
+        t0 = qemu_get_clock_ns(rt_clock);
+        return;
+    }
+
+    t1 = qemu_get_clock_ns(rt_clock);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1-t0)/1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}
-- 
1.7.1
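
To make the detection heuristic in migration_bitmap_sync() concrete, here is a
small standalone illustration with made-up numbers (TARGET_PAGE_SIZE is assumed
to be 4096, as on x86; this snippet is not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const int64_t target_page_size = 4096;        /* x86 TARGET_PAGE_SIZE */
        int64_t num_dirty_pages_period = 70000;       /* pages dirtied in ~1 s */
        int64_t bytes_xfer_prev = 0;
        int64_t bytes_xfer_now = 200 * 1024 * 1024;   /* ~200 MB sent in ~1 s */

        /* Same comparison as the patch: ~2.9e8 dirtied bytes exceed
         * ~1.0e8 bytes (half of what was transferred in the period). */
        if (num_dirty_pages_period * target_page_size >
            (bytes_xfer_now - bytes_xfer_prev) / 2) {
            printf("dirty rate outpacing transfer; bump dirty_rate_high_cnt\n");
            /* In the patch, once this has happened enough times (the counter
             * must exceed 4) mig_throttle_on is set, and every VCPU is then
             * put to sleep for 30 ms roughly every 40 ms. */
        }
        return 0;
    }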


* Re: [Qemu-devel] [PATCH v8 3/3] Force auto-convergence of live migration
From: Paolo Bonzini @ 2013-06-24 15:59 UTC (permalink / raw)
  To: Chegu Vinod; +Cc: owasserm, qemu-devel, anthony, quintela

On 24/06/2013 11:47, Chegu Vinod wrote:
> [...]

As far as the algorithm is concerned,

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

but are you sure that this passes checkpatch.pl?

> +            /* The following detection logic can be refined later. For now:
> +               Check to see if the dirtied bytes is 50% more than the approx.
> +               amount of bytes that just got transferred since the last time we
> +               were in this routine. If that happens >N times (for now N==4)
> +               we turn on the throttle down logic */
> +            bytes_xfer_now = ram_bytes_transferred();
> +            if (s->dirty_pages_rate &&
> +               (num_dirty_pages_period * TARGET_PAGE_SIZE >
> +                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
> +               (dirty_rate_high_cnt++ > 4)) {

the spacing of the operators here looks like something checkpatch.pl
would complain about.  If you have to respin for that, keep my R-b and
please also remove all other superfluous parentheses.
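
As an illustration only (not the actual respin), the same condition with the
operator spacing normalized and the extra parentheses dropped might look like:

            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev) / 2 &&
                dirty_rate_high_cnt++ > 4) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }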

Paolo



* Re: [Qemu-devel] [PATCH v8 3/3] Force auto-convergence of live migration
From: Chegu Vinod @ 2013-06-24 16:22 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: owasserm, qemu-devel, anthony, quintela

On 6/24/2013 8:59 AM, Paolo Bonzini wrote:
> On 24/06/2013 11:47, Chegu Vinod wrote:
>> [...]
> As far as the algorithm is concerned,
>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Thanks!
>
> but are you sure that this passes checkpatch.pl?

Yes it does (had checked it before I posted).

# ./scripts/checkpatch.pl 0003-Force-auto-convegence-of-live-migration.patch
total: 0 errors, 0 warnings, 114 lines checked

0003-Force-auto-convegence-of-live-migration.patch has no obvious style problems and is ready for submission.

Vinod


* Re: [Qemu-devel] [PATCH v8 3/3] Force auto-convergence of live migration
From: Juan Quintela @ 2013-07-12 12:00 UTC (permalink / raw)
  To: Chegu Vinod; +Cc: owasserm, qemu-devel, anthony, pbonzini

Chegu Vinod <chegu_vinod@hp.com> wrote:
> [...]

Reviewed-by: Juan Quintela <quintela@redhat.com>


* Re: [Qemu-devel] [PATCH v8 3/3] Force auto-convergence of live migration
From: Orit Wasserman @ 2013-07-13  9:48 UTC (permalink / raw)
  To: Chegu Vinod; +Cc: pbonzini, qemu-devel, anthony, quintela

On 06/24/2013 12:47 PM, Chegu Vinod wrote:
> [...]

Reviewed-by: Orit Wasserman <owasserm@redhat.com>

