All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/2] Bitmap for recovery of postcopy migration from n/w failure
@ 2016-08-16  0:25 Md Haris Iqbal
  2016-08-16  0:25 ` [Qemu-devel] [PATCH 1/2] Migration: New bitmap for postcopy migration failure Md Haris Iqbal
  2016-08-16  0:25 ` [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source Md Haris Iqbal
  0 siblings, 2 replies; 4+ messages in thread
From: Md Haris Iqbal @ 2016-08-16  0:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: dgilbert, Md Haris Iqbal

A new bitmap to keep track of the pages that were lost in case there was a
network failure during postcopy.
The bitmap is used to request those lost pages towards the end of migration.

Md Haris Iqbal (2):
  Migration: New bitmap for postcopy migration failure
  Migration: Request lost pages (due to n/w failure) from source

 include/migration/migration.h | 10 +++++
 migration/migration.c         | 10 ++++-
 migration/postcopy-ram.c      |  2 +-
 migration/ram.c               | 98 ++++++++++++++++++++++++++++++++++++++++++-
 migration/savevm.c            | 19 +++++++++
 5 files changed, 135 insertions(+), 4 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 1/2] Migration: New bitmap for postcopy migration failure
  2016-08-16  0:25 [Qemu-devel] [PATCH 0/2] Bitmap for recovery of postcopy migration from n/w failure Md Haris Iqbal
@ 2016-08-16  0:25 ` Md Haris Iqbal
  2016-08-16  0:25 ` [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source Md Haris Iqbal
  1 sibling, 0 replies; 4+ messages in thread
From: Md Haris Iqbal @ 2016-08-16  0:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: dgilbert, Md Haris Iqbal

Signed-off-by: Md Haris Iqbal <haris.phnx@gmail.com>
---
 include/migration/migration.h |  3 +++
 migration/migration.c         |  8 ++++--
 migration/postcopy-ram.c      |  2 +-
 migration/ram.c               | 63 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 74d456e..0a42b87 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -358,6 +358,9 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
 int qemu_migrate_postcopy_outgoing_recovery(MigrationState *ms);
 int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,MigrationIncomingState* mis);
 
+void migrate_incoming_ram_bitmap_init(void);
+void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr);
+
 PostcopyState postcopy_state_get(void);
 /* Set the state and return the old state */
 PostcopyState postcopy_state_set(PostcopyState new_state);
diff --git a/migration/migration.c b/migration/migration.c
index 4edd77c..99138dd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -381,6 +381,10 @@ static void process_incoming_migration_co(void *opaque)
     postcopy_state_set(POSTCOPY_INCOMING_NONE);
     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                       MIGRATION_STATUS_ACTIVE);
+
+    /* Initializing the bitmap for destination side */
+    migrate_incoming_ram_bitmap_init();
+
     ret = qemu_loadvm_state(f);
 
     ps = postcopy_state_get();
diff --git a/migration/ram.c b/migration/ram.c
index 815bc0e..4f16243 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -250,6 +250,13 @@ static struct BitmapRcu {
      * of the postcopy phase
      */
     unsigned long *unsentmap;
+    /*
+     * A new bitmap for postcopy network failure recovery.
+     * It keeps track of the pages received.
+     * In the end, it would be used to request pages that were
+     * lost due to network failure.
+     */
+    unsigned long *not_received;
 } *migration_bitmap_rcu;
 
 struct CompressParam {
@@ -2340,6 +2347,7 @@ static int ram_load_postcopy(QEMUFile *f)
         void *page_buffer = NULL;
         void *place_source = NULL;
         uint8_t ch;
+        RAMBlock* block = NULL;
 
         addr = qemu_get_be64(f);
         flags = addr & ~TARGET_PAGE_MASK;
@@ -2348,7 +2356,7 @@ static int ram_load_postcopy(QEMUFile *f)
         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
         place_needed = false;
         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
-            RAMBlock *block = ram_block_from_stream(f, flags);
+            block = ram_block_from_stream(f, flags);
 
             host = host_from_ram_block_offset(block, addr);
             if (!host) {
@@ -2436,6 +2444,15 @@ static int ram_load_postcopy(QEMUFile *f)
         if (!ret) {
             ret = qemu_file_get_error(f);
         }
+        if (block != NULL) {
+            /*
+             * TODO
+             * We need to delay updating the bits until host page is
+             * received and the place is done, or tidy up the bitmap later
+             * accordingly (whether whole host page was received or not)
+             */
+            migrate_incoming_ram_bitmap_update(block, addr);
+        }
     }
 
     return ret;
@@ -2483,6 +2500,16 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             RAMBlock *block = ram_block_from_stream(f, flags);
 
             host = host_from_ram_block_offset(block, addr);
+
+            migrate_incoming_ram_bitmap_update(block, addr);
+            /*
+             * TODO
+             * 1) Do we need a bitmap_update call later in the while loop also?
+             * 2) We need to delay updating the bits until host page is
+             * received and the place is done, or tidy up the bitmap later
+             * accordingly (whether whole host page was received or not)
+             */
+
             if (!host) {
                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                 ret = -EINVAL;
@@ -2578,6 +2605,40 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
     return ret;
 }
 
+void migrate_incoming_ram_bitmap_init(void)
+{
+    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
+
+    /*
+     * A new bitmap for postcopy network failure recovery.
+     * It keeps track of the pages received.
+     * In the end, it would be used to request pages that were
+     * lost due to network failure.
+     */
+
+    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
+    migration_bitmap_rcu->not_received = bitmap_new(ram_bitmap_pages);
+    bitmap_set(migration_bitmap_rcu->not_received, 0, ram_bitmap_pages);
+}
+
+void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr)
+{
+    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
+    unsigned long nr = base + (addr >> TARGET_PAGE_BITS);
+    unsigned long *bitmap;
+
+    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->not_received;
+    clear_bit(nr, bitmap);
+
+    static int count = 0;
+    count++;
+    if(count == 1000) {
+        count = 0;
+        ram_debug_dump_bitmap(bitmap, true);
+    }
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
     .save_live_setup = ram_save_setup,
     .save_live_iterate = ram_save_iterate,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source
  2016-08-16  0:25 [Qemu-devel] [PATCH 0/2] Bitmap for recovery of postcopy migration from n/w failure Md Haris Iqbal
  2016-08-16  0:25 ` [Qemu-devel] [PATCH 1/2] Migration: New bitmap for postcopy migration failure Md Haris Iqbal
@ 2016-08-16  0:25 ` Md Haris Iqbal
  2016-08-16 15:31   ` Dr. David Alan Gilbert
  1 sibling, 1 reply; 4+ messages in thread
From: Md Haris Iqbal @ 2016-08-16  0:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: dgilbert, Md Haris Iqbal

Signed-off-by: Md Haris Iqbal <haris.phnx@gmail.com>
---
 include/migration/migration.h |  7 +++++++
 migration/migration.c         |  2 ++
 migration/ram.c               | 35 +++++++++++++++++++++++++++++++++++
 migration/savevm.c            | 19 +++++++++++++++++++
 4 files changed, 63 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 0a42b87..4c787ce 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -36,6 +36,7 @@
 #define QEMU_VM_CONFIGURATION        0x07
 #define QEMU_VM_COMMAND              0x08
 #define QEMU_VM_SECTION_FOOTER       0x7e
+#define QEMU_VM_ALMOST_COMPLETE      0x09
 
 struct MigrationParams {
     bool blk;
@@ -145,6 +146,11 @@ struct MigrationState
     int state;
     /* Old style params from 'migrate' command */
     MigrationParams params;
+    /*
+     * Don't need 2 variables for recovery.
+     * Clean this up, use a single variable with different states.
+     */
+    bool recovered_once;
     bool in_recovery;
 
     /* State related to return path */
@@ -360,6 +366,7 @@ int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,MigrationIncomingState*
 
 void migrate_incoming_ram_bitmap_init(void);
 void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr);
+void *migrate_incoming_ram_req_pages(void *opaque);
 
 PostcopyState postcopy_state_get(void);
 /* Set the state and return the old state */
diff --git a/migration/migration.c b/migration/migration.c
index 99138dd..be24b69 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1042,6 +1042,7 @@ MigrationState *migrate_init(const MigrationParams *params)
     s->xfer_limit = 0;
     s->cleanup_bh = 0;
     s->to_dst_file = NULL;
+    s->recovered_once = false;
     s->in_recovery = false;
     s->state = MIGRATION_STATUS_NONE;
     s->params = *params;
@@ -1918,6 +1919,7 @@ static void *migration_thread(void *opaque)
                 if(ret == 0) {
                     current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
                     runstate_set(RUN_STATE_FINISH_MIGRATE);
+                    s->recovered_once = true;
                     qemu_file_clear_error(s->to_dst_file);
                     continue;
                 }
diff --git a/migration/ram.c b/migration/ram.c
index 4f16243..445b863 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2639,6 +2639,41 @@ void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr)
     }
 }
 
+void *migrate_incoming_ram_req_pages(void* opaque)
+{
+    MigrationIncomingState *mis = opaque;
+    struct RAMBlock *rb;
+    size_t hostpagesize = getpagesize();
+    uint64_t addr;
+    unsigned long base;
+    unsigned long nr;
+    unsigned long size;
+    unsigned long next;
+    unsigned long *not_received;
+
+    not_received = atomic_rcu_read(&migration_bitmap_rcu)->not_received;
+    QLIST_FOREACH_RCU(rb, &ram_list.blocks, next) {
+        addr = 0;
+        base = rb->offset >> TARGET_PAGE_BITS;
+        size = base + (rb->used_length >> TARGET_PAGE_BITS);
+        while (true) {
+            nr = base + (addr >> TARGET_PAGE_BITS);
+            next = find_next_bit(not_received, size, nr);
+            addr = (next - base) << TARGET_PAGE_BITS;
+
+            if (addr >= rb->used_length) {
+                break;
+            }
+            else {
+                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                     addr, hostpagesize);
+                addr++;
+            }
+        }
+    }
+    return NULL;
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
     .save_live_setup = ram_save_setup,
     .save_live_iterate = ram_save_iterate,
diff --git a/migration/savevm.c b/migration/savevm.c
index 5fa39c1..103f0b8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -986,6 +986,12 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
 {
     SaveStateEntry *se;
     int ret;
+    MigrationState* ms = migrate_get_current();
+
+    if (ms->recovered_once == true) {
+        qemu_put_byte(f, QEMU_VM_ALMOST_COMPLETE);
+        qemu_fflush(f);
+    }
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_complete_postcopy) {
@@ -1830,6 +1836,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
     uint8_t section_type;
     int ret;
     PostcopyState ps;
+    QemuThread req_pages_not_received;
 
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
 
@@ -1851,6 +1858,18 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
                 return ret;
              }
             break;
+        case QEMU_VM_ALMOST_COMPLETE:
+            /*
+             * This case will only be used when migration recovers from a
+             * network failure during a postcopy migration.
+             * Now, send the requests for pages that were lost due to the
+             * network failure.
+             */
+            qemu_thread_create(&req_pages_not_received,
+                       "postcopy/req_pages_not_received",
+                       migrate_incoming_ram_req_pages, mis,
+                       QEMU_THREAD_DETACHED);
+            break;
         default:
             error_report("Unknown savevm section type %d", section_type);
             return -EINVAL;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source
  2016-08-16  0:25 ` [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source Md Haris Iqbal
@ 2016-08-16 15:31   ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 4+ messages in thread
From: Dr. David Alan Gilbert @ 2016-08-16 15:31 UTC (permalink / raw)
  To: Md Haris Iqbal; +Cc: qemu-devel


* Md Haris Iqbal (haris.phnx@gmail.com) wrote:
> Signed-off-by: Md Haris Iqbal <haris.phnx@gmail.com>
> ---
>  include/migration/migration.h |  7 +++++++
>  migration/migration.c         |  2 ++
>  migration/ram.c               | 35 +++++++++++++++++++++++++++++++++++
>  migration/savevm.c            | 19 +++++++++++++++++++
>  4 files changed, 63 insertions(+)

I think you probably want to split this patch into two parts:
  a) Sending the message that triggers the recovery
  b) The code that works through the bitmap and requests pages.

(and also put it together with your previous patches to make the whole series).

> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 0a42b87..4c787ce 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -36,6 +36,7 @@
>  #define QEMU_VM_CONFIGURATION        0x07
>  #define QEMU_VM_COMMAND              0x08
>  #define QEMU_VM_SECTION_FOOTER       0x7e
> +#define QEMU_VM_ALMOST_COMPLETE      0x09

Let's keep this in numeric order; but I think you're
better off using a QEMU_VM_COMMAND; e.g. allocate a new MIG_CMD_ and use that,
like MIG_CMD_OPEN_RETURN_PATH.  Also 'ALMOST_COMPLETE' is a bit odd; let's
call it POSTCOPY_RECOVERY.

>  struct MigrationParams {
>      bool blk;
> @@ -145,6 +146,11 @@ struct MigrationState
>      int state;
>      /* Old style params from 'migrate' command */
>      MigrationParams params;
> +    /*
> +     * Don't need 2 variables for recovery.
> +     * Clean this up, use a single variable with different states.
> +     */
> +    bool recovered_once;
>      bool in_recovery;
>  
>      /* State related to return path */
> @@ -360,6 +366,7 @@ int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,MigrationIncomingState*
>  
>  void migrate_incoming_ram_bitmap_init(void);
>  void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr);
> +void *migrate_incoming_ram_req_pages(void *opaque);
>  
>  PostcopyState postcopy_state_get(void);
>  /* Set the state and return the old state */
> diff --git a/migration/migration.c b/migration/migration.c
> index 99138dd..be24b69 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1042,6 +1042,7 @@ MigrationState *migrate_init(const MigrationParams *params)
>      s->xfer_limit = 0;
>      s->cleanup_bh = 0;
>      s->to_dst_file = NULL;
> +    s->recovered_once = false;
>      s->in_recovery = false;
>      s->state = MIGRATION_STATUS_NONE;
>      s->params = *params;
> @@ -1918,6 +1919,7 @@ static void *migration_thread(void *opaque)
>                  if(ret == 0) {
>                      current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
>                      runstate_set(RUN_STATE_FINISH_MIGRATE);
> +                    s->recovered_once = true;
>                      qemu_file_clear_error(s->to_dst_file);
>                      continue;
>                  }
> diff --git a/migration/ram.c b/migration/ram.c
> index 4f16243..445b863 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2639,6 +2639,41 @@ void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr)
>      }
>  }
>  
> +void *migrate_incoming_ram_req_pages(void* opaque)
> +{
> +    MigrationIncomingState *mis = opaque;
> +    struct RAMBlock *rb;
> +    size_t hostpagesize = getpagesize();
> +    uint64_t addr;
> +    unsigned long base;
> +    unsigned long nr;
> +    unsigned long size;
> +    unsigned long next;
> +    unsigned long *not_received;
> +
> +    not_received = atomic_rcu_read(&migration_bitmap_rcu)->not_received;
> +    QLIST_FOREACH_RCU(rb, &ram_list.blocks, next) {
> +        addr = 0;
> +        base = rb->offset >> TARGET_PAGE_BITS;
> +        size = base + (rb->used_length >> TARGET_PAGE_BITS);

I think you can move those declarations down into the loop if you want;
also 'size' is an odd name there - I think that's really 'end'.

> +        while (true) {
> +            nr = base + (addr >> TARGET_PAGE_BITS);
> +            next = find_next_bit(not_received, size, nr);
> +            addr = (next - base) << TARGET_PAGE_BITS;

It feels like you should be able to do this with a bit less shifting;
if you just remember 'nr' between iterations rather than addr, I think you
save one set of shifts.

> +            if (addr >= rb->used_length) {
> +                break;
> +            }
> +            else {
> +                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
> +                                     addr, hostpagesize);

Note that migrate_send_rp_req_pages can send a request for up to 4GB of space;
so you could search for the next 0 and then get a whole run of pages in one
message. (Note migrate_send_rp_req_pages really needs fixing to have the
right parameter to force it not to take more than 4GB).

> +                addr++;
> +            }
> +        }
> +    }
> +    return NULL;
> +}
> +
>  static SaveVMHandlers savevm_ram_handlers = {
>      .save_live_setup = ram_save_setup,
>      .save_live_iterate = ram_save_iterate,
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 5fa39c1..103f0b8 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -986,6 +986,12 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
>  {
>      SaveStateEntry *se;
>      int ret;
> +    MigrationState* ms = migrate_get_current();

Style  is   MigrationState *ms

> +    if (ms->recovered_once == true) {

No need for the '== true' test.

> +        qemu_put_byte(f, QEMU_VM_ALMOST_COMPLETE);
> +        qemu_fflush(f);
> +    }
>  
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (!se->ops || !se->ops->save_live_complete_postcopy) {
> @@ -1830,6 +1836,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
>      uint8_t section_type;
>      int ret;
>      PostcopyState ps;
> +    QemuThread req_pages_not_received;
>  
>      while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
>  
> @@ -1851,6 +1858,18 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
>                  return ret;
>               }
>              break;
> +        case QEMU_VM_ALMOST_COMPLETE:
> +            /*
> +             * This case will only be used when migration recovers from a
> +             * network failure during a postcopy migration.
> +             * Now, send the requests for pages that were lost due to the
> +             * network failure.
> +             */
> +            qemu_thread_create(&req_pages_not_received,
> +                       "postcopy/req_pages_not_received",

Note the thread name only goes up to 14 characters; so a shorter
name is needed; e.g. "pc/recovery".

> +                       migrate_incoming_ram_req_pages, mis,
> +                       QEMU_THREAD_DETACHED);
> +            break;
>          default:
>              error_report("Unknown savevm section type %d", section_type);
>              return -EINVAL;
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-08-16 15:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-16  0:25 [Qemu-devel] [PATCH 0/2] Bitmap for recovery of postcopy migration from n/w failure Md Haris Iqbal
2016-08-16  0:25 ` [Qemu-devel] [PATCH 1/2] Migration: New bitmap for postcopy migration failure Md Haris Iqbal
2016-08-16  0:25 ` [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source Md Haris Iqbal
2016-08-16 15:31   ` Dr. David Alan Gilbert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.