All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH Remus v1 0/8] Remus support for Migration-v2
@ 2015-05-07  6:37 Yang Hongyang
  2015-05-07  6:37 ` [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration Yang Hongyang
                   ` (8 more replies)
  0 siblings, 9 replies; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

This patchset implement the Remus support for Migration v2 but without
memory compressing.

PATCH 1-5: Some refactor and prepare work.
PATCH 6-7: The main Remus loop implement.
PATCH 8: Fix for Remus.

Yang Hongyang (8):
  tools/libxc: adjust the memory allocation for migration
  tools/libxc: reuse send_some_pages() in send_all_pages()
  tools/libxc: introduce process_record()
  tools/libxc: split read/handle qemu info
  tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  tools/libxc: implement Remus checkpointed save
  tools/libxc: implement Remus checkpointed restore
  tools/libxc: X86_PV_INFO can be sent multiple times under Remus

 tools/libxc/include/xenguest.h      |   1 +
 tools/libxc/xc_bitops.h             |   5 +
 tools/libxc/xc_sr_common.h          |  17 ++++
 tools/libxc/xc_sr_restore.c         | 179 +++++++++++++++++++++++++++++-------
 tools/libxc/xc_sr_restore_x86_hvm.c |  44 ++++++++-
 tools/libxc/xc_sr_restore_x86_pv.c  |   2 +-
 tools/libxc/xc_sr_save.c            | 166 +++++++++++++++++----------------
 tools/libxl/libxl_dom.c             |   1 +
 8 files changed, 295 insertions(+), 120 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07  9:48   ` Andrew Cooper
  2015-05-07  6:37 ` [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages() Yang Hongyang
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

Move the memory allocation before the concrete live/nolive save
in order to avoid the free/alloc memory loop when using Remus.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_sr_save.c | 53 +++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 5d9c267..7fed668 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -3,6 +3,8 @@
 
 #include "xc_sr_common.h"
 
+DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
+
 /*
  * Writes an Image header and Domain header into the stream.
  */
@@ -475,25 +477,11 @@ static int update_progress_string(struct xc_sr_context *ctx,
 static int send_domain_memory_live(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
-    DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
     xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
     char *progress_str = NULL;
     unsigned x;
     int rc = -1;
 
-    to_send = xc_hypercall_buffer_alloc_pages(
-        xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size)));
-
-    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
-                                  sizeof(*ctx->save.batch_pfns));
-    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
-
-    if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages )
-    {
-        ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps");
-        goto out;
-    }
-
     rc = enable_logdirty(ctx);
     if ( rc )
         goto out;
@@ -593,10 +581,6 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
   out:
     xc_set_progress_prefix(xch, NULL);
     free(progress_str);
-    xc_hypercall_buffer_free_pages(xch, to_send,
-                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
-    free(ctx->save.deferred_pages);
-    free(ctx->save.batch_pfns);
     return rc;
 }
 
@@ -609,17 +593,6 @@ static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
     xc_interface *xch = ctx->xch;
     int rc = -1;
 
-    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
-                                  sizeof(*ctx->save.batch_pfns));
-    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
-
-    if ( !ctx->save.batch_pfns || !ctx->save.deferred_pages )
-    {
-        PERROR("Failed to allocate memory for nonlive tracking structures");
-        errno = ENOMEM;
-        goto err;
-    }
-
     rc = suspend_domain(ctx);
     if ( rc )
         goto err;
@@ -631,9 +604,6 @@ static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
         goto err;
 
  err:
-    free(ctx->save.deferred_pages);
-    free(ctx->save.batch_pfns);
-
     return rc;
 }
 
@@ -645,6 +615,20 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
     xc_interface *xch = ctx->xch;
     int rc, saved_rc = 0, saved_errno = 0;
 
+    to_send = xc_hypercall_buffer_alloc_pages(
+                        xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size)));
+    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
+                                  sizeof(*ctx->save.batch_pfns));
+    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
+
+    if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages )
+    {
+        ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps");
+        rc = -1;
+        errno = ENOMEM;
+        goto err;
+    }
+
     IPRINTF("Saving domain %d, type %s",
             ctx->domid, dhdr_type_to_str(guest_type));
 
@@ -704,6 +688,11 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
     if ( rc )
         PERROR("Failed to clean up");
 
+    xc_hypercall_buffer_free_pages(xch, to_send,
+                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
+    free(ctx->save.deferred_pages);
+    free(ctx->save.batch_pfns);
+
     if ( saved_rc )
     {
         rc = saved_rc;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages()
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
  2015-05-07  6:37 ` [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07 10:08   ` Andrew Cooper
  2015-05-07  6:37 ` [PATCH Remus v1 3/8] tools/libxc: introduce process_record() Yang Hongyang
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

introduce bitmap_set() to set the entire bitmap.
in send_all_pages(), set the entire bitmap and call send_some_pages().

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_bitops.h  |  5 +++++
 tools/libxc/xc_sr_save.c | 41 +++++++++++------------------------------
 2 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/tools/libxc/xc_bitops.h b/tools/libxc/xc_bitops.h
index dfce3b8..cd749f4 100644
--- a/tools/libxc/xc_bitops.h
+++ b/tools/libxc/xc_bitops.h
@@ -26,6 +26,11 @@ static inline unsigned long *bitmap_alloc(int nr_bits)
     return calloc(1, bitmap_size(nr_bits));
 }
 
+static inline void bitmap_set(unsigned long *addr, int nr_bits)
+{
+    memset(addr, 0xff, bitmap_size(nr_bits));
+}
+
 static inline void bitmap_clear(unsigned long *addr, int nr_bits)
 {
     memset(addr, 0, bitmap_size(nr_bits));
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 7fed668..2993ec3 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -346,36 +346,6 @@ static int suspend_domain(struct xc_sr_context *ctx)
 }
 
 /*
- * Send all pages in the guests p2m.  Used as the first iteration of the live
- * migration loop, and for a non-live save.
- */
-static int send_all_pages(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t p;
-    int rc;
-
-    for ( p = 0; p < ctx->save.p2m_size; ++p )
-    {
-        rc = add_to_batch(ctx, p);
-        if ( rc )
-            return rc;
-
-        /* Update progress every 4MB worth of memory sent. */
-        if ( (p & ((1U << (22 - 12)) - 1)) == 0 )
-            xc_report_progress_step(xch, p, ctx->save.p2m_size);
-    }
-
-    rc = flush_batch(ctx);
-    if ( rc )
-        return rc;
-
-    xc_report_progress_step(xch, ctx->save.p2m_size,
-                            ctx->save.p2m_size);
-    return 0;
-}
-
-/*
  * Send a subset of pages in the guests p2m, according to the provided bitmap.
  * Used for each subsequent iteration of the live migration loop.
  *
@@ -417,6 +387,17 @@ static int send_some_pages(struct xc_sr_context *ctx,
     return 0;
 }
 
+/*
+ * Send all pages in the guests p2m.  Used as the first iteration of the live
+ * migration loop, and for a non-live save.
+ */
+static int send_all_pages(struct xc_sr_context *ctx)
+{
+    bitmap_set(to_send, ctx->save.p2m_size);
+
+    return send_some_pages(ctx, to_send, ctx->save.p2m_size);
+}
+
 static int enable_logdirty(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 3/8] tools/libxc: introduce process_record()
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
  2015-05-07  6:37 ` [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration Yang Hongyang
  2015-05-07  6:37 ` [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages() Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07 10:12   ` Andrew Cooper
  2015-05-07  6:37 ` [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info Yang Hongyang
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

Move record handle codes into a function process_record().
It will be used multiple times by Remus.
No functional change.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_sr_restore.c | 77 +++++++++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 34 deletions(-)

diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 0bf4bae..53bd674 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -468,6 +468,48 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
     return rc;
 }
 
+static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = 0;
+
+    switch ( rec->type )
+    {
+    case REC_TYPE_END:
+        break;
+
+    case REC_TYPE_PAGE_DATA:
+        rc = handle_page_data(ctx, rec);
+        break;
+
+    case REC_TYPE_VERIFY:
+        DPRINTF("Verify mode enabled");
+        ctx->restore.verify = true;
+        break;
+
+    default:
+        rc = ctx->restore.ops.process_record(ctx, rec);
+        break;
+    }
+
+    free(rec->data);
+
+    if ( rc == RECORD_NOT_PROCESSED )
+    {
+        if ( rec->type & REC_TYPE_OPTIONAL )
+            DPRINTF("Ignoring optional record %#x (%s)",
+                    rec->type, rec_type_to_str(rec->type));
+        else
+        {
+            ERROR("Mandatory record %#x (%s) not handled",
+                  rec->type, rec_type_to_str(rec->type));
+            rc = -1;
+        }
+    }
+
+    return rc;
+}
+
 /*
  * Restore a domain.
  */
@@ -498,40 +540,7 @@ static int restore(struct xc_sr_context *ctx)
         if ( rc )
             goto err;
 
-        switch ( rec.type )
-        {
-        case REC_TYPE_END:
-            break;
-
-        case REC_TYPE_PAGE_DATA:
-            rc = handle_page_data(ctx, &rec);
-            break;
-
-        case REC_TYPE_VERIFY:
-            DPRINTF("Verify mode enabled");
-            ctx->restore.verify = true;
-            break;
-
-        default:
-            rc = ctx->restore.ops.process_record(ctx, &rec);
-            break;
-        }
-
-        free(rec.data);
-
-        if ( rc == RECORD_NOT_PROCESSED )
-        {
-            if ( rec.type & REC_TYPE_OPTIONAL )
-                DPRINTF("Ignoring optional record %#x (%s)",
-                        rec.type, rec_type_to_str(rec.type));
-            else
-            {
-                ERROR("Mandatory record %#x (%s) not handled",
-                      rec.type, rec_type_to_str(rec.type));
-                rc = -1;
-            }
-        }
-
+        rc = process_record(ctx, &rec);
         if ( rc )
             goto err;
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
                   ` (2 preceding siblings ...)
  2015-05-07  6:37 ` [PATCH Remus v1 3/8] tools/libxc: introduce process_record() Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07 10:48   ` Andrew Cooper
  2015-05-07  6:37 ` [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT Yang Hongyang
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

Split read/handle qemu info. The receiving of qemu info
should be done while we receive the migration stream,
handle_qemu will be called when the stream complete.
Otherwise, it will break Remus because read_record()
won't read qemu info and stream_complete will be called
at failover.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_sr_common.h          |  5 +++++
 tools/libxc/xc_sr_restore.c         | 12 ++++++++++++
 tools/libxc/xc_sr_restore_x86_hvm.c | 28 +++++++++++++++++++++++++---
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index ef42412..6f099b8 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -279,6 +279,11 @@ struct xc_sr_context
                     /* HVM context blob. */
                     void *context;
                     size_t contextsz;
+
+#ifdef XG_LIBXL_HVM_COMPAT
+                    uint32_t qlen;
+                    void *qbuf;
+#endif
                 } restore;
             };
         } x86_hvm;
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 53bd674..8022c3d 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -510,6 +510,9 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
     return rc;
 }
 
+#ifdef XG_LIBXL_HVM_COMPAT
+extern int read_qemu(struct xc_sr_context *ctx);
+#endif
 /*
  * Restore a domain.
  */
@@ -546,6 +549,15 @@ static int restore(struct xc_sr_context *ctx)
 
     } while ( rec.type != REC_TYPE_END );
 
+#ifdef XG_LIBXL_HVM_COMPAT
+    if ( ctx->dominfo.hvm )
+    {
+        rc = read_qemu(ctx);
+        if ( rc )
+            goto err;
+    }
+#endif
+
     rc = ctx->restore.ops.stream_complete(ctx);
     if ( rc )
         goto err;
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 6e9b318..6f5af0e 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -94,14 +94,14 @@ static int handle_hvm_params(struct xc_sr_context *ctx,
 }
 
 #ifdef XG_LIBXL_HVM_COMPAT
-static int handle_qemu(struct xc_sr_context *ctx)
+int read_qemu(struct xc_sr_context *ctx);
+int read_qemu(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
-    char qemusig[21], path[256];
+    char qemusig[21];
     uint32_t qlen;
     void *qbuf = NULL;
     int rc = -1;
-    FILE *fp = NULL;
 
     if ( read_exact(ctx->fd, qemusig, sizeof(qemusig)) )
     {
@@ -137,6 +137,28 @@ static int handle_qemu(struct xc_sr_context *ctx)
         goto out;
     }
 
+    /* With Remus, this could be read many times */
+    if ( ctx->x86_hvm.restore.qbuf )
+        free(ctx->x86_hvm.restore.qbuf);
+    ctx->x86_hvm.restore.qbuf = qbuf;
+    ctx->x86_hvm.restore.qlen = qlen;
+    rc = 0;
+
+out:
+    if (rc)
+        free(qbuf);
+    return rc;
+}
+
+static int handle_qemu(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    char path[256];
+    uint32_t qlen = ctx->x86_hvm.restore.qlen;
+    void *qbuf = ctx->x86_hvm.restore.qbuf;
+    int rc = -1;
+    FILE *fp = NULL;
+
     sprintf(path, XC_DEVICE_MODEL_RESTORE_FILE".%u", ctx->domid);
     fp = fopen(path, "wb");
     if ( !fp )
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
                   ` (3 preceding siblings ...)
  2015-05-07  6:37 ` [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07 10:35   ` Andrew Cooper
  2015-05-07  6:37 ` [PATCH Remus v1 6/8] tools/libxc: implement Remus checkpointed save Yang Hongyang
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

Set the hvm param HVM_PARAM_IDENT_PT second time will fail.
So defer the setting of HVM_PARAM_IDENT_PT.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_sr_common.h          |  3 +++
 tools/libxc/xc_sr_restore_x86_hvm.c | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 6f099b8..f85eb41 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -280,6 +280,9 @@ struct xc_sr_context
                     void *context;
                     size_t contextsz;
 
+                    /* HVM_PARAM_IDENT_PT */
+                    xen_pfn_t ident_pt;
+
 #ifdef XG_LIBXL_HVM_COMPAT
                     uint32_t qlen;
                     void *qbuf;
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 6f5af0e..929bee8 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -76,6 +76,14 @@ static int handle_hvm_params(struct xc_sr_context *ctx,
             ctx->restore.xenstore_gfn = entry->value;
             xc_clear_domain_page(xch, ctx->domid, entry->value);
             break;
+        case HVM_PARAM_IDENT_PT:
+            /*
+             * With Remus, we need to buffer this param because this
+             * can not be set multiple times, we will set this param
+             * at x86_hvm_stream_complete()
+             */
+            ctx->x86_hvm.restore.ident_pt = entry->value;
+            continue;
         case HVM_PARAM_IOREQ_PFN:
         case HVM_PARAM_BUFIOREQ_PFN:
             xc_clear_domain_page(xch, ctx->domid, entry->value);
@@ -278,6 +286,14 @@ static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
     xc_interface *xch = ctx->xch;
     int rc;
 
+    rc = xc_hvm_param_set(xch, ctx->domid, HVM_PARAM_IDENT_PT,
+                          ctx->x86_hvm.restore.ident_pt);
+    if ( rc )
+    {
+        PERROR("Failed to set HVM_PARAM_IDENT_PT");
+        return rc;
+    }
+
     rc = xc_hvm_param_set(xch, ctx->domid, HVM_PARAM_STORE_EVTCHN,
                           ctx->restore.xenstore_evtchn);
     if ( rc )
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 6/8] tools/libxc: implement Remus checkpointed save
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
                   ` (4 preceding siblings ...)
  2015-05-07  6:37 ` [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07  6:37 ` [PATCH Remus v1 7/8] tools/libxc: implement Remus checkpointed restore Yang Hongyang
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

With Remus, the save flow should be:
live migration->{ periodically save(checkpointed save) }

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/include/xenguest.h |  1 +
 tools/libxc/xc_sr_common.h     |  6 ++++
 tools/libxc/xc_sr_save.c       | 74 +++++++++++++++++++++++++++++-------------
 tools/libxl/libxl_dom.c        |  1 +
 4 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 8e39075..7581263 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -30,6 +30,7 @@
 #define XCFLAGS_HVM       (1 << 2)
 #define XCFLAGS_STDVGA    (1 << 3)
 #define XCFLAGS_CHECKPOINT_COMPRESS    (1 << 4)
+#define XCFLAGS_CHECKPOINTED    (1 << 5)
 
 #define X86_64_B_SIZE   64 
 #define X86_32_B_SIZE   32
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index f85eb41..1aab5aa 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -172,6 +172,12 @@ struct xc_sr_context
             /* Further debugging information in the stream. */
             bool debug;
 
+            /*
+             * Whether it is a checkpointed save, we'll enter checkpointed
+             * save after live migration under Remus.
+             */
+            bool checkpointed;
+
             /* Parameters for tweaking live migration. */
             unsigned max_iterations;
             unsigned dirty_threshold;
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 2993ec3..a11f558 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -463,6 +463,9 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
     unsigned x;
     int rc = -1;
 
+    if ( !ctx->save.live )
+        goto last_iter;
+
     rc = enable_logdirty(ctx);
     if ( rc )
         goto out;
@@ -501,6 +504,7 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
             goto out;
     }
 
+ last_iter:
     rc = suspend_domain(ctx);
     if ( rc )
         goto out;
@@ -623,35 +627,52 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
     if ( rc )
         goto err;
 
-    rc = ctx->save.ops.start_of_stream(ctx);
-    if ( rc )
-        goto err;
+    do {
+        rc = ctx->save.ops.start_of_stream(ctx);
+        if ( rc )
+            goto err;
 
-    if ( ctx->save.live )
-        rc = send_domain_memory_live(ctx);
-    else
-        rc = send_domain_memory_nonlive(ctx);
+        if ( ctx->save.live || ctx->save.checkpointed )
+            rc = send_domain_memory_live(ctx);
+        else
+            rc = send_domain_memory_nonlive(ctx);
 
-    if ( rc )
-        goto err;
+        if ( rc )
+            goto err;
 
-    if ( !ctx->dominfo.shutdown ||
-         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
-    {
-        ERROR("Domain has not been suspended");
-        rc = -1;
-        goto err;
-    }
+        if ( !ctx->dominfo.shutdown ||
+             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
+        {
+            ERROR("Domain has not been suspended");
+            rc = -1;
+            goto err;
+        }
 
-    xc_report_progress_single(xch, "End of stream");
+        xc_report_progress_single(xch, "End of stream");
 
-    rc = ctx->save.ops.end_of_stream(ctx);
-    if ( rc )
-        goto err;
+        rc = ctx->save.ops.end_of_stream(ctx);
+        if ( rc )
+            goto err;
 
-    rc = write_end_record(ctx);
-    if ( rc )
-        goto err;
+        rc = write_end_record(ctx);
+        if ( rc )
+            goto err;
+
+        if ( ctx->save.live ) {
+            /* End of live migration */
+            ctx->save.live = false;
+        }
+
+        if ( ctx->save.checkpointed ) {
+            ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
+
+            rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
+            if ( rc > 0 )
+                IPRINTF("Checkpointed save");
+            else
+                ctx->save.checkpointed = false;
+        }
+    } while ( ctx->save.checkpointed );
 
     xc_report_progress_single(xch, "Complete");
     goto done;
@@ -698,6 +719,13 @@ int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom,
     ctx.save.callbacks = callbacks;
     ctx.save.live  = !!(flags & XCFLAGS_LIVE);
     ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
+    ctx.save.checkpointed = !!(flags & XCFLAGS_CHECKPOINTED);
+
+    if ( ctx.save.checkpointed ) {
+        /* This is a checkpointed save, we need these callbacks */
+        assert(ctx.save.callbacks->postcopy);
+        assert(ctx.save.callbacks->checkpoint);
+    }
 
     /*
      * TODO: Find some time to better tweak the live migration algorithm.
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index f408646..a0c9850 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -2003,6 +2003,7 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss)
 
     if (r_info != NULL) {
         dss->interval = r_info->interval;
+        dss->xcflags |= XCFLAGS_CHECKPOINTED;
         if (libxl_defbool_val(r_info->compression))
             dss->xcflags |= XCFLAGS_CHECKPOINT_COMPRESS;
     }
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 7/8] tools/libxc: implement Remus checkpointed restore
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
                   ` (5 preceding siblings ...)
  2015-05-07  6:37 ` [PATCH Remus v1 6/8] tools/libxc: implement Remus checkpointed save Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07  6:37 ` [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus Yang Hongyang
  2015-05-07  7:04 ` [PATCH Remus v1 0/8] Remus support for Migration-v2 Hongyang Yang
  8 siblings, 0 replies; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

With Remus, the restore flow should be:
the first full migration stream -> { periodically restore stream }
on failover:
  stream_complete()

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_sr_common.h  |   3 ++
 tools/libxc/xc_sr_restore.c | 108 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 1aab5aa..4da0a19 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -195,6 +195,9 @@ struct xc_sr_context
             struct xc_sr_restore_ops ops;
             struct restore_callbacks *callbacks;
 
+            /* Whether it is a checkpointed stream */
+            bool checkpointed;
+
             /* From Image Header. */
             uint32_t format_version;
 
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 8022c3d..fceb5c7 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -3,6 +3,15 @@
 #include "xc_sr_common.h"
 
 /*
+ * With Remus, we buffer the records sent by primary at checkpoint,
+ * in case the primary will fail, we can recover from the last
+ * checkpoint state.
+ * This should be enough because primary only send dirty pages at
+ * checkpoint.
+ */
+#define MAX_RECORDS 1024
+
+/*
  * Read and validate the Image and Domain headers.
  */
 static int read_headers(struct xc_sr_context *ctx)
@@ -519,8 +528,11 @@ extern int read_qemu(struct xc_sr_context *ctx);
 static int restore(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
-    struct xc_sr_record rec;
-    int rc, saved_rc = 0, saved_errno = 0;
+    struct xc_sr_record *rec;
+    int rc, saved_rc = 0, saved_errno = 0, i;
+    struct xc_sr_record *records[MAX_RECORDS];
+
+    memset(records, 0, MAX_RECORDS*sizeof(void *));
 
     IPRINTF("Restoring domain");
 
@@ -537,17 +549,29 @@ static int restore(struct xc_sr_context *ctx)
         goto err;
     }
 
+    rec = malloc(sizeof(struct xc_sr_record));
+    if (!rec)
+    {
+        ERROR("Unable to allocate memory for record");
+        rc = -1;
+        goto err;
+    }
     do
     {
-        rc = read_record(ctx, &rec);
-        if ( rc )
+        rc = read_record(ctx, rec);
+        if ( rc ) {
+            free(rec);
             goto err;
+        }
 
-        rc = process_record(ctx, &rec);
-        if ( rc )
+        rc = process_record(ctx, rec);
+        if ( rc ) {
+            free(rec);
             goto err;
+        }
 
-    } while ( rec.type != REC_TYPE_END );
+    } while ( rec->type != REC_TYPE_END );
+    free(rec);
 
 #ifdef XG_LIBXL_HVM_COMPAT
     if ( ctx->dominfo.hvm )
@@ -557,7 +581,69 @@ static int restore(struct xc_sr_context *ctx)
             goto err;
     }
 #endif
+    /*
+     * If it is a live migration, this is the end of the live migration
+     * stream
+     * With Remus, we will enter a loop to receive the stream periodically
+     * sent by primary.
+     */
+
+    while ( ctx->restore.checkpointed )
+    {
+        i = 0;
+        do
+        {
+            /* Buffer records */
+            if ( i >= MAX_RECORDS )
+            {
+                ERROR("There are too many records");
+                rc = -1;
+                goto err;
+            }
 
+            rec = malloc(sizeof(struct xc_sr_record));
+            if (!rec)
+            {
+                ERROR("Unable to allocate memory for record");
+                rc = -1;
+                goto err;
+            }
+            rc = read_record(ctx, rec);
+            if ( rc )
+                goto err_buf;
+
+            records[i++] = rec;
+        } while ( rec->type != REC_TYPE_END );
+#ifdef XG_LIBXL_HVM_COMPAT
+        if ( ctx->dominfo.hvm )
+        {
+            rc = read_qemu(ctx);
+            if ( rc )
+                goto err_buf;
+        }
+#endif
+        IPRINTF("All records buffered");
+
+        i = 0;
+        rec = records[i++];
+        while (rec)
+        {
+            rc = process_record(ctx, rec);
+            free(rec);
+            records[i-1] = NULL;
+            if ( rc )
+                goto err;
+
+            rec = records[i++];
+        }
+        IPRINTF("All records processed");
+    }
+
+ err_buf:
+    /*
+     * With Remus, if we reach here, there must be some error on primary,
+     * failover from the last checkpoint state.
+     */
     rc = ctx->restore.ops.stream_complete(ctx);
     if ( rc )
         goto err;
@@ -571,6 +657,13 @@ static int restore(struct xc_sr_context *ctx)
     PERROR("Restore failed");
 
  done:
+    for ( i = 0; i < MAX_RECORDS; i++)
+    {
+        if ( records[i] ) {
+            free(records[i]->data);
+            free(records[i]);
+        }
+    }
     free(ctx->restore.populated_pfns);
     rc = ctx->restore.ops.cleanup(ctx);
     if ( rc )
@@ -605,6 +698,7 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
     ctx.restore.xenstore_evtchn = store_evtchn;
     ctx.restore.xenstore_domid = store_domid;
     ctx.restore.callbacks = callbacks;
+    ctx.restore.checkpointed = checkpointed_stream;
 
     IPRINTF("In experimental %s", __func__);
     DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d"
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
                   ` (6 preceding siblings ...)
  2015-05-07  6:37 ` [PATCH Remus v1 7/8] tools/libxc: implement Remus checkpointed restore Yang Hongyang
@ 2015-05-07  6:37 ` Yang Hongyang
  2015-05-07 10:58   ` Andrew Cooper
  2015-05-07  7:04 ` [PATCH Remus v1 0/8] Remus support for Migration-v2 Hongyang Yang
  8 siblings, 1 reply; 31+ messages in thread
From: Yang Hongyang @ 2015-05-07  6:37 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, ian.jackson, yunhong.jiang,
	eddie.dong, rshriram

X86_PV_INFO can be sent multiple times under Remus

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxc/xc_sr_restore_x86_pv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/libxc/xc_sr_restore_x86_pv.c b/tools/libxc/xc_sr_restore_x86_pv.c
index ef26c64..652b400 100644
--- a/tools/libxc/xc_sr_restore_x86_pv.c
+++ b/tools/libxc/xc_sr_restore_x86_pv.c
@@ -590,7 +590,7 @@ static int handle_x86_pv_info(struct xc_sr_context *ctx,
     xc_interface *xch = ctx->xch;
     struct xc_sr_rec_x86_pv_info *info = rec->data;
 
-    if ( ctx->x86_pv.restore.seen_pv_info )
+    if ( ctx->x86_pv.restore.seen_pv_info && !ctx->restore.checkpointed )
     {
         ERROR("Already received X86_PV_INFO record");
         return -1;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 0/8] Remus support for Migration-v2
  2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
                   ` (7 preceding siblings ...)
  2015-05-07  6:37 ` [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus Yang Hongyang
@ 2015-05-07  7:04 ` Hongyang Yang
  2015-05-07  9:31   ` Andrew Cooper
  8 siblings, 1 reply; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07  7:04 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, andrew.cooper3, rshriram

Cc Andrew cooper

Sorry I missed your address...

On 05/07/2015 02:37 PM, Yang Hongyang wrote:
> This patchset implement the Remus support for Migration v2 but without
> memory compressing.
>
> PATCH 1-5: Some refactor and prepare work.
> PATCH 6-7: The main Remus loop implement.
> PATCH 8: Fix for Remus.
>
> Yang Hongyang (8):
>    tools/libxc: adjust the memory allocation for migration
>    tools/libxc: reuse send_some_pages() in send_all_pages()
>    tools/libxc: introduce process_record()
>    tools/libxc: split read/handle qemu info
>    tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
>    tools/libxc: implement Remus checkpointed save
>    tools/libxc: implement Remus checkpointed restore
>    tools/libxc: X86_PV_INFO can be sent multiple times under Remus
>
>   tools/libxc/include/xenguest.h      |   1 +
>   tools/libxc/xc_bitops.h             |   5 +
>   tools/libxc/xc_sr_common.h          |  17 ++++
>   tools/libxc/xc_sr_restore.c         | 179 +++++++++++++++++++++++++++++-------
>   tools/libxc/xc_sr_restore_x86_hvm.c |  44 ++++++++-
>   tools/libxc/xc_sr_restore_x86_pv.c  |   2 +-
>   tools/libxc/xc_sr_save.c            | 166 +++++++++++++++++----------------
>   tools/libxl/libxl_dom.c             |   1 +
>   8 files changed, 295 insertions(+), 120 deletions(-)
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 0/8] Remus support for Migration-v2
  2015-05-07  7:04 ` [PATCH Remus v1 0/8] Remus support for Migration-v2 Hongyang Yang
@ 2015-05-07  9:31   ` Andrew Cooper
  2015-05-07 15:07     ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07  9:31 UTC (permalink / raw)
  To: Hongyang Yang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 08:04, Hongyang Yang wrote:
> Cc Andrew cooper
>
> Sorry I missed your address...

No worries.  You have managed a far faster turnaround on this than I
have with libxl migration v2!

~Andrew

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07  6:37 ` [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration Yang Hongyang
@ 2015-05-07  9:48   ` Andrew Cooper
  2015-05-07 13:42     ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07  9:48 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 07:37, Yang Hongyang wrote:
> Move the memory allocation before the concrete live/nolive save
> in order to avoid the free/alloc memory loop when using Remus.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
>  tools/libxc/xc_sr_save.c | 53 +++++++++++++++++++-----------------------------
>  1 file changed, 21 insertions(+), 32 deletions(-)
>
> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
> index 5d9c267..7fed668 100644
> --- a/tools/libxc/xc_sr_save.c
> +++ b/tools/libxc/xc_sr_save.c
> @@ -3,6 +3,8 @@
>  
>  #include "xc_sr_common.h"
>  
> +DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);

This unfortunately causes an issue when concurrent calls to
xc_domain_save() in the same process.  While this is a highly
ill-advised action, I did try to avoid breaking it.

Please move this declaration into the ctx.save union.

> +
>  /*
>   * Writes an Image header and Domain header into the stream.
>   */
> @@ -475,25 +477,11 @@ static int update_progress_string(struct xc_sr_context *ctx,
>  static int send_domain_memory_live(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> -    DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
>      xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
>      char *progress_str = NULL;
>      unsigned x;
>      int rc = -1;
>  
> -    to_send = xc_hypercall_buffer_alloc_pages(
> -        xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size)));
> -
> -    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
> -                                  sizeof(*ctx->save.batch_pfns));
> -    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
> -
> -    if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages )
> -    {
> -        ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps");
> -        goto out;
> -    }
> -
>      rc = enable_logdirty(ctx);
>      if ( rc )
>          goto out;
> @@ -593,10 +581,6 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
>    out:
>      xc_set_progress_prefix(xch, NULL);
>      free(progress_str);
> -    xc_hypercall_buffer_free_pages(xch, to_send,
> -                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
> -    free(ctx->save.deferred_pages);
> -    free(ctx->save.batch_pfns);
>      return rc;
>  }
>  
> @@ -609,17 +593,6 @@ static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
>      xc_interface *xch = ctx->xch;
>      int rc = -1;
>  
> -    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
> -                                  sizeof(*ctx->save.batch_pfns));
> -    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
> -
> -    if ( !ctx->save.batch_pfns || !ctx->save.deferred_pages )
> -    {
> -        PERROR("Failed to allocate memory for nonlive tracking structures");
> -        errno = ENOMEM;
> -        goto err;
> -    }
> -
>      rc = suspend_domain(ctx);
>      if ( rc )
>          goto err;
> @@ -631,9 +604,6 @@ static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
>          goto err;
>  
>   err:
> -    free(ctx->save.deferred_pages);
> -    free(ctx->save.batch_pfns);
> -
>      return rc;
>  }
>  
> @@ -645,6 +615,20 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
>      xc_interface *xch = ctx->xch;
>      int rc, saved_rc = 0, saved_errno = 0;
>  
> +    to_send = xc_hypercall_buffer_alloc_pages(
> +                        xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size)));

This allocation only needs to be made if ctx->save.live is set.  For
plain suspend and resume, logdirty handling is not required.

> +    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
> +                                  sizeof(*ctx->save.batch_pfns));
> +    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
> +
> +    if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages )
> +    {
> +        ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps");
> +        rc = -1;
> +        errno = ENOMEM;
> +        goto err;
> +    }
> +

Instead of complicating save() like this, could you introduce two new
static functions setup() and cleanup() which subsume the
ctx->save.ops.{setup,cleanup}() calls and also do these allocations and
free.

I think the SHADOW_OP_OFF hypercall can also be moved into the cleanup()
function.

~Andrew

>      IPRINTF("Saving domain %d, type %s",
>              ctx->domid, dhdr_type_to_str(guest_type));
>  
> @@ -704,6 +688,11 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
>      if ( rc )
>          PERROR("Failed to clean up");
>  
> +    xc_hypercall_buffer_free_pages(xch, to_send,
> +                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
> +    free(ctx->save.deferred_pages);
> +    free(ctx->save.batch_pfns);
> +
>      if ( saved_rc )
>      {
>          rc = saved_rc;

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages()
  2015-05-07  6:37 ` [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages() Yang Hongyang
@ 2015-05-07 10:08   ` Andrew Cooper
  2015-05-07 13:48     ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 10:08 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 07:37, Yang Hongyang wrote:
> introduce bitmap_set() to set the entire bitmap.
> in send_all_pages(), set the entire bitmap and call send_some_pages().
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>

Ah - I see now why you unconditionally allocated the to_send bitmap.

I suppose it is probably better to have less divergence between the live
and non-live cases, and the size of the bitmap is substantially less
than other structures in use.

In which case, can I suggest the following which partially supersedes my
review in patch 1.

* Instead of having the bitmap called "to_send", having it called
something more appropriate like "dirty_bitmap".
* s/send_some_pages/send_dirty_pages/ and drop the *bitmap parameter. 
('entries' should stay as it is a useful sanity check).

This way, send_all_page() logically sets all pages as dirty, then sends
any dirty pages.

~Andrew

> ---
>  tools/libxc/xc_bitops.h  |  5 +++++
>  tools/libxc/xc_sr_save.c | 41 +++++++++++------------------------------
>  2 files changed, 16 insertions(+), 30 deletions(-)
>
> diff --git a/tools/libxc/xc_bitops.h b/tools/libxc/xc_bitops.h
> index dfce3b8..cd749f4 100644
> --- a/tools/libxc/xc_bitops.h
> +++ b/tools/libxc/xc_bitops.h
> @@ -26,6 +26,11 @@ static inline unsigned long *bitmap_alloc(int nr_bits)
>      return calloc(1, bitmap_size(nr_bits));
>  }
>  
> +static inline void bitmap_set(unsigned long *addr, int nr_bits)
> +{
> +    memset(addr, 0xff, bitmap_size(nr_bits));
> +}
> +
>  static inline void bitmap_clear(unsigned long *addr, int nr_bits)
>  {
>      memset(addr, 0, bitmap_size(nr_bits));
> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
> index 7fed668..2993ec3 100644
> --- a/tools/libxc/xc_sr_save.c
> +++ b/tools/libxc/xc_sr_save.c
> @@ -346,36 +346,6 @@ static int suspend_domain(struct xc_sr_context *ctx)
>  }
>  
>  /*
> - * Send all pages in the guests p2m.  Used as the first iteration of the live
> - * migration loop, and for a non-live save.
> - */
> -static int send_all_pages(struct xc_sr_context *ctx)
> -{
> -    xc_interface *xch = ctx->xch;
> -    xen_pfn_t p;
> -    int rc;
> -
> -    for ( p = 0; p < ctx->save.p2m_size; ++p )
> -    {
> -        rc = add_to_batch(ctx, p);
> -        if ( rc )
> -            return rc;
> -
> -        /* Update progress every 4MB worth of memory sent. */
> -        if ( (p & ((1U << (22 - 12)) - 1)) == 0 )
> -            xc_report_progress_step(xch, p, ctx->save.p2m_size);
> -    }
> -
> -    rc = flush_batch(ctx);
> -    if ( rc )
> -        return rc;
> -
> -    xc_report_progress_step(xch, ctx->save.p2m_size,
> -                            ctx->save.p2m_size);
> -    return 0;
> -}
> -
> -/*
>   * Send a subset of pages in the guests p2m, according to the provided bitmap.
>   * Used for each subsequent iteration of the live migration loop.
>   *
> @@ -417,6 +387,17 @@ static int send_some_pages(struct xc_sr_context *ctx,
>      return 0;
>  }
>  
> +/*
> + * Send all pages in the guests p2m.  Used as the first iteration of the live
> + * migration loop, and for a non-live save.
> + */
> +static int send_all_pages(struct xc_sr_context *ctx)
> +{
> +    bitmap_set(to_send, ctx->save.p2m_size);
> +
> +    return send_some_pages(ctx, to_send, ctx->save.p2m_size);
> +}
> +
>  static int enable_logdirty(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 3/8] tools/libxc: introduce process_record()
  2015-05-07  6:37 ` [PATCH Remus v1 3/8] tools/libxc: introduce process_record() Yang Hongyang
@ 2015-05-07 10:12   ` Andrew Cooper
  0 siblings, 0 replies; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 10:12 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 07:37, Yang Hongyang wrote:
> Move record handle codes into a function process_record().
> It will be used multiple times by Remus.
> No functional change.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  2015-05-07  6:37 ` [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT Yang Hongyang
@ 2015-05-07 10:35   ` Andrew Cooper
  2015-05-07 13:59     ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 10:35 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 07:37, Yang Hongyang wrote:
> Set the hvm param HVM_PARAM_IDENT_PT second time will fail.
> So defer the setting of HVM_PARAM_IDENT_PT.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>

I would argue that this is a Xen bug, not a migration bug.  There is
nothing conceptually wrong with setting this param multiple times.

Please try the following hypervisor patch instead:

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 3a09439..9ee56b3 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5748,12 +5748,13 @@ long do_hvm_op(unsigned long op,
XEN_GUEST_HANDLE_PARAM(void) arg)
                 if ( curr_d == d )
                     break;
 
-                rc = -EINVAL;
-                if ( d->arch.hvm_domain.params[a.index] != 0 )
-                    break;
-
                 rc = 0;
-                if ( !paging_mode_hap(d) )
+                d->arch.hvm_domain.params[a.index] = a.value;
+                /*
+                 * Only actually required for VT-x lacking
unrestricted_guest
+                 * capabilities.  Short circuit the pause if possible.
+                 */
+                if ( !paging_mode_hap(d) || !cpu_has_vmx )
                     break;
 
                 /*
@@ -5767,7 +5768,6 @@ long do_hvm_op(unsigned long op,
XEN_GUEST_HANDLE_PARAM(void) arg)
 
                 rc = 0;
                 domain_pause(d);
-                d->arch.hvm_domain.params[a.index] = a.value;
                 for_each_vcpu ( d, v )
                     paging_update_cr3(v);
                 domain_unpause(d);

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info
  2015-05-07  6:37 ` [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info Yang Hongyang
@ 2015-05-07 10:48   ` Andrew Cooper
  2015-05-07 13:55     ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 10:48 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 07:37, Yang Hongyang wrote:
> Split read/handle qemu info. The receiving of qemu info
> should be done while we receive the migration stream,
> handle_qemu will be called when the stream complete.
> Otherwise, it will break Remus because read_record()
> won't read qemu info and stream_complete will be called
> at failover.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>

I presume this is because Remus sends multiple qemu records in the stream?

I will be fixing the qemu record layer violation as part of libxl
migration v2, at which point all the XG_LIBXL_HVM_COMPAT code shall
disappear.

As all this code appears to live inside XG_LIBXL_HVM_COMPAT, I am
willing to trust that it DoesTheRightThing, if you have confirmed that
plain HVM migration with migration v2 and XG_LIBXL_HVM_COMPAT continues
to work (I am slightly suspicious of the 3rd hunk in this regard).

~Andrew

> ---
>  tools/libxc/xc_sr_common.h          |  5 +++++
>  tools/libxc/xc_sr_restore.c         | 12 ++++++++++++
>  tools/libxc/xc_sr_restore_x86_hvm.c | 28 +++++++++++++++++++++++++---
>  3 files changed, 42 insertions(+), 3 deletions(-)
>
> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
> index ef42412..6f099b8 100644
> --- a/tools/libxc/xc_sr_common.h
> +++ b/tools/libxc/xc_sr_common.h
> @@ -279,6 +279,11 @@ struct xc_sr_context
>                      /* HVM context blob. */
>                      void *context;
>                      size_t contextsz;
> +
> +#ifdef XG_LIBXL_HVM_COMPAT
> +                    uint32_t qlen;
> +                    void *qbuf;
> +#endif
>                  } restore;
>              };
>          } x86_hvm;
> diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
> index 53bd674..8022c3d 100644
> --- a/tools/libxc/xc_sr_restore.c
> +++ b/tools/libxc/xc_sr_restore.c
> @@ -510,6 +510,9 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>      return rc;
>  }
>  
> +#ifdef XG_LIBXL_HVM_COMPAT
> +extern int read_qemu(struct xc_sr_context *ctx);
> +#endif
>  /*
>   * Restore a domain.
>   */
> @@ -546,6 +549,15 @@ static int restore(struct xc_sr_context *ctx)
>  
>      } while ( rec.type != REC_TYPE_END );
>  
> +#ifdef XG_LIBXL_HVM_COMPAT
> +    if ( ctx->dominfo.hvm )
> +    {
> +        rc = read_qemu(ctx);
> +        if ( rc )
> +            goto err;
> +    }
> +#endif
> +
>      rc = ctx->restore.ops.stream_complete(ctx);
>      if ( rc )
>          goto err;
> diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
> index 6e9b318..6f5af0e 100644
> --- a/tools/libxc/xc_sr_restore_x86_hvm.c
> +++ b/tools/libxc/xc_sr_restore_x86_hvm.c
> @@ -94,14 +94,14 @@ static int handle_hvm_params(struct xc_sr_context *ctx,
>  }
>  
>  #ifdef XG_LIBXL_HVM_COMPAT
> -static int handle_qemu(struct xc_sr_context *ctx)
> +int read_qemu(struct xc_sr_context *ctx);
> +int read_qemu(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> -    char qemusig[21], path[256];
> +    char qemusig[21];
>      uint32_t qlen;
>      void *qbuf = NULL;
>      int rc = -1;
> -    FILE *fp = NULL;
>  
>      if ( read_exact(ctx->fd, qemusig, sizeof(qemusig)) )
>      {
> @@ -137,6 +137,28 @@ static int handle_qemu(struct xc_sr_context *ctx)
>          goto out;
>      }
>  
> +    /* With Remus, this could be read many times */
> +    if ( ctx->x86_hvm.restore.qbuf )
> +        free(ctx->x86_hvm.restore.qbuf);
> +    ctx->x86_hvm.restore.qbuf = qbuf;
> +    ctx->x86_hvm.restore.qlen = qlen;
> +    rc = 0;
> +
> +out:
> +    if (rc)
> +        free(qbuf);
> +    return rc;
> +}
> +
> +static int handle_qemu(struct xc_sr_context *ctx)
> +{
> +    xc_interface *xch = ctx->xch;
> +    char path[256];
> +    uint32_t qlen = ctx->x86_hvm.restore.qlen;
> +    void *qbuf = ctx->x86_hvm.restore.qbuf;
> +    int rc = -1;
> +    FILE *fp = NULL;
> +
>      sprintf(path, XC_DEVICE_MODEL_RESTORE_FILE".%u", ctx->domid);
>      fp = fopen(path, "wb");
>      if ( !fp )

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus
  2015-05-07  6:37 ` [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus Yang Hongyang
@ 2015-05-07 10:58   ` Andrew Cooper
  2015-05-07 14:03     ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 10:58 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 07:37, Yang Hongyang wrote:
> X86_PV_INFO can be sent multiple times under Remus
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>

X86_PV_INFO cannot be sent multiple times.  If any information in it
were to change, the receiving side would have no choice other than to
abort the restore.

I think this is a side effect of how you have done the send loop in
patch 6.  Let me see if I can think of a suitable solution.

~Andrew

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07  9:48   ` Andrew Cooper
@ 2015-05-07 13:42     ` Hongyang Yang
  2015-05-07 13:57       ` Ian Campbell
  2015-05-07 14:01       ` Andrew Cooper
  0 siblings, 2 replies; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07 13:42 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram



On 05/07/2015 05:48 PM, Andrew Cooper wrote:
> On 07/05/15 07:37, Yang Hongyang wrote:
>> Move the memory allocation before the concrete live/nolive save
>> in order to avoid the free/alloc memory loop when using Remus.
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>> ---
>>   tools/libxc/xc_sr_save.c | 53 +++++++++++++++++++-----------------------------
>>   1 file changed, 21 insertions(+), 32 deletions(-)
>>
>> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
>> index 5d9c267..7fed668 100644
>> --- a/tools/libxc/xc_sr_save.c
>> +++ b/tools/libxc/xc_sr_save.c
>> @@ -3,6 +3,8 @@
>>
>>   #include "xc_sr_common.h"
>>
>> +DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
>
> This unfortunately causes an issue when concurrent calls to
> xc_domain_save() in the same process.  While this is a highly
> ill-advised action, I did try to avoid breaking it.
>
> Please move this declaration into the ctx.save union.

I know the best way is to put this into ctx.save union, but I haven't
found a method to put it in, the DECLARE_HYPERCALL_BUFFER macro can not
be used there, should I just define a unsigned long var at ctx.save
union, and use other macro(what macro?) define at save()?

>
>> +
>>   /*
>>    * Writes an Image header and Domain header into the stream.
>>    */
>> @@ -475,25 +477,11 @@ static int update_progress_string(struct xc_sr_context *ctx,
>>   static int send_domain_memory_live(struct xc_sr_context *ctx)
>>   {
>>       xc_interface *xch = ctx->xch;
>> -    DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
>>       xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
>>       char *progress_str = NULL;
>>       unsigned x;
>>       int rc = -1;
>>
>> -    to_send = xc_hypercall_buffer_alloc_pages(
>> -        xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size)));
>> -
>> -    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
>> -                                  sizeof(*ctx->save.batch_pfns));
>> -    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
>> -
>> -    if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages )
>> -    {
>> -        ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps");
>> -        goto out;
>> -    }
>> -
>>       rc = enable_logdirty(ctx);
>>       if ( rc )
>>           goto out;
>> @@ -593,10 +581,6 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
>>     out:
>>       xc_set_progress_prefix(xch, NULL);
>>       free(progress_str);
>> -    xc_hypercall_buffer_free_pages(xch, to_send,
>> -                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
>> -    free(ctx->save.deferred_pages);
>> -    free(ctx->save.batch_pfns);
>>       return rc;
>>   }
>>
>> @@ -609,17 +593,6 @@ static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
>>       xc_interface *xch = ctx->xch;
>>       int rc = -1;
>>
>> -    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
>> -                                  sizeof(*ctx->save.batch_pfns));
>> -    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
>> -
>> -    if ( !ctx->save.batch_pfns || !ctx->save.deferred_pages )
>> -    {
>> -        PERROR("Failed to allocate memory for nonlive tracking structures");
>> -        errno = ENOMEM;
>> -        goto err;
>> -    }
>> -
>>       rc = suspend_domain(ctx);
>>       if ( rc )
>>           goto err;
>> @@ -631,9 +604,6 @@ static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
>>           goto err;
>>
>>    err:
>> -    free(ctx->save.deferred_pages);
>> -    free(ctx->save.batch_pfns);
>> -
>>       return rc;
>>   }
>>
>> @@ -645,6 +615,20 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
>>       xc_interface *xch = ctx->xch;
>>       int rc, saved_rc = 0, saved_errno = 0;
>>
>> +    to_send = xc_hypercall_buffer_alloc_pages(
>> +                        xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size)));
>
> This allocation only needs to be made if ctx->save.live is set.  For
> plain suspend and resume, logdirty handling is not required.
>
>> +    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
>> +                                  sizeof(*ctx->save.batch_pfns));
>> +    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
>> +
>> +    if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages )
>> +    {
>> +        ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps");
>> +        rc = -1;
>> +        errno = ENOMEM;
>> +        goto err;
>> +    }
>> +
>
> Instead of complicating save() like this, could you introduce two new
> static functions setup() and cleanup() which subsume the
> ctx->save.ops.{setup,cleanup}() calls and also do these allocations and
> free.

Sure, will do this in next version.

>
> I think the SHADOW_OP_OFF hypercall can also be moved into the cleanup()
> function.
>
> ~Andrew
>
>>       IPRINTF("Saving domain %d, type %s",
>>               ctx->domid, dhdr_type_to_str(guest_type));
>>
>> @@ -704,6 +688,11 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
>>       if ( rc )
>>           PERROR("Failed to clean up");
>>
>> +    xc_hypercall_buffer_free_pages(xch, to_send,
>> +                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
>> +    free(ctx->save.deferred_pages);
>> +    free(ctx->save.batch_pfns);
>> +
>>       if ( saved_rc )
>>       {
>>           rc = saved_rc;
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages()
  2015-05-07 10:08   ` Andrew Cooper
@ 2015-05-07 13:48     ` Hongyang Yang
  0 siblings, 0 replies; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07 13:48 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram



On 05/07/2015 06:08 PM, Andrew Cooper wrote:
> On 07/05/15 07:37, Yang Hongyang wrote:
>> introduce bitmap_set() to set the entire bitmap.
>> in send_all_pages(), set the entire bitmap and call send_some_pages().
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>
> Ah - I see now why you unconditionally allocated the to_send bitmap.
>
> I suppose it is probably better to have less divergence between the live
> and non-live cases, and the size of the bitmap is substantially less
> than other structures in use.
>
> In which case, can I suggest the following which partially supersedes my
> review in patch 1.
>
> * Instead of having the bitmap called "to_send", having it called
> something more appropriate like "dirty_bitmap".
> * s/send_some_pages/send_dirty_pages/ and drop the *bitmap parameter.
> ('entries' should stay as it is a useful sanity check).
>
> This way, send_all_page() logically sets all pages as dirty, then sends
> any dirty pages.

Yeah, "dirty_bitmap" sounds good, will use this name as well as change the
name send_dirty_pages.

>
> ~Andrew
>
>> ---
>>   tools/libxc/xc_bitops.h  |  5 +++++
>>   tools/libxc/xc_sr_save.c | 41 +++++++++++------------------------------
>>   2 files changed, 16 insertions(+), 30 deletions(-)
>>
>> diff --git a/tools/libxc/xc_bitops.h b/tools/libxc/xc_bitops.h
>> index dfce3b8..cd749f4 100644
>> --- a/tools/libxc/xc_bitops.h
>> +++ b/tools/libxc/xc_bitops.h
>> @@ -26,6 +26,11 @@ static inline unsigned long *bitmap_alloc(int nr_bits)
>>       return calloc(1, bitmap_size(nr_bits));
>>   }
>>
>> +static inline void bitmap_set(unsigned long *addr, int nr_bits)
>> +{
>> +    memset(addr, 0xff, bitmap_size(nr_bits));
>> +}
>> +
>>   static inline void bitmap_clear(unsigned long *addr, int nr_bits)
>>   {
>>       memset(addr, 0, bitmap_size(nr_bits));
>> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
>> index 7fed668..2993ec3 100644
>> --- a/tools/libxc/xc_sr_save.c
>> +++ b/tools/libxc/xc_sr_save.c
>> @@ -346,36 +346,6 @@ static int suspend_domain(struct xc_sr_context *ctx)
>>   }
>>
>>   /*
>> - * Send all pages in the guests p2m.  Used as the first iteration of the live
>> - * migration loop, and for a non-live save.
>> - */
>> -static int send_all_pages(struct xc_sr_context *ctx)
>> -{
>> -    xc_interface *xch = ctx->xch;
>> -    xen_pfn_t p;
>> -    int rc;
>> -
>> -    for ( p = 0; p < ctx->save.p2m_size; ++p )
>> -    {
>> -        rc = add_to_batch(ctx, p);
>> -        if ( rc )
>> -            return rc;
>> -
>> -        /* Update progress every 4MB worth of memory sent. */
>> -        if ( (p & ((1U << (22 - 12)) - 1)) == 0 )
>> -            xc_report_progress_step(xch, p, ctx->save.p2m_size);
>> -    }
>> -
>> -    rc = flush_batch(ctx);
>> -    if ( rc )
>> -        return rc;
>> -
>> -    xc_report_progress_step(xch, ctx->save.p2m_size,
>> -                            ctx->save.p2m_size);
>> -    return 0;
>> -}
>> -
>> -/*
>>    * Send a subset of pages in the guests p2m, according to the provided bitmap.
>>    * Used for each subsequent iteration of the live migration loop.
>>    *
>> @@ -417,6 +387,17 @@ static int send_some_pages(struct xc_sr_context *ctx,
>>       return 0;
>>   }
>>
>> +/*
>> + * Send all pages in the guests p2m.  Used as the first iteration of the live
>> + * migration loop, and for a non-live save.
>> + */
>> +static int send_all_pages(struct xc_sr_context *ctx)
>> +{
>> +    bitmap_set(to_send, ctx->save.p2m_size);
>> +
>> +    return send_some_pages(ctx, to_send, ctx->save.p2m_size);
>> +}
>> +
>>   static int enable_logdirty(struct xc_sr_context *ctx)
>>   {
>>       xc_interface *xch = ctx->xch;
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info
  2015-05-07 10:48   ` Andrew Cooper
@ 2015-05-07 13:55     ` Hongyang Yang
  0 siblings, 0 replies; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07 13:55 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram



On 05/07/2015 06:48 PM, Andrew Cooper wrote:
> On 07/05/15 07:37, Yang Hongyang wrote:
>> Split read/handle qemu info. The receiving of qemu info
>> should be done while we receive the migration stream,
>> handle_qemu will be called when the stream complete.
>> Otherwise, it will break Remus because read_record()
>> won't read qemu info and stream_complete will be called
>> at failover.
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>
> I presume this is because Remus sends multiple qemu records in the stream?
>
> I will be fixing the qemu record layer violation as part of libxl
> migration v2, at which point all the XG_LIBXL_HVM_COMPAT code shall
> disappear.

ye, this patch should disappear with your fix :)

>
> As all this code appears to live inside XG_LIBXL_HVM_COMPAT, I am
> willing to trust that it DoesTheRightThing, if you have confirmed that
> plain HVM migration with migration v2 and XG_LIBXL_HVM_COMPAT continues
> to work (I am slightly suspicious of the 3rd hunk in this regard).

I tested both pv&hvm, normal migration are all work as it is.

>
> ~Andrew
>
>> ---
>>   tools/libxc/xc_sr_common.h          |  5 +++++
>>   tools/libxc/xc_sr_restore.c         | 12 ++++++++++++
>>   tools/libxc/xc_sr_restore_x86_hvm.c | 28 +++++++++++++++++++++++++---
>>   3 files changed, 42 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
>> index ef42412..6f099b8 100644
>> --- a/tools/libxc/xc_sr_common.h
>> +++ b/tools/libxc/xc_sr_common.h
>> @@ -279,6 +279,11 @@ struct xc_sr_context
>>                       /* HVM context blob. */
>>                       void *context;
>>                       size_t contextsz;
>> +
>> +#ifdef XG_LIBXL_HVM_COMPAT
>> +                    uint32_t qlen;
>> +                    void *qbuf;
>> +#endif
>>                   } restore;
>>               };
>>           } x86_hvm;
>> diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
>> index 53bd674..8022c3d 100644
>> --- a/tools/libxc/xc_sr_restore.c
>> +++ b/tools/libxc/xc_sr_restore.c
>> @@ -510,6 +510,9 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>>       return rc;
>>   }
>>
>> +#ifdef XG_LIBXL_HVM_COMPAT
>> +extern int read_qemu(struct xc_sr_context *ctx);
>> +#endif
>>   /*
>>    * Restore a domain.
>>    */
>> @@ -546,6 +549,15 @@ static int restore(struct xc_sr_context *ctx)
>>
>>       } while ( rec.type != REC_TYPE_END );
>>
>> +#ifdef XG_LIBXL_HVM_COMPAT
>> +    if ( ctx->dominfo.hvm )
>> +    {
>> +        rc = read_qemu(ctx);
>> +        if ( rc )
>> +            goto err;
>> +    }
>> +#endif
>> +
>>       rc = ctx->restore.ops.stream_complete(ctx);
>>       if ( rc )
>>           goto err;
>> diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
>> index 6e9b318..6f5af0e 100644
>> --- a/tools/libxc/xc_sr_restore_x86_hvm.c
>> +++ b/tools/libxc/xc_sr_restore_x86_hvm.c
>> @@ -94,14 +94,14 @@ static int handle_hvm_params(struct xc_sr_context *ctx,
>>   }
>>
>>   #ifdef XG_LIBXL_HVM_COMPAT
>> -static int handle_qemu(struct xc_sr_context *ctx)
>> +int read_qemu(struct xc_sr_context *ctx);
>> +int read_qemu(struct xc_sr_context *ctx)
>>   {
>>       xc_interface *xch = ctx->xch;
>> -    char qemusig[21], path[256];
>> +    char qemusig[21];
>>       uint32_t qlen;
>>       void *qbuf = NULL;
>>       int rc = -1;
>> -    FILE *fp = NULL;
>>
>>       if ( read_exact(ctx->fd, qemusig, sizeof(qemusig)) )
>>       {
>> @@ -137,6 +137,28 @@ static int handle_qemu(struct xc_sr_context *ctx)
>>           goto out;
>>       }
>>
>> +    /* With Remus, this could be read many times */
>> +    if ( ctx->x86_hvm.restore.qbuf )
>> +        free(ctx->x86_hvm.restore.qbuf);
>> +    ctx->x86_hvm.restore.qbuf = qbuf;
>> +    ctx->x86_hvm.restore.qlen = qlen;
>> +    rc = 0;
>> +
>> +out:
>> +    if (rc)
>> +        free(qbuf);
>> +    return rc;
>> +}
>> +
>> +static int handle_qemu(struct xc_sr_context *ctx)
>> +{
>> +    xc_interface *xch = ctx->xch;
>> +    char path[256];
>> +    uint32_t qlen = ctx->x86_hvm.restore.qlen;
>> +    void *qbuf = ctx->x86_hvm.restore.qbuf;
>> +    int rc = -1;
>> +    FILE *fp = NULL;
>> +
>>       sprintf(path, XC_DEVICE_MODEL_RESTORE_FILE".%u", ctx->domid);
>>       fp = fopen(path, "wb");
>>       if ( !fp )
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07 13:42     ` Hongyang Yang
@ 2015-05-07 13:57       ` Ian Campbell
  2015-05-08  9:11         ` Hongyang Yang
  2015-05-08 16:27         ` David Vrabel
  2015-05-07 14:01       ` Andrew Cooper
  1 sibling, 2 replies; 31+ messages in thread
From: Ian Campbell @ 2015-05-07 13:57 UTC (permalink / raw)
  To: Hongyang Yang, David Vrabel
  Cc: wei.liu2, eddie.dong, wency, Andrew Cooper, yunhong.jiang,
	ian.jackson, xen-devel, rshriram

On Thu, 2015-05-07 at 21:42 +0800, Hongyang Yang wrote:
> 
> On 05/07/2015 05:48 PM, Andrew Cooper wrote:
> > On 07/05/15 07:37, Yang Hongyang wrote:
> >> Move the memory allocation before the concrete live/nolive save
> >> in order to avoid the free/alloc memory loop when using Remus.
> >>
> >> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> >> ---
> >>   tools/libxc/xc_sr_save.c | 53 +++++++++++++++++++-----------------------------
> >>   1 file changed, 21 insertions(+), 32 deletions(-)
> >>
> >> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
> >> index 5d9c267..7fed668 100644
> >> --- a/tools/libxc/xc_sr_save.c
> >> +++ b/tools/libxc/xc_sr_save.c
> >> @@ -3,6 +3,8 @@
> >>
> >>   #include "xc_sr_common.h"
> >>
> >> +DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
> >
> > This unfortunately causes an issue when concurrent calls to
> > xc_domain_save() in the same process.  While this is a highly
> > ill-advised action, I did try to avoid breaking it.
> >
> > Please move this declaration into the ctx.save union.
> 
> I know the best way is to put this into ctx.save union, but I haven't
> found a method to put it in, the DECLARE_HYPERCALL_BUFFER macro can not
> be used there, should I just define a unsigned long var at ctx.save
> union, and use other macro(what macro?) define at save()?

I think you need a variable of type xc_hypercall_buffer_t in the struct
and then to use DECLARE_HYPERCALL_BUFFER_SHADOW in functions which need
to access it.

DECLARE_HYPERCALL_BUFFER_SHADOW seems to currently be unused, David
added it in 60572c972b8d, I suspect to be used by migration v2, although
perhaps it never was (at least not in tree yet).

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  2015-05-07 10:35   ` Andrew Cooper
@ 2015-05-07 13:59     ` Hongyang Yang
  2015-05-07 15:24       ` Andrew Cooper
  0 siblings, 1 reply; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07 13:59 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram



On 05/07/2015 06:35 PM, Andrew Cooper wrote:
> On 07/05/15 07:37, Yang Hongyang wrote:
>> Set the hvm param HVM_PARAM_IDENT_PT second time will fail.
>> So defer the setting of HVM_PARAM_IDENT_PT.
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>
> I would argue that this is a Xen bug, not a migration bug.  There is
> nothing conceptually wrong with setting this param multiple times.

I noticed that in legacy migration, this was set multiple times, so
this patch is only a workaround that will avoid the fail in my test.
will try your hypervisor patch tomorrow.

>
> Please try the following hypervisor patch instead:
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 3a09439..9ee56b3 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -5748,12 +5748,13 @@ long do_hvm_op(unsigned long op,
> XEN_GUEST_HANDLE_PARAM(void) arg)
>                   if ( curr_d == d )
>                       break;
>
> -                rc = -EINVAL;
> -                if ( d->arch.hvm_domain.params[a.index] != 0 )
> -                    break;
> -
>                   rc = 0;
> -                if ( !paging_mode_hap(d) )
> +                d->arch.hvm_domain.params[a.index] = a.value;
> +                /*
> +                 * Only actually required for VT-x lacking
> unrestricted_guest
> +                 * capabilities.  Short circuit the pause if possible.
> +                 */
> +                if ( !paging_mode_hap(d) || !cpu_has_vmx )
>                       break;
>
>                   /*
> @@ -5767,7 +5768,6 @@ long do_hvm_op(unsigned long op,
> XEN_GUEST_HANDLE_PARAM(void) arg)
>
>                   rc = 0;
>                   domain_pause(d);
> -                d->arch.hvm_domain.params[a.index] = a.value;
>                   for_each_vcpu ( d, v )
>                       paging_update_cr3(v);
>                   domain_unpause(d);
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07 13:42     ` Hongyang Yang
  2015-05-07 13:57       ` Ian Campbell
@ 2015-05-07 14:01       ` Andrew Cooper
  1 sibling, 0 replies; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 14:01 UTC (permalink / raw)
  To: Hongyang Yang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 14:42, Hongyang Yang wrote:
>
>
> On 05/07/2015 05:48 PM, Andrew Cooper wrote:
>> On 07/05/15 07:37, Yang Hongyang wrote:
>>> Move the memory allocation before the concrete live/nolive save
>>> in order to avoid the free/alloc memory loop when using Remus.
>>>
>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>> ---
>>>   tools/libxc/xc_sr_save.c | 53
>>> +++++++++++++++++++-----------------------------
>>>   1 file changed, 21 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
>>> index 5d9c267..7fed668 100644
>>> --- a/tools/libxc/xc_sr_save.c
>>> +++ b/tools/libxc/xc_sr_save.c
>>> @@ -3,6 +3,8 @@
>>>
>>>   #include "xc_sr_common.h"
>>>
>>> +DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
>>
>> This unfortunately causes an issue when concurrent calls to
>> xc_domain_save() in the same process.  While this is a highly
>> ill-advised action, I did try to avoid breaking it.
>>
>> Please move this declaration into the ctx.save union.
>
> I know the best way is to put this into ctx.save union, but I haven't
> found a method to put it in, the DECLARE_HYPERCALL_BUFFER macro can not
> be used there, should I just define a unsigned long var at ctx.save
> union, and use other macro(what macro?) define at save()?

Urgh yes - the hypercall buffer infrastructure is very obscure, and I
never remember how to use it.  I don't think there is a way to do this
in the current infrastructure.

I think you are going to have to manually split
DECLARE_HYPERCALL_BUFFER() between the ctx declaration and new setup()
function.  Leave a comment by both halves, as it will be rather peculiar.

(Fundamentally, the DECLARE in the name is wrong, and contrary to all
other styles.  It should instead be INIT to match similar constructs in
Xen and Linux)

~Andrew

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus
  2015-05-07 10:58   ` Andrew Cooper
@ 2015-05-07 14:03     ` Hongyang Yang
  0 siblings, 0 replies; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07 14:03 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram



On 05/07/2015 06:58 PM, Andrew Cooper wrote:
> On 07/05/15 07:37, Yang Hongyang wrote:
>> X86_PV_INFO can be sent multiple times under Remus
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>
> X86_PV_INFO cannot be sent multiple times.  If any information in it
> were to change, the receiving side would have no choice other than to
> abort the restore.

I don't know much about the hypervisor, I will rely on you to find a
suitable solution :)

>
> I think this is a side effect of how you have done the send loop in
> patch 6.  Let me see if I can think of a suitable solution.
>
> ~Andrew
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 0/8] Remus support for Migration-v2
  2015-05-07  9:31   ` Andrew Cooper
@ 2015-05-07 15:07     ` Hongyang Yang
  0 siblings, 0 replies; 31+ messages in thread
From: Hongyang Yang @ 2015-05-07 15:07 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram



On 05/07/2015 05:31 PM, Andrew Cooper wrote:
> On 07/05/15 08:04, Hongyang Yang wrote:
>> Cc Andrew cooper
>>
>> Sorry I missed your address...
>
> No worries.  You have managed a far faster turnaround on this than I
> have with libxl migration v2!
>

Thank you for the more faster review!

> ~Andrew
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  2015-05-07 13:59     ` Hongyang Yang
@ 2015-05-07 15:24       ` Andrew Cooper
  2015-05-08  4:49         ` Hongyang Yang
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Cooper @ 2015-05-07 15:24 UTC (permalink / raw)
  To: Hongyang Yang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 07/05/15 14:59, Hongyang Yang wrote:
>
>
> On 05/07/2015 06:35 PM, Andrew Cooper wrote:
>> On 07/05/15 07:37, Yang Hongyang wrote:
>>> Set the hvm param HVM_PARAM_IDENT_PT second time will fail.
>>> So defer the setting of HVM_PARAM_IDENT_PT.
>>>
>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>
>> I would argue that this is a Xen bug, not a migration bug.  There is
>> nothing conceptually wrong with setting this param multiple times.
>
> I noticed that in legacy migration, this was set multiple times, so
> this patch is only a workaround that will avoid the fail in my test.
> will try your hypervisor patch tomorrow.

Try this version, which has been rebased over Pauls' cleanup in this
area which has just been committed to staging.

~Andrew

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 371fd33..941e9ea 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5710,12 +5710,13 @@ static int hvmop_set_param(
             rc = -EINVAL;
         break;
     case HVM_PARAM_IDENT_PT:
-        rc = -EINVAL;
-        if ( d->arch.hvm_domain.params[a.index] != 0 )
-            break;
-
         rc = 0;
-        if ( !paging_mode_hap(d) )
+        d->arch.hvm_domain.params[a.index] = a.value;
+        /*
+         * Only actually required for VT-x lacking unrestricted_guest
+         * capabilities.  Short circuit the pause if possible.
+         */
+        if ( !paging_mode_hap(d) || !cpu_has_vmx )
             break;
 
         /*
@@ -5729,7 +5730,6 @@ static int hvmop_set_param(
 
         rc = 0;
         domain_pause(d);
-        d->arch.hvm_domain.params[a.index] = a.value;
         for_each_vcpu ( d, v )
             paging_update_cr3(v);
         domain_unpause(d);

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  2015-05-07 15:24       ` Andrew Cooper
@ 2015-05-08  4:49         ` Hongyang Yang
  2015-05-08  8:19           ` Andrew Cooper
  0 siblings, 1 reply; 31+ messages in thread
From: Hongyang Yang @ 2015-05-08  4:49 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

Hi Andrew,

On 05/07/2015 11:24 PM, Andrew Cooper wrote:
> On 07/05/15 14:59, Hongyang Yang wrote:
>>
>>
>> On 05/07/2015 06:35 PM, Andrew Cooper wrote:
>>> On 07/05/15 07:37, Yang Hongyang wrote:
>>>> Set the hvm param HVM_PARAM_IDENT_PT second time will fail.
>>>> So defer the setting of HVM_PARAM_IDENT_PT.
>>>>
>>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>>
>>> I would argue that this is a Xen bug, not a migration bug.  There is
>>> nothing conceptually wrong with setting this param multiple times.
>>
>> I noticed that in legacy migration, this was set multiple times, so
>> this patch is only a workaround that will avoid the fail in my test.
>> will try your hypervisor patch tomorrow.
>
> Try this version, which has been rebased over Pauls' cleanup in this
> area which has just been committed to staging.

A quick test show that this patch fix the problem, will you send this
patch separately or I put this patch into my series?

>
> ~Andrew
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 371fd33..941e9ea 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -5710,12 +5710,13 @@ static int hvmop_set_param(
>               rc = -EINVAL;
>           break;
>       case HVM_PARAM_IDENT_PT:
> -        rc = -EINVAL;
> -        if ( d->arch.hvm_domain.params[a.index] != 0 )
> -            break;
> -
>           rc = 0;
> -        if ( !paging_mode_hap(d) )
> +        d->arch.hvm_domain.params[a.index] = a.value;
> +        /*
> +         * Only actually required for VT-x lacking unrestricted_guest
> +         * capabilities.  Short circuit the pause if possible.
> +         */
> +        if ( !paging_mode_hap(d) || !cpu_has_vmx )
>               break;
>
>           /*
> @@ -5729,7 +5730,6 @@ static int hvmop_set_param(
>
>           rc = 0;
>           domain_pause(d);
> -        d->arch.hvm_domain.params[a.index] = a.value;
>           for_each_vcpu ( d, v )
>               paging_update_cr3(v);
>           domain_unpause(d);
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT
  2015-05-08  4:49         ` Hongyang Yang
@ 2015-05-08  8:19           ` Andrew Cooper
  0 siblings, 0 replies; 31+ messages in thread
From: Andrew Cooper @ 2015-05-08  8:19 UTC (permalink / raw)
  To: Hongyang Yang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, eddie.dong, yunhong.jiang,
	ian.jackson, rshriram

On 08/05/15 05:49, Hongyang Yang wrote:
> Hi Andrew,
>
> On 05/07/2015 11:24 PM, Andrew Cooper wrote:
>> On 07/05/15 14:59, Hongyang Yang wrote:
>>>
>>>
>>> On 05/07/2015 06:35 PM, Andrew Cooper wrote:
>>>> On 07/05/15 07:37, Yang Hongyang wrote:
>>>>> Set the hvm param HVM_PARAM_IDENT_PT second time will fail.
>>>>> So defer the setting of HVM_PARAM_IDENT_PT.
>>>>>
>>>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>>>
>>>> I would argue that this is a Xen bug, not a migration bug.  There is
>>>> nothing conceptually wrong with setting this param multiple times.
>>>
>>> I noticed that in legacy migration, this was set multiple times, so
>>> this patch is only a workaround that will avoid the fail in my test.
>>> will try your hypervisor patch tomorrow.
>>
>> Try this version, which has been rebased over Pauls' cleanup in this
>> area which has just been committed to staging.
>
> A quick test show that this patch fix the problem, will you send this
> patch separately or I put this patch into my series?

I am putting together a short series to help your integration.  I shall
include this with a proper commit message etc.

~Andrew

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07 13:57       ` Ian Campbell
@ 2015-05-08  9:11         ` Hongyang Yang
  2015-05-08 16:27         ` David Vrabel
  1 sibling, 0 replies; 31+ messages in thread
From: Hongyang Yang @ 2015-05-08  9:11 UTC (permalink / raw)
  To: Ian Campbell, David Vrabel
  Cc: wei.liu2, eddie.dong, wency, Andrew Cooper, yunhong.jiang,
	ian.jackson, xen-devel, rshriram

Hi Ian,

On 05/07/2015 09:57 PM, Ian Campbell wrote:
> On Thu, 2015-05-07 at 21:42 +0800, Hongyang Yang wrote:
>>
>> On 05/07/2015 05:48 PM, Andrew Cooper wrote:
>>> On 07/05/15 07:37, Yang Hongyang wrote:
>>>> Move the memory allocation before the concrete live/nolive save
>>>> in order to avoid the free/alloc memory loop when using Remus.
>>>>
>>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>>> ---
>>>>    tools/libxc/xc_sr_save.c | 53 +++++++++++++++++++-----------------------------
>>>>    1 file changed, 21 insertions(+), 32 deletions(-)
>>>>
>>>> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
>>>> index 5d9c267..7fed668 100644
>>>> --- a/tools/libxc/xc_sr_save.c
>>>> +++ b/tools/libxc/xc_sr_save.c
>>>> @@ -3,6 +3,8 @@
>>>>
>>>>    #include "xc_sr_common.h"
>>>>
>>>> +DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
>>>
>>> This unfortunately causes an issue when concurrent calls to
>>> xc_domain_save() in the same process.  While this is a highly
>>> ill-advised action, I did try to avoid breaking it.
>>>
>>> Please move this declaration into the ctx.save union.
>>
>> I know the best way is to put this into ctx.save union, but I haven't
>> found a method to put it in, the DECLARE_HYPERCALL_BUFFER macro can not
>> be used there, should I just define a unsigned long var at ctx.save
>> union, and use other macro(what macro?) define at save()?
>
> I think you need a variable of type xc_hypercall_buffer_t in the struct
> and then to use DECLARE_HYPERCALL_BUFFER_SHADOW in functions which need
> to access it.
>
> DECLARE_HYPERCALL_BUFFER_SHADOW seems to currently be unused, David
> added it in 60572c972b8d, I suspect to be used by migration v2, although
> perhaps it never was (at least not in tree yet).

Thank you for the information, I finally found the way to use it, but I
had to add a new macro DECLARE_HYPERCALL_BUFFER_USER_POINTER which let
me to define a user pointer to access the buffer data, because in
send_all_pages() I only need to access the buffer data, if I use
DECLARE_HYPERCALL_BUFFER_SHADOW, it will define a shadow hypercall
buffer which I don't use and the compiler will report unused var error.

I will send out v2 series which you can see clearly what I've described
above.

>
> Ian.
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-07 13:57       ` Ian Campbell
  2015-05-08  9:11         ` Hongyang Yang
@ 2015-05-08 16:27         ` David Vrabel
  2015-05-08 16:42           ` Ian Campbell
  1 sibling, 1 reply; 31+ messages in thread
From: David Vrabel @ 2015-05-08 16:27 UTC (permalink / raw)
  To: Ian Campbell
  Cc: wei.liu2, wency, Andrew Cooper, yunhong.jiang, eddie.dong,
	xen-devel, David Vrabel, rshriram, Hongyang Yang, ian.jackson

On 07/05/15 14:57, Ian Campbell wrote:
> 
> DECLARE_HYPERCALL_BUFFER_SHADOW seems to currently be unused, David
> added it in 60572c972b8d, I suspect to be used by migration v2, although
> perhaps it never was (at least not in tree yet).

It's used by kexec-tools.

David

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration
  2015-05-08 16:27         ` David Vrabel
@ 2015-05-08 16:42           ` Ian Campbell
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Campbell @ 2015-05-08 16:42 UTC (permalink / raw)
  To: David Vrabel
  Cc: wei.liu2, wency, Andrew Cooper, yunhong.jiang, eddie.dong,
	xen-devel, David Vrabel, rshriram, Hongyang Yang, ian.jackson

On Fri, 2015-05-08 at 17:27 +0100, David Vrabel wrote:
> On 07/05/15 14:57, Ian Campbell wrote:
> > 
> > DECLARE_HYPERCALL_BUFFER_SHADOW seems to currently be unused, David
> > added it in 60572c972b8d, I suspect to be used by migration v2, although
> > perhaps it never was (at least not in tree yet).
> 
> It's used by kexec-tools.

Ah, thanks!

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2015-05-08 16:42 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-07  6:37 [PATCH Remus v1 0/8] Remus support for Migration-v2 Yang Hongyang
2015-05-07  6:37 ` [PATCH Remus v1 1/8] tools/libxc: adjust the memory allocation for migration Yang Hongyang
2015-05-07  9:48   ` Andrew Cooper
2015-05-07 13:42     ` Hongyang Yang
2015-05-07 13:57       ` Ian Campbell
2015-05-08  9:11         ` Hongyang Yang
2015-05-08 16:27         ` David Vrabel
2015-05-08 16:42           ` Ian Campbell
2015-05-07 14:01       ` Andrew Cooper
2015-05-07  6:37 ` [PATCH Remus v1 2/8] tools/libxc: reuse send_some_pages() in send_all_pages() Yang Hongyang
2015-05-07 10:08   ` Andrew Cooper
2015-05-07 13:48     ` Hongyang Yang
2015-05-07  6:37 ` [PATCH Remus v1 3/8] tools/libxc: introduce process_record() Yang Hongyang
2015-05-07 10:12   ` Andrew Cooper
2015-05-07  6:37 ` [PATCH Remus v1 4/8] tools/libxc: split read/handle qemu info Yang Hongyang
2015-05-07 10:48   ` Andrew Cooper
2015-05-07 13:55     ` Hongyang Yang
2015-05-07  6:37 ` [PATCH Remus v1 5/8] tools/libxc: defer the setting of HVM_PARAM_IDENT_PT Yang Hongyang
2015-05-07 10:35   ` Andrew Cooper
2015-05-07 13:59     ` Hongyang Yang
2015-05-07 15:24       ` Andrew Cooper
2015-05-08  4:49         ` Hongyang Yang
2015-05-08  8:19           ` Andrew Cooper
2015-05-07  6:37 ` [PATCH Remus v1 6/8] tools/libxc: implement Remus checkpointed save Yang Hongyang
2015-05-07  6:37 ` [PATCH Remus v1 7/8] tools/libxc: implement Remus checkpointed restore Yang Hongyang
2015-05-07  6:37 ` [PATCH Remus v1 8/8] tools/libxc: X86_PV_INFO can be sent multiple times under Remus Yang Hongyang
2015-05-07 10:58   ` Andrew Cooper
2015-05-07 14:03     ` Hongyang Yang
2015-05-07  7:04 ` [PATCH Remus v1 0/8] Remus support for Migration-v2 Hongyang Yang
2015-05-07  9:31   ` Andrew Cooper
2015-05-07 15:07     ` Hongyang Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.