All of lore.kernel.org
 help / color / mirror / Atom feed
From: guangrong.xiao@gmail.com
To: pbonzini@redhat.com, mst@redhat.com, mtosatti@redhat.com
Cc: kvm@vger.kernel.org, Xiao Guangrong <xiaoguangrong@tencent.com>,
	qemu-devel@nongnu.org, peterx@redhat.com, dgilbert@redhat.com,
	wei.w.wang@intel.com, jiang.biao2@zte.com.cn
Subject: [PATCH v2 04/10] migration: detect compression and decompression errors
Date: Tue, 27 Mar 2018 17:10:37 +0800	[thread overview]
Message-ID: <20180327091043.30220-5-xiaoguangrong@tencent.com> (raw)
In-Reply-To: <20180327091043.30220-1-xiaoguangrong@tencent.com>

From: Xiao Guangrong <xiaoguangrong@tencent.com>

Currently the page being compressed is allowed to be updated by
the VM on the source QEMU, correspondingly the destination QEMU
just ignores the decompression error. However, we completely miss
the chance to catch real errors, then the VM is corrupted silently

To make the migration more robuster, we copy the page to a buffer
first to avoid it being written by VM, then detect and handle the
errors of both compression and decompression errors properly

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
---
 migration/qemu-file.c |  4 ++--
 migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e924cc23c5..a7614e8c28 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -710,9 +710,9 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
     blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t),
                               blen, p, size);
     if (blen < 0) {
-        error_report("Compress Failed!");
-        return 0;
+        return -1;
     }
+
     qemu_put_be32(f, blen);
     if (f->ops->writev_buffer) {
         add_to_iovec(f, f->buf + f->buf_index, blen, false);
diff --git a/migration/ram.c b/migration/ram.c
index 6b699650ca..e85191c1cb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -269,7 +269,10 @@ struct CompressParam {
     QemuCond cond;
     RAMBlock *block;
     ram_addr_t offset;
+
+    /* internally used fields */
     z_stream stream;
+    uint8_t *originbuf;
 };
 typedef struct CompressParam CompressParam;
 
@@ -278,6 +281,7 @@ struct DecompressParam {
     bool quit;
     QemuMutex mutex;
     QemuCond cond;
+    QEMUFile *file;
     void *des;
     uint8_t *compbuf;
     int len;
@@ -302,7 +306,7 @@ static QemuMutex decomp_done_lock;
 static QemuCond decomp_done_cond;
 
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
-                                ram_addr_t offset);
+                                ram_addr_t offset, uint8_t *source_buf);
 
 static void *do_data_compress(void *opaque)
 {
@@ -318,7 +322,8 @@ static void *do_data_compress(void *opaque)
             param->block = NULL;
             qemu_mutex_unlock(&param->mutex);
 
-            do_compress_ram_page(param->file, &param->stream, block, offset);
+            do_compress_ram_page(param->file, &param->stream, block, offset,
+                                 param->originbuf);
 
             qemu_mutex_lock(&comp_done_lock);
             param->done = true;
@@ -372,6 +377,7 @@ static void compress_threads_save_cleanup(void)
         qemu_mutex_destroy(&comp_param[i].mutex);
         qemu_cond_destroy(&comp_param[i].cond);
         deflateEnd(&comp_param[i].stream);
+        g_free(comp_param[i].originbuf);
         comp_param[i].stream.opaque = NULL;
     }
     qemu_mutex_destroy(&comp_done_lock);
@@ -395,8 +401,14 @@ static int compress_threads_save_setup(void)
     qemu_cond_init(&comp_done_cond);
     qemu_mutex_init(&comp_done_lock);
     for (i = 0; i < thread_count; i++) {
+        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!comp_param[i].originbuf) {
+            goto exit;
+        }
+
         if (deflateInit(&comp_param[i].stream,
                            migrate_compress_level()) != Z_OK) {
+            g_free(comp_param[i].originbuf);
             goto exit;
         }
         comp_param[i].stream.opaque = &comp_param[i];
@@ -1055,7 +1067,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 }
 
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
-                                ram_addr_t offset)
+                                ram_addr_t offset, uint8_t *source_buf)
 {
     RAMState *rs = ram_state;
     int bytes_sent, blen;
@@ -1063,7 +1075,14 @@ static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
 
     bytes_sent = save_page_header(rs, f, block, offset |
                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
-    blen = qemu_put_compression_data(f, stream, p, TARGET_PAGE_SIZE);
+
+    /*
+     * copy it to a internal buffer to avoid it being modified by VM
+     * so that we can catch up the error during compression and
+     * decompression
+     */
+    memcpy(source_buf, p, TARGET_PAGE_SIZE);
+    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
     if (blen < 0) {
         bytes_sent = 0;
         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
@@ -2557,7 +2576,7 @@ static void *do_data_decompress(void *opaque)
     DecompressParam *param = opaque;
     unsigned long pagesize;
     uint8_t *des;
-    int len;
+    int len, ret;
 
     qemu_mutex_lock(&param->mutex);
     while (!param->quit) {
@@ -2568,13 +2587,13 @@ static void *do_data_decompress(void *opaque)
             qemu_mutex_unlock(&param->mutex);
 
             pagesize = TARGET_PAGE_SIZE;
-            /* qemu_uncompress_data() will return failed in some case,
-             * especially when the page is dirtied when doing the compression,
-             * it's not a problem because the dirty page will be retransferred
-             * and uncompress() won't break the data in other pages.
-             */
-            qemu_uncompress_data(&param->stream, des, pagesize, param->compbuf,
-                                 len);
+
+            ret = qemu_uncompress_data(&param->stream, des, pagesize,
+                                       param->compbuf, len);
+            if (ret < 0) {
+                error_report("decompress data failed");
+                qemu_file_set_error(param->file, ret);
+            }
 
             qemu_mutex_lock(&decomp_done_lock);
             param->done = true;
@@ -2591,12 +2610,12 @@ static void *do_data_decompress(void *opaque)
     return NULL;
 }
 
-static void wait_for_decompress_done(void)
+static int wait_for_decompress_done(QEMUFile *f)
 {
     int idx, thread_count;
 
     if (!migrate_use_compression()) {
-        return;
+        return 0;
     }
 
     thread_count = migrate_decompress_threads();
@@ -2607,6 +2626,7 @@ static void wait_for_decompress_done(void)
         }
     }
     qemu_mutex_unlock(&decomp_done_lock);
+    return qemu_file_get_error(f);
 }
 
 static void compress_threads_load_cleanup(void)
@@ -2646,7 +2666,7 @@ static void compress_threads_load_cleanup(void)
     decomp_param = NULL;
 }
 
-static int compress_threads_load_setup(void)
+static int compress_threads_load_setup(QEMUFile *f)
 {
     int i, thread_count;
 
@@ -2665,6 +2685,7 @@ static int compress_threads_load_setup(void)
         }
         decomp_param[i].stream.opaque = &decomp_param[i];
 
+        decomp_param[i].file = f;
         qemu_mutex_init(&decomp_param[i].mutex);
         qemu_cond_init(&decomp_param[i].cond);
         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
@@ -2719,7 +2740,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
  */
 static int ram_load_setup(QEMUFile *f, void *opaque)
 {
-    if (compress_threads_load_setup()) {
+    if (compress_threads_load_setup(f)) {
         return -1;
     }
 
@@ -3074,7 +3095,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
         }
     }
 
-    wait_for_decompress_done();
+    ret |= wait_for_decompress_done(f);
     rcu_read_unlock();
     trace_ram_load_complete(ret, seq_iter);
     return ret;
-- 
2.14.3

WARNING: multiple messages have this Message-ID (diff)
From: guangrong.xiao@gmail.com
To: pbonzini@redhat.com, mst@redhat.com, mtosatti@redhat.com
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org, dgilbert@redhat.com,
	peterx@redhat.com, jiang.biao2@zte.com.cn, wei.w.wang@intel.com,
	Xiao Guangrong <xiaoguangrong@tencent.com>
Subject: [Qemu-devel] [PATCH v2 04/10] migration: detect compression and decompression errors
Date: Tue, 27 Mar 2018 17:10:37 +0800	[thread overview]
Message-ID: <20180327091043.30220-5-xiaoguangrong@tencent.com> (raw)
In-Reply-To: <20180327091043.30220-1-xiaoguangrong@tencent.com>

From: Xiao Guangrong <xiaoguangrong@tencent.com>

Currently the page being compressed is allowed to be updated by
the VM on the source QEMU, correspondingly the destination QEMU
just ignores the decompression error. However, we completely miss
the chance to catch real errors, then the VM is corrupted silently

To make the migration more robuster, we copy the page to a buffer
first to avoid it being written by VM, then detect and handle the
errors of both compression and decompression errors properly

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
---
 migration/qemu-file.c |  4 ++--
 migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e924cc23c5..a7614e8c28 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -710,9 +710,9 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
     blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t),
                               blen, p, size);
     if (blen < 0) {
-        error_report("Compress Failed!");
-        return 0;
+        return -1;
     }
+
     qemu_put_be32(f, blen);
     if (f->ops->writev_buffer) {
         add_to_iovec(f, f->buf + f->buf_index, blen, false);
diff --git a/migration/ram.c b/migration/ram.c
index 6b699650ca..e85191c1cb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -269,7 +269,10 @@ struct CompressParam {
     QemuCond cond;
     RAMBlock *block;
     ram_addr_t offset;
+
+    /* internally used fields */
     z_stream stream;
+    uint8_t *originbuf;
 };
 typedef struct CompressParam CompressParam;
 
@@ -278,6 +281,7 @@ struct DecompressParam {
     bool quit;
     QemuMutex mutex;
     QemuCond cond;
+    QEMUFile *file;
     void *des;
     uint8_t *compbuf;
     int len;
@@ -302,7 +306,7 @@ static QemuMutex decomp_done_lock;
 static QemuCond decomp_done_cond;
 
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
-                                ram_addr_t offset);
+                                ram_addr_t offset, uint8_t *source_buf);
 
 static void *do_data_compress(void *opaque)
 {
@@ -318,7 +322,8 @@ static void *do_data_compress(void *opaque)
             param->block = NULL;
             qemu_mutex_unlock(&param->mutex);
 
-            do_compress_ram_page(param->file, &param->stream, block, offset);
+            do_compress_ram_page(param->file, &param->stream, block, offset,
+                                 param->originbuf);
 
             qemu_mutex_lock(&comp_done_lock);
             param->done = true;
@@ -372,6 +377,7 @@ static void compress_threads_save_cleanup(void)
         qemu_mutex_destroy(&comp_param[i].mutex);
         qemu_cond_destroy(&comp_param[i].cond);
         deflateEnd(&comp_param[i].stream);
+        g_free(comp_param[i].originbuf);
         comp_param[i].stream.opaque = NULL;
     }
     qemu_mutex_destroy(&comp_done_lock);
@@ -395,8 +401,14 @@ static int compress_threads_save_setup(void)
     qemu_cond_init(&comp_done_cond);
     qemu_mutex_init(&comp_done_lock);
     for (i = 0; i < thread_count; i++) {
+        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!comp_param[i].originbuf) {
+            goto exit;
+        }
+
         if (deflateInit(&comp_param[i].stream,
                            migrate_compress_level()) != Z_OK) {
+            g_free(comp_param[i].originbuf);
             goto exit;
         }
         comp_param[i].stream.opaque = &comp_param[i];
@@ -1055,7 +1067,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 }
 
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
-                                ram_addr_t offset)
+                                ram_addr_t offset, uint8_t *source_buf)
 {
     RAMState *rs = ram_state;
     int bytes_sent, blen;
@@ -1063,7 +1075,14 @@ static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
 
     bytes_sent = save_page_header(rs, f, block, offset |
                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
-    blen = qemu_put_compression_data(f, stream, p, TARGET_PAGE_SIZE);
+
+    /*
+     * copy it to a internal buffer to avoid it being modified by VM
+     * so that we can catch up the error during compression and
+     * decompression
+     */
+    memcpy(source_buf, p, TARGET_PAGE_SIZE);
+    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
     if (blen < 0) {
         bytes_sent = 0;
         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
@@ -2557,7 +2576,7 @@ static void *do_data_decompress(void *opaque)
     DecompressParam *param = opaque;
     unsigned long pagesize;
     uint8_t *des;
-    int len;
+    int len, ret;
 
     qemu_mutex_lock(&param->mutex);
     while (!param->quit) {
@@ -2568,13 +2587,13 @@ static void *do_data_decompress(void *opaque)
             qemu_mutex_unlock(&param->mutex);
 
             pagesize = TARGET_PAGE_SIZE;
-            /* qemu_uncompress_data() will return failed in some case,
-             * especially when the page is dirtied when doing the compression,
-             * it's not a problem because the dirty page will be retransferred
-             * and uncompress() won't break the data in other pages.
-             */
-            qemu_uncompress_data(&param->stream, des, pagesize, param->compbuf,
-                                 len);
+
+            ret = qemu_uncompress_data(&param->stream, des, pagesize,
+                                       param->compbuf, len);
+            if (ret < 0) {
+                error_report("decompress data failed");
+                qemu_file_set_error(param->file, ret);
+            }
 
             qemu_mutex_lock(&decomp_done_lock);
             param->done = true;
@@ -2591,12 +2610,12 @@ static void *do_data_decompress(void *opaque)
     return NULL;
 }
 
-static void wait_for_decompress_done(void)
+static int wait_for_decompress_done(QEMUFile *f)
 {
     int idx, thread_count;
 
     if (!migrate_use_compression()) {
-        return;
+        return 0;
     }
 
     thread_count = migrate_decompress_threads();
@@ -2607,6 +2626,7 @@ static void wait_for_decompress_done(void)
         }
     }
     qemu_mutex_unlock(&decomp_done_lock);
+    return qemu_file_get_error(f);
 }
 
 static void compress_threads_load_cleanup(void)
@@ -2646,7 +2666,7 @@ static void compress_threads_load_cleanup(void)
     decomp_param = NULL;
 }
 
-static int compress_threads_load_setup(void)
+static int compress_threads_load_setup(QEMUFile *f)
 {
     int i, thread_count;
 
@@ -2665,6 +2685,7 @@ static int compress_threads_load_setup(void)
         }
         decomp_param[i].stream.opaque = &decomp_param[i];
 
+        decomp_param[i].file = f;
         qemu_mutex_init(&decomp_param[i].mutex);
         qemu_cond_init(&decomp_param[i].cond);
         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
@@ -2719,7 +2740,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
  */
 static int ram_load_setup(QEMUFile *f, void *opaque)
 {
-    if (compress_threads_load_setup()) {
+    if (compress_threads_load_setup(f)) {
         return -1;
     }
 
@@ -3074,7 +3095,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
         }
     }
 
-    wait_for_decompress_done();
+    ret |= wait_for_decompress_done(f);
     rcu_read_unlock();
     trace_ram_load_complete(ret, seq_iter);
     return ret;
-- 
2.14.3

  parent reply	other threads:[~2018-03-27  9:10 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-27  9:10 [PATCH v2 00/10] migration: improve and cleanup compression guangrong.xiao
2018-03-27  9:10 ` [Qemu-devel] " guangrong.xiao
2018-03-27  9:10 ` [PATCH v2 01/10] migration: stop compressing page in migration thread guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-27  9:10 ` [PATCH v2 02/10] migration: stop compression to allocate and free memory frequently guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-28  9:25   ` Peter Xu
2018-03-28  9:25     ` [Qemu-devel] " Peter Xu
2018-03-29  3:41     ` Xiao Guangrong
2018-03-29  3:41       ` [Qemu-devel] " Xiao Guangrong
2018-03-27  9:10 ` [PATCH v2 03/10] migration: stop decompression " guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-28  9:42   ` Peter Xu
2018-03-28  9:42     ` [Qemu-devel] " Peter Xu
2018-03-29  3:43     ` Xiao Guangrong
2018-03-29  3:43       ` [Qemu-devel] " Xiao Guangrong
2018-03-29  4:14       ` Peter Xu
2018-03-29  4:14         ` [Qemu-devel] " Peter Xu
2018-03-27  9:10 ` guangrong.xiao [this message]
2018-03-27  9:10   ` [Qemu-devel] [PATCH v2 04/10] migration: detect compression and decompression errors guangrong.xiao
2018-03-28  9:59   ` Peter Xu
2018-03-28  9:59     ` [Qemu-devel] " Peter Xu
2018-03-29  3:51     ` Xiao Guangrong
2018-03-29  3:51       ` [Qemu-devel] " Xiao Guangrong
2018-03-29  4:25       ` Peter Xu
2018-03-29  4:25         ` [Qemu-devel] " Peter Xu
2018-03-30  3:11         ` Xiao Guangrong
2018-03-30  3:11           ` [Qemu-devel] " Xiao Guangrong
2018-04-02  4:26           ` Peter Xu
2018-04-02  4:26             ` [Qemu-devel] " Peter Xu
2018-03-27  9:10 ` [PATCH v2 05/10] migration: introduce control_save_page() guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-27  9:10 ` [PATCH v2 06/10] migration: move some code ram_save_host_page guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-28 10:05   ` Peter Xu
2018-03-28 10:05     ` [Qemu-devel] " Peter Xu
2018-03-27  9:10 ` [PATCH v2 07/10] migration: move calling control_save_page to the common place guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-27  9:10 ` [PATCH v2 08/10] migration: move calling save_zero_page " guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-27  9:10 ` [PATCH v2 09/10] migration: introduce save_normal_page() guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao
2018-03-27  9:10 ` [PATCH v2 10/10] migration: remove ram_save_compressed_page() guangrong.xiao
2018-03-27  9:10   ` [Qemu-devel] " guangrong.xiao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180327091043.30220-5-xiaoguangrong@tencent.com \
    --to=guangrong.xiao@gmail.com \
    --cc=dgilbert@redhat.com \
    --cc=jiang.biao2@zte.com.cn \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=wei.w.wang@intel.com \
    --cc=xiaoguangrong@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.