qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/3] blockjob: correct backup cluster size for incremental backups
@ 2016-02-12 23:06 John Snow
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable John Snow
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: John Snow @ 2016-02-12 23:06 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, famz, jcody, qemu-devel, stefanha, John Snow

Incremental backups sometimes need a non-64KiB transfer cluster size.
See patch #2 for the detailed justification.

________________________________________________________________________________

For convenience, this branch is available at:
https://github.com/jnsnow/qemu.git branch incremental-granularity-fix
https://github.com/jnsnow/qemu/tree/incremental-granularity-fix

This version is tagged incremental-granularity-fix-v1:
https://github.com/jnsnow/qemu/releases/tag/incremental-granularity-fix-v1

John Snow (3):
  block/backup: make backup cluster size configurable
  block/backup: avoid copying less than full target clusters
  iotests/124: Add cluster_size mismatch test

 block/backup.c             | 68 ++++++++++++++++++++++++++--------------------
 tests/qemu-iotests/124     | 58 +++++++++++++++++++++++++++++++++++----
 tests/qemu-iotests/124.out |  4 +--
 3 files changed, 93 insertions(+), 37 deletions(-)

-- 
2.4.3

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable
  2016-02-12 23:06 [Qemu-devel] [PATCH 0/3] blockjob: correct backup cluster size for incremental backups John Snow
@ 2016-02-12 23:06 ` John Snow
  2016-02-14  6:46   ` Fam Zheng
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters John Snow
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 3/3] iotests/124: Add cluster_size mismatch test John Snow
  2 siblings, 1 reply; 9+ messages in thread
From: John Snow @ 2016-02-12 23:06 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, famz, jcody, qemu-devel, stefanha, John Snow

64K might not always be appropriate, make this a runtime value.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 block/backup.c | 57 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 00cafdb..fcf0043 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -21,10 +21,7 @@
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
 
-#define BACKUP_CLUSTER_BITS 16
-#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
-#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
-
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 100000000ULL /* ns */
 
 typedef struct CowRequest {
@@ -46,6 +43,8 @@ typedef struct BackupBlockJob {
     CoRwlock flush_rwlock;
     uint64_t sectors_read;
     HBitmap *bitmap;
+    int64_t cluster_size;
+    int64_t sectors_per_cluster;
     QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
 
@@ -102,8 +101,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
-    start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
+    start = sector_num / job->sectors_per_cluster;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, job->sectors_per_cluster);
 
     trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
 
@@ -118,12 +117,12 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+        n = MIN(job->sectors_per_cluster,
                 job->common.len / BDRV_SECTOR_SIZE -
-                start * BACKUP_SECTORS_PER_CLUSTER);
+                start * job->sectors_per_cluster);
 
         if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+            bounce_buffer = qemu_blockalign(bs, job->cluster_size);
         }
         iov.iov_base = bounce_buffer;
         iov.iov_len = n * BDRV_SECTOR_SIZE;
@@ -131,10 +130,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 
         if (is_write_notifier) {
             ret = bdrv_co_readv_no_serialising(bs,
-                                           start * BACKUP_SECTORS_PER_CLUSTER,
+                                           start * job->sectors_per_cluster,
                                            n, &bounce_qiov);
         } else {
-            ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+            ret = bdrv_co_readv(bs, start * job->sectors_per_cluster, n,
                                 &bounce_qiov);
         }
         if (ret < 0) {
@@ -147,11 +146,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 
         if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
             ret = bdrv_co_write_zeroes(job->target,
-                                       start * BACKUP_SECTORS_PER_CLUSTER,
+                                       start * job->sectors_per_cluster,
                                        n, BDRV_REQ_MAY_UNMAP);
         } else {
             ret = bdrv_co_writev(job->target,
-                                 start * BACKUP_SECTORS_PER_CLUSTER, n,
+                                 start * job->sectors_per_cluster, n,
                                  &bounce_qiov);
         }
         if (ret < 0) {
@@ -326,17 +325,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
     HBitmapIter hbi;
 
     granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-    clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
+    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
     bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
 
     /* Find the next dirty sector(s) */
     while ((sector = hbitmap_iter_next(&hbi)) != -1) {
-        cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+        cluster = sector / job->sectors_per_cluster;
 
         /* Fake progress updates for any clusters we skipped */
         if (cluster != last_cluster + 1) {
             job->common.offset += ((cluster - last_cluster - 1) *
-                                   BACKUP_CLUSTER_SIZE);
+                                   job->cluster_size);
         }
 
         for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
@@ -344,8 +343,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                 if (yield_and_check(job)) {
                     return ret;
                 }
-                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
-                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
+                ret = backup_do_cow(bs, cluster * job->sectors_per_cluster,
+                                    job->sectors_per_cluster, &error_is_read,
                                     false);
                 if ((ret < 0) &&
                     backup_error_action(job, error_is_read, -ret) ==
@@ -357,17 +356,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
 
         /* If the bitmap granularity is smaller than the backup granularity,
          * we need to advance the iterator pointer to the next cluster. */
-        if (granularity < BACKUP_CLUSTER_SIZE) {
-            bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
+        if (granularity < job->cluster_size) {
+            bdrv_set_dirty_iter(&hbi, cluster * job->sectors_per_cluster);
         }
 
         last_cluster = cluster - 1;
     }
 
     /* Play some final catchup with the progress meter */
-    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
     if (last_cluster + 1 < end) {
-        job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
+        job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
     }
 
     return ret;
@@ -390,7 +389,7 @@ static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_init(&job->flush_rwlock);
 
     start = 0;
-    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
 
     job->bitmap = hbitmap_alloc(end, 0);
 
@@ -427,7 +426,7 @@ static void coroutine_fn backup_run(void *opaque)
                 /* Check to see if these blocks are already in the
                  * backing file. */
 
-                for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
+                for (i = 0; i < job->sectors_per_cluster;) {
                     /* bdrv_is_allocated() only returns true/false based
                      * on the first set of sectors it comes across that
                      * are are all in the same state.
@@ -436,8 +435,8 @@ static void coroutine_fn backup_run(void *opaque)
                      * needed but at some point that is always the case. */
                     alloced =
                         bdrv_is_allocated(bs,
-                                start * BACKUP_SECTORS_PER_CLUSTER + i,
-                                BACKUP_SECTORS_PER_CLUSTER - i, &n);
+                                start * job->sectors_per_cluster + i,
+                                job->sectors_per_cluster - i, &n);
                     i += n;
 
                     if (alloced == 1 || n == 0) {
@@ -452,8 +451,8 @@ static void coroutine_fn backup_run(void *opaque)
                 }
             }
             /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
-                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
+            ret = backup_do_cow(bs, start * job->sectors_per_cluster,
+                    job->sectors_per_cluster, &error_is_read, false);
             if (ret < 0) {
                 /* Depending on error action, fail now or retry cluster */
                 BlockErrorAction action =
@@ -571,6 +570,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->sync_mode = sync_mode;
     job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                        sync_bitmap : NULL;
+    job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+    job->sectors_per_cluster = job->cluster_size / BDRV_SECTOR_SIZE;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
     block_job_txn_add_job(txn, &job->common);
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters
  2016-02-12 23:06 [Qemu-devel] [PATCH 0/3] blockjob: correct backup cluster size for incremental backups John Snow
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable John Snow
@ 2016-02-12 23:06 ` John Snow
  2016-02-14  6:49   ` Fam Zheng
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 3/3] iotests/124: Add cluster_size mismatch test John Snow
  2 siblings, 1 reply; 9+ messages in thread
From: John Snow @ 2016-02-12 23:06 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, famz, jcody, qemu-devel, stefanha, John Snow

During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertently create unusable incremental backup images.

For example:

If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.

This does not happen if the target image has a backing file that points
to the last known good backup.

Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 block/backup.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index fcf0043..62faf81 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -568,9 +568,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->on_target_error = on_target_error;
     job->target = target;
     job->sync_mode = sync_mode;
-    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
-                       sync_bitmap : NULL;
-    job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+        BlockDriverInfo bdi;
+
+        bdrv_get_info(job->target, &bdi);
+        job->sync_bitmap = sync_bitmap;
+        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT,
+                                bdi.cluster_size);
+    } else {
+        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+    }
     job->sectors_per_cluster = job->cluster_size / BDRV_SECTOR_SIZE;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Qemu-devel] [PATCH 3/3] iotests/124: Add cluster_size mismatch test
  2016-02-12 23:06 [Qemu-devel] [PATCH 0/3] blockjob: correct backup cluster size for incremental backups John Snow
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable John Snow
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters John Snow
@ 2016-02-12 23:06 ` John Snow
  2016-02-14  7:08   ` Fam Zheng
  2 siblings, 1 reply; 9+ messages in thread
From: John Snow @ 2016-02-12 23:06 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, famz, jcody, qemu-devel, stefanha, John Snow

If a backing file isn't specified in the target image and the
cluster_size is larger than the bitmap granularity, we run the risk of
creating bitmaps with allocated clusters but empty/no data which will
prevent the proper reading of the backup in the future.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 tests/qemu-iotests/124     | 58 ++++++++++++++++++++++++++++++++++++++++++----
 tests/qemu-iotests/124.out |  4 ++--
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
index 7d33422..efbb5bd 100644
--- a/tests/qemu-iotests/124
+++ b/tests/qemu-iotests/124
@@ -132,14 +132,16 @@ class TestIncrementalBackupBase(iotests.QMPTestCase):
 
 
     def img_create(self, img, fmt=iotests.imgfmt, size='64M',
-                   parent=None, parentFormat=None):
+                   parent=None, parentFormat=None, **kwargs):
+        optargs = []
+        for k,v in kwargs.iteritems():
+            optargs = optargs + ['-o', '%s=%s' % (k,v)]
+        args = ['create', '-f', fmt] + optargs + [img, size]
         if parent:
             if parentFormat is None:
                 parentFormat = fmt
-            iotests.qemu_img('create', '-f', fmt, img, size,
-                             '-b', parent, '-F', parentFormat)
-        else:
-            iotests.qemu_img('create', '-f', fmt, img, size)
+            args = args + ['-b', parent, '-F', parentFormat]
+        iotests.qemu_img(*args)
         self.files.append(img)
 
 
@@ -307,6 +309,52 @@ class TestIncrementalBackup(TestIncrementalBackupBase):
         return self.do_incremental_simple(granularity=131072)
 
 
+    def test_larger_cluster_target(self):
+        '''
+        Test: Create and verify backups made to a larger cluster size target.
+
+        With a default granularity of 64KiB, verify that backups made to a
+        larger cluster size target of 128KiB works as expected.
+        '''
+        drive0 = self.drives[0]
+
+        # Create a cluster_size=128k full backup / "anchor" backup
+        self.img_create(drive0['backup'], cluster_size='128k')
+        self.assertTrue(self.do_qmp_backup(device=drive0['id'], sync='full',
+                                           format=drive0['fmt'],
+                                           target=drive0['backup'],
+                                           mode='existing'))
+
+        # Create bitmap and dirty it with some new writes.
+        # overwrite [32736, 32799] which will dirty bitmap clusters at
+        # 32M-64K and 32M. 32M+64K will be left undirtied.
+        bitmap0 = self.add_bitmap('bitmap0', drive0)
+        self.hmp_io_writes(drive0['id'],
+                           (('0xab', 0, 512),
+                            ('0xfe', '16M', '256k'),
+                            ('0x64', '32736k', '64k')))
+
+
+        # Prepare a cluster_size=128k backup target without a backing file.
+        (target, _) = bitmap0.new_target()
+        self.img_create(target, bitmap0.drive['fmt'], cluster_size='128k')
+
+        # Perform Incremental Backup
+        self.assertTrue(self.do_qmp_backup(device=bitmap0.drive['id'],
+                                           sync='incremental',
+                                           bitmap=bitmap0.name,
+                                           format=bitmap0.drive['fmt'],
+                                           target=target,
+                                           mode='existing'))
+        self.make_reference_backup(bitmap0)
+
+        # Add the backing file, then compare and exit.
+        iotests.qemu_img('rebase', '-f', drive0['fmt'], '-u', '-b',
+                         drive0['backup'], '-F', drive0['fmt'], target)
+        self.vm.shutdown()
+        self.check_backups()
+
+
     def test_incremental_transaction(self):
         '''Test: Verify backups made from transactionally created bitmaps.
 
diff --git a/tests/qemu-iotests/124.out b/tests/qemu-iotests/124.out
index dae404e..36376be 100644
--- a/tests/qemu-iotests/124.out
+++ b/tests/qemu-iotests/124.out
@@ -1,5 +1,5 @@
-.........
+..........
 ----------------------------------------------------------------------
-Ran 9 tests
+Ran 10 tests
 
 OK
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable John Snow
@ 2016-02-14  6:46   ` Fam Zheng
  2016-02-16 13:38     ` John Snow
  0 siblings, 1 reply; 9+ messages in thread
From: Fam Zheng @ 2016-02-14  6:46 UTC (permalink / raw)
  To: John Snow; +Cc: kwolf, jcody, stefanha, qemu-devel, qemu-block

On Fri, 02/12 18:06, John Snow wrote:
>  typedef struct CowRequest {
> @@ -46,6 +43,8 @@ typedef struct BackupBlockJob {
>      CoRwlock flush_rwlock;
>      uint64_t sectors_read;
>      HBitmap *bitmap;
> +    int64_t cluster_size;
> +    int64_t sectors_per_cluster;

I don't think it makes sense to duplicate one value in two units.

Fam

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters John Snow
@ 2016-02-14  6:49   ` Fam Zheng
  2016-02-16 15:43     ` John Snow
  0 siblings, 1 reply; 9+ messages in thread
From: Fam Zheng @ 2016-02-14  6:49 UTC (permalink / raw)
  To: John Snow; +Cc: kwolf, jcody, stefanha, qemu-devel, qemu-block

On Fri, 02/12 18:06, John Snow wrote:
> During incremental backups, if the target has a cluster size that is
> larger than the backup cluster size and we are backing up to a target
> that cannot (for whichever reason) pull clusters up from a backing image,
> we may inadvertently create unusable incremental backup images.
> 
> For example:
> 
> If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
> of data at a time but the target uses a 128KB cluster size, it is
> possible that only half of a target cluster will be recognized as dirty
> by the backup block job. When the cluster is allocated on the target
> image but only half populated with data, we lose the ability to
> distinguish between zero padding and uninitialized data.
> 
> This does not happen if the target image has a backing file that points
> to the last known good backup.
> 
> Even if we have a backing file, though, it's likely going to be faster
> to just buffer the redundant data ourselves from the live image than
> fetching it from the backing file, so let's just always round up to the
> target granularity.
> 
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>  block/backup.c | 13 ++++++++++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/block/backup.c b/block/backup.c
> index fcf0043..62faf81 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -568,9 +568,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>      job->on_target_error = on_target_error;
>      job->target = target;
>      job->sync_mode = sync_mode;
> -    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
> -                       sync_bitmap : NULL;
> -    job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
> +    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
> +        BlockDriverInfo bdi;
> +
> +        bdrv_get_info(job->target, &bdi);
> +        job->sync_bitmap = sync_bitmap;
> +        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT,
> +                                bdi.cluster_size);

Why not just do it for all sync modes?

Fam

> +    } else {
> +        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
> +    }
>      job->sectors_per_cluster = job->cluster_size / BDRV_SECTOR_SIZE;
>      job->common.len = len;
>      job->common.co = qemu_coroutine_create(backup_run);
> -- 
> 2.4.3
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3] iotests/124: Add cluster_size mismatch test
  2016-02-12 23:06 ` [Qemu-devel] [PATCH 3/3] iotests/124: Add cluster_size mismatch test John Snow
@ 2016-02-14  7:08   ` Fam Zheng
  0 siblings, 0 replies; 9+ messages in thread
From: Fam Zheng @ 2016-02-14  7:08 UTC (permalink / raw)
  To: John Snow; +Cc: kwolf, jcody, stefanha, qemu-devel, qemu-block

On Fri, 02/12 18:06, John Snow wrote:
> If a backing file isn't specified in the target image and the
> cluster_size is larger than the bitmap granularity, we run the risk of
> creating bitmaps with allocated clusters but empty/no data which will
> prevent the proper reading of the backup in the future.
> 
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>  tests/qemu-iotests/124     | 58 ++++++++++++++++++++++++++++++++++++++++++----
>  tests/qemu-iotests/124.out |  4 ++--
>  2 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
> index 7d33422..efbb5bd 100644
> --- a/tests/qemu-iotests/124
> +++ b/tests/qemu-iotests/124
> @@ -132,14 +132,16 @@ class TestIncrementalBackupBase(iotests.QMPTestCase):
>  
>  
>      def img_create(self, img, fmt=iotests.imgfmt, size='64M',
> -                   parent=None, parentFormat=None):
> +                   parent=None, parentFormat=None, **kwargs):
> +        optargs = []
> +        for k,v in kwargs.iteritems():
> +            optargs = optargs + ['-o', '%s=%s' % (k,v)]
> +        args = ['create', '-f', fmt] + optargs + [img, size]
>          if parent:
>              if parentFormat is None:
>                  parentFormat = fmt
> -            iotests.qemu_img('create', '-f', fmt, img, size,
> -                             '-b', parent, '-F', parentFormat)
> -        else:
> -            iotests.qemu_img('create', '-f', fmt, img, size)
> +            args = args + ['-b', parent, '-F', parentFormat]
> +        iotests.qemu_img(*args)
>          self.files.append(img)
>  
>  
> @@ -307,6 +309,52 @@ class TestIncrementalBackup(TestIncrementalBackupBase):
>          return self.do_incremental_simple(granularity=131072)
>  
>  
> +    def test_larger_cluster_target(self):
> +        '''
> +        Test: Create and verify backups made to a larger cluster size target.
> +
> +        With a default granularity of 64KiB, verify that backups made to a
> +        larger cluster size target of 128KiB works as expected.
> +        '''
> +        drive0 = self.drives[0]
> +
> +        # Create a cluster_size=128k full backup / "anchor" backup
> +        self.img_create(drive0['backup'], cluster_size='128k')
> +        self.assertTrue(self.do_qmp_backup(device=drive0['id'], sync='full',
> +                                           format=drive0['fmt'],
> +                                           target=drive0['backup'],
> +                                           mode='existing'))
> +
> +        # Create bitmap and dirty it with some new writes.
> +        # overwrite [32736, 32799] which will dirty bitmap clusters at
> +        # 32M-64K and 32M. 32M+64K will be left undirtied.
> +        bitmap0 = self.add_bitmap('bitmap0', drive0)
> +        self.hmp_io_writes(drive0['id'],
> +                           (('0xab', 0, 512),
> +                            ('0xfe', '16M', '256k'),
> +                            ('0x64', '32736k', '64k')))
> +
> +
> +        # Prepare a cluster_size=128k backup target without a backing file.
> +        (target, _) = bitmap0.new_target()
> +        self.img_create(target, bitmap0.drive['fmt'], cluster_size='128k')
> +
> +        # Perform Incremental Backup
> +        self.assertTrue(self.do_qmp_backup(device=bitmap0.drive['id'],
> +                                           sync='incremental',
> +                                           bitmap=bitmap0.name,
> +                                           format=bitmap0.drive['fmt'],
> +                                           target=target,
> +                                           mode='existing'))
> +        self.make_reference_backup(bitmap0)
> +
> +        # Add the backing file, then compare and exit.
> +        iotests.qemu_img('rebase', '-f', drive0['fmt'], '-u', '-b',
> +                         drive0['backup'], '-F', drive0['fmt'], target)
> +        self.vm.shutdown()
> +        self.check_backups()
> +
> +
>      def test_incremental_transaction(self):
>          '''Test: Verify backups made from transactionally created bitmaps.
>  
> diff --git a/tests/qemu-iotests/124.out b/tests/qemu-iotests/124.out
> index dae404e..36376be 100644
> --- a/tests/qemu-iotests/124.out
> +++ b/tests/qemu-iotests/124.out
> @@ -1,5 +1,5 @@
> -.........
> +..........
>  ----------------------------------------------------------------------
> -Ran 9 tests
> +Ran 10 tests
>  
>  OK
> -- 
> 2.4.3
> 

Reviewed-by: Fam Zheng <famz@redhat.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable
  2016-02-14  6:46   ` Fam Zheng
@ 2016-02-16 13:38     ` John Snow
  0 siblings, 0 replies; 9+ messages in thread
From: John Snow @ 2016-02-16 13:38 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, jcody, stefanha, qemu-devel, qemu-block



On 02/14/2016 01:46 AM, Fam Zheng wrote:
> On Fri, 02/12 18:06, John Snow wrote:
>>  typedef struct CowRequest {
>> @@ -46,6 +43,8 @@ typedef struct BackupBlockJob {
>>      CoRwlock flush_rwlock;
>>      uint64_t sectors_read;
>>      HBitmap *bitmap;
>> +    int64_t cluster_size;
>> +    int64_t sectors_per_cluster;
> 
> I don't think it makes sense to duplicate one value in two units.
> 
> Fam
> 

Eh, fair.

I just didn't want to duplicate the math everywhere. I can add a little
macro or an inline function instead.

--js

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters
  2016-02-14  6:49   ` Fam Zheng
@ 2016-02-16 15:43     ` John Snow
  0 siblings, 0 replies; 9+ messages in thread
From: John Snow @ 2016-02-16 15:43 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, jcody, stefanha, qemu-devel, qemu-block



On 02/14/2016 01:49 AM, Fam Zheng wrote:
> On Fri, 02/12 18:06, John Snow wrote:
>> During incremental backups, if the target has a cluster size that is
>> larger than the backup cluster size and we are backing up to a target
>> that cannot (for whichever reason) pull clusters up from a backing image,
>> we may inadvertently create unusable incremental backup images.
>>
>> For example:
>>
>> If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
>> of data at a time but the target uses a 128KB cluster size, it is
>> possible that only half of a target cluster will be recognized as dirty
>> by the backup block job. When the cluster is allocated on the target
>> image but only half populated with data, we lose the ability to
>> distinguish between zero padding and uninitialized data.
>>
>> This does not happen if the target image has a backing file that points
>> to the last known good backup.
>>
>> Even if we have a backing file, though, it's likely going to be faster
>> to just buffer the redundant data ourselves from the live image than
>> fetching it from the backing file, so let's just always round up to the
>> target granularity.
>>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>  block/backup.c | 13 ++++++++++---
>>  1 file changed, 10 insertions(+), 3 deletions(-)
>>
>> diff --git a/block/backup.c b/block/backup.c
>> index fcf0043..62faf81 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -568,9 +568,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>      job->on_target_error = on_target_error;
>>      job->target = target;
>>      job->sync_mode = sync_mode;
>> -    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
>> -                       sync_bitmap : NULL;
>> -    job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
>> +    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
>> +        BlockDriverInfo bdi;
>> +
>> +        bdrv_get_info(job->target, &bdi);
>> +        job->sync_bitmap = sync_bitmap;
>> +        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT,
>> +                                bdi.cluster_size);
> 
> Why not just do it for all sync modes?
> 
> Fam
> 

Caught me not thinking about those.

sync=full is probably OK as-is, but top and none suffer from a similar
problem, you're right.

Incremental is the worst offender since the bitmap used to create the
bitmap will have been consumed, but I'll pay heed to the other modes in v2.

>> +    } else {
>> +        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
>> +    }
>>      job->sectors_per_cluster = job->cluster_size / BDRV_SECTOR_SIZE;
>>      job->common.len = len;
>>      job->common.co = qemu_coroutine_create(backup_run);
>> -- 
>> 2.4.3
>>

-- 
—js

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-02-16 15:43 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-12 23:06 [Qemu-devel] [PATCH 0/3] blockjob: correct backup cluster size for incremental backups John Snow
2016-02-12 23:06 ` [Qemu-devel] [PATCH 1/3] block/backup: make backup cluster size configurable John Snow
2016-02-14  6:46   ` Fam Zheng
2016-02-16 13:38     ` John Snow
2016-02-12 23:06 ` [Qemu-devel] [PATCH 2/3] block/backup: avoid copying less than full target clusters John Snow
2016-02-14  6:49   ` Fam Zheng
2016-02-16 15:43     ` John Snow
2016-02-12 23:06 ` [Qemu-devel] [PATCH 3/3] iotests/124: Add cluster_size mismatch test John Snow
2016-02-14  7:08   ` Fam Zheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).