All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing
@ 2014-08-13 21:01 Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check Max Reitz
                   ` (7 more replies)
  0 siblings, 8 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

As can be seen in the final patch of this series, there are certain
cases where the current repair implementation of qcow2 actually damages
the image further because it allocates new clusters for the refcount
structure which overlap with existing but according to the on-disk
refcounts (which are assumed to be wrong to begin with) unallocated
clusters.

This series fixes this by completely recreating the refcount structure
based on the in-memory information calculated during the check operation
if the possibility of damaging the image while repairing the refcount
structures in-place exists.


Max Reitz (8):
  qcow2: Factor out refcount accounting for check
  qcow2: Factor out refcount comparison for check
  qcow2: Fix refcount blocks beyond image end
  qcow2: Do not perform potentially damaging repairs
  qcow2: Rebuild refcount structure during check
  qcow2: Clean up after refcount rebuild
  iotests: Fix test outputs
  iotests: Add test for potentially damaging repairs

 block/qcow2-refcount.c     | 657 +++++++++++++++++++++++++++++++--------------
 tests/qemu-iotests/039.out |  10 +-
 tests/qemu-iotests/060.out |  10 +-
 tests/qemu-iotests/061.out |  18 +-
 tests/qemu-iotests/101     |  98 +++++++
 tests/qemu-iotests/101.out |  46 ++++
 tests/qemu-iotests/group   |   1 +
 7 files changed, 626 insertions(+), 214 deletions(-)
 create mode 100755 tests/qemu-iotests/101
 create mode 100644 tests/qemu-iotests/101.out

-- 
2.0.3

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-14 11:56   ` Benoît Canet
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison " Max Reitz
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

Put the code for calculating the reference counts during qemu-img check
into an own function.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 202 +++++++++++++++++++++++++++++--------------------
 1 file changed, 122 insertions(+), 80 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index d60e2fe..9793c27 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1496,71 +1496,17 @@ done:
 }
 
 /*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occurred.
+ * Checks consistency of refblocks and accounts for each refblock in
+ * *refcount_table.
  */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                          BdrvCheckMode fix)
+static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
+                           BdrvCheckMode fix, uint16_t **refcount_table,
+                           int64_t *nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
-    int64_t size, i, highest_cluster, nb_clusters;
-    int refcount1, refcount2;
-    QCowSnapshot *sn;
-    uint16_t *refcount_table;
-    int ret;
-
-    size = bdrv_getlength(bs->file);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
-    }
-
-    nb_clusters = size_to_clusters(s, size);
-    if (nb_clusters > INT_MAX) {
-        res->check_errors++;
-        return -EFBIG;
-    }
-
-    refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t));
-    if (nb_clusters && refcount_table == NULL) {
-        res->check_errors++;
-        return -ENOMEM;
-    }
-
-    res->bfi.total_clusters =
-        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
-
-    /* header */
-    inc_refcounts(bs, res, refcount_table, nb_clusters,
-        0, s->cluster_size);
-
-    /* current L1 table */
-    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
-                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO);
-    if (ret < 0) {
-        goto fail;
-    }
+    int64_t i;
 
-    /* snapshots */
-    for(i = 0; i < s->nb_snapshots; i++) {
-        sn = s->snapshots + i;
-        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
-            sn->l1_table_offset, sn->l1_size, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-    inc_refcounts(bs, res, refcount_table, nb_clusters,
-        s->snapshots_offset, s->snapshots_size);
-
-    /* refcount data */
-    inc_refcounts(bs, res, refcount_table, nb_clusters,
-        s->refcount_table_offset,
-        s->refcount_table_size * sizeof(uint64_t));
-
-    for(i = 0; i < s->refcount_table_size; i++) {
+    for (i = 0; i < s->refcount_table_size; i++) {
         uint64_t offset, cluster;
         offset = s->refcount_table[i];
         cluster = offset >> s->cluster_bits;
@@ -1568,12 +1514,12 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         /* Refcount blocks are cluster aligned */
         if (offset_into_cluster(s, offset)) {
             fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
-                "cluster aligned; refcount table entry corrupted\n", i);
+                    "cluster aligned; refcount table entry corrupted\n", i);
             res->corruptions++;
             continue;
         }
 
-        if (cluster >= nb_clusters) {
+        if (cluster >= *nb_clusters) {
             fprintf(stderr, "ERROR refcount block %" PRId64
                     " is outside image\n", i);
             res->corruptions++;
@@ -1581,14 +1527,14 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         }
 
         if (offset != 0) {
-            inc_refcounts(bs, res, refcount_table, nb_clusters,
-                offset, s->cluster_size);
-            if (refcount_table[cluster] != 1) {
+            inc_refcounts(bs, res, *refcount_table, *nb_clusters,
+                          offset, s->cluster_size);
+            if ((*refcount_table)[cluster] != 1) {
                 fprintf(stderr, "%s refcount block %" PRId64
-                    " refcount=%d\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" :
-                                            "ERROR",
-                    i, refcount_table[cluster]);
+                        " refcount=%d\n",
+                        fix & BDRV_FIX_ERRORS ? "Repairing" :
+                                                "ERROR",
+                        i, (*refcount_table)[cluster]);
 
                 if (fix & BDRV_FIX_ERRORS) {
                     int64_t new_offset;
@@ -1600,18 +1546,25 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                     }
 
                     /* update refcounts */
-                    if ((new_offset >> s->cluster_bits) >= nb_clusters) {
+                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
                         /* increase refcount_table size if necessary */
-                        int old_nb_clusters = nb_clusters;
-                        nb_clusters = (new_offset >> s->cluster_bits) + 1;
-                        refcount_table = g_realloc(refcount_table,
-                                nb_clusters * sizeof(uint16_t));
-                        memset(&refcount_table[old_nb_clusters], 0, (nb_clusters
-                                - old_nb_clusters) * sizeof(uint16_t));
+                        int old_nb_clusters = *nb_clusters;
+                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
+
+                        *refcount_table = g_try_realloc(*refcount_table,
+                                *nb_clusters * sizeof(uint16_t));
+                        if (!*refcount_table) {
+                            res->check_errors++;
+                            return -ENOMEM;
+                        }
+
+                        memset(&(*refcount_table)[old_nb_clusters], 0,
+                               (*nb_clusters - old_nb_clusters) *
+                               sizeof(uint16_t));
                     }
-                    refcount_table[cluster]--;
-                    inc_refcounts(bs, res, refcount_table, nb_clusters,
-                            new_offset, s->cluster_size);
+                    (*refcount_table)[cluster]--;
+                    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
+                                  new_offset, s->cluster_size);
 
                     res->corruptions_fixed++;
                 } else {
@@ -1621,6 +1574,95 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         }
     }
 
+    return 0;
+}
+
+/*
+ * Calculates an in-memory refcount table.
+ */
+static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                               BdrvCheckMode fix, uint16_t **refcount_table,
+                               int64_t *nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    int64_t i;
+    int ret;
+
+    if (!*refcount_table) {
+        *refcount_table = g_try_malloc0(*nb_clusters * sizeof(uint16_t));
+        if (*nb_clusters && !*refcount_table) {
+            res->check_errors++;
+            return -ENOMEM;
+        }
+    }
+
+    /* header */
+    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
+                  0, s->cluster_size);
+
+    /* current L1 table */
+    ret = check_refcounts_l1(bs, res, *refcount_table, *nb_clusters,
+                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* snapshots */
+    for (i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        ret = check_refcounts_l1(bs, res, *refcount_table, *nb_clusters,
+                                 sn->l1_table_offset, sn->l1_size, 0);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
+                  s->snapshots_offset, s->snapshots_size);
+
+    /* refcount data */
+    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
+                  s->refcount_table_offset,
+                  s->refcount_table_size * sizeof(uint64_t));
+
+    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
+}
+
+/*
+ * Checks an image for refcount consistency.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occurred.
+ */
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                          BdrvCheckMode fix)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t size, i, highest_cluster, nb_clusters;
+    int refcount1, refcount2;
+    uint16_t *refcount_table = NULL;
+    int ret;
+
+    size = bdrv_getlength(bs->file);
+    if (size < 0) {
+        res->check_errors++;
+        return size;
+    }
+
+    nb_clusters = size_to_clusters(s, size);
+    if (nb_clusters > INT_MAX) {
+        res->check_errors++;
+        return -EFBIG;
+    }
+
+    res->bfi.total_clusters =
+        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
+
+    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
+    if (ret < 0) {
+        goto fail;
+    }
+
     /* compare ref counts */
     for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
         refcount1 = get_refcount(bs, i);
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison for check
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-14 12:02   ` Benoît Canet
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end Max Reitz
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

Put the code for comparing the calculated refcounts against the on-disk
refcounts during qemu-img check into an own function.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 91 ++++++++++++++++++++++++++++----------------------
 1 file changed, 52 insertions(+), 39 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9793c27..d1da8d5 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1629,46 +1629,22 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
 }
 
 /*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occurred.
+ * Compares the actual reference count for each cluster in the image against the
+ * refcount as reported by the refcount structures on-disk.
  */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                          BdrvCheckMode fix)
+static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                              BdrvCheckMode fix, int64_t *highest_cluster,
+                              uint16_t *refcount_table, int64_t nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
-    int64_t size, i, highest_cluster, nb_clusters;
-    int refcount1, refcount2;
-    uint16_t *refcount_table = NULL;
-    int ret;
-
-    size = bdrv_getlength(bs->file);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
-    }
-
-    nb_clusters = size_to_clusters(s, size);
-    if (nb_clusters > INT_MAX) {
-        res->check_errors++;
-        return -EFBIG;
-    }
-
-    res->bfi.total_clusters =
-        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
-
-    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
-    if (ret < 0) {
-        goto fail;
-    }
+    int64_t i;
+    int refcount1, refcount2, ret;
 
-    /* compare ref counts */
-    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
+    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
         refcount1 = get_refcount(bs, i);
         if (refcount1 < 0) {
             fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
-                i, strerror(-refcount1));
+                    i, strerror(-refcount1));
             res->check_errors++;
             continue;
         }
@@ -1676,11 +1652,10 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         refcount2 = refcount_table[i];
 
         if (refcount1 > 0 || refcount2 > 0) {
-            highest_cluster = i;
+            *highest_cluster = i;
         }
 
         if (refcount1 != refcount2) {
-
             /* Check if we're allowed to fix the mismatch */
             int *num_fixed = NULL;
             if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
@@ -1690,10 +1665,10 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
             }
 
             fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
-                   num_fixed != NULL     ? "Repairing" :
-                   refcount1 < refcount2 ? "ERROR" :
-                                           "Leaked",
-                   i, refcount1, refcount2);
+                    num_fixed != NULL     ? "Repairing" :
+                    refcount1 < refcount2 ? "ERROR" :
+                                            "Leaked",
+                    i, refcount1, refcount2);
 
             if (num_fixed) {
                 ret = update_refcount(bs, i << s->cluster_bits, 1,
@@ -1713,6 +1688,44 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
             }
         }
     }
+}
+
+/*
+ * Checks an image for refcount consistency.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occurred.
+ */
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                          BdrvCheckMode fix)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t size, highest_cluster, nb_clusters;
+    uint16_t *refcount_table = NULL;
+    int ret;
+
+    size = bdrv_getlength(bs->file);
+    if (size < 0) {
+        res->check_errors++;
+        return size;
+    }
+
+    nb_clusters = size_to_clusters(s, size);
+    if (nb_clusters > INT_MAX) {
+        res->check_errors++;
+        return -EFBIG;
+    }
+
+    res->bfi.total_clusters =
+        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
+
+    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
+                      nb_clusters);
 
     /* check OFLAG_COPIED */
     ret = check_oflag_copied(bs, res, fix);
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison " Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-14 12:11   ` Benoît Canet
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs Max Reitz
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

If the qcow2 check function detects a refcount block located beyond the
image end, grow the image appropriately. This cannot break anything and
is the logical fix for such a case.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index d1da8d5..a1d93e5 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1504,7 +1504,8 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                            int64_t *nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
-    int64_t i;
+    int64_t i, size;
+    int ret;
 
     for (i = 0; i < s->refcount_table_size; i++) {
         uint64_t offset, cluster;
@@ -1520,9 +1521,50 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
         }
 
         if (cluster >= *nb_clusters) {
-            fprintf(stderr, "ERROR refcount block %" PRId64
-                    " is outside image\n", i);
-            res->corruptions++;
+            fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
+                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+            if (fix & BDRV_FIX_ERRORS) {
+                int64_t old_nb_clusters = *nb_clusters;
+
+                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+                if (ret < 0) {
+                    goto resize_fail;
+                }
+                size = bdrv_getlength(bs->file);
+                if (size < 0) {
+                    ret = size;
+                    goto resize_fail;
+                }
+
+                *nb_clusters = size_to_clusters(s, size);
+                assert(*nb_clusters >= old_nb_clusters);
+
+                *refcount_table = g_try_realloc(*refcount_table,
+                        *nb_clusters * sizeof(uint16_t));
+                if (!*refcount_table) {
+                    res->check_errors++;
+                    return -ENOMEM;
+                }
+
+                memset(*refcount_table + old_nb_clusters, 0,
+                       (*nb_clusters - old_nb_clusters) * sizeof(uint16_t));
+
+                if (cluster >= *nb_clusters) {
+                    ret = -EINVAL;
+                    goto resize_fail;
+                }
+
+                res->corruptions_fixed++;
+                continue;
+
+resize_fail:
+                res->corruptions++;
+                fprintf(stderr, "ERROR could not resize image: %s\n",
+                        strerror(-ret));
+            } else {
+                res->corruptions++;
+            }
             continue;
         }
 
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
                   ` (2 preceding siblings ...)
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-14 12:33   ` Benoît Canet
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check Max Reitz
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

If a referenced cluster has a refcount of 0, increasing its refcount may
result in clusters being allocated for the refcount structures. This may
overwrite the referenced cluster, therefore we cannot simply increase
the refcount then.

In such cases, we can either try to replicate all the refcount
operations solely for the check operation, basing the allocations on the
in-memory refcount table; or we can simply rebuild the whole refcount
structure based on the in-memory refcount table. Since the latter will
be much easier, do that.

To prepare for this, introduce a "rebuild" boolean which should be set
to true whenever a fix is rather dangerous or too complicated using the
current refcount structures. Another example for this is refcount blocks
being referenced more than once.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 191 +++++++------------------------------------------
 1 file changed, 27 insertions(+), 164 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index a1d93e5..6400840 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1381,127 +1381,12 @@ fail:
 }
 
 /*
- * Writes one sector of the refcount table to the disk
- */
-#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
-static int write_reftable_entry(BlockDriverState *bs, int rt_index)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint64_t buf[RT_ENTRIES_PER_SECTOR];
-    int rt_start_index;
-    int i, ret;
-
-    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
-    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
-        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
-    }
-
-    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE,
-            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
-            sizeof(buf));
-    if (ret < 0) {
-        return ret;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
-            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
-    if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
-/*
- * Allocates a new cluster for the given refcount block (represented by its
- * offset in the image file) and copies the current content there. This function
- * does _not_ decrement the reference count for the currently occupied cluster.
- *
- * This function prints an informative message to stderr on error (and returns
- * -errno); on success, the offset of the newly allocated cluster is returned.
- */
-static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
-                                      uint64_t offset)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t new_offset = 0;
-    void *refcount_block = NULL;
-    int ret;
-
-    /* allocate new refcount block */
-    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
-    if (new_offset < 0) {
-        fprintf(stderr, "Could not allocate new cluster: %s\n",
-                strerror(-new_offset));
-        ret = new_offset;
-        goto done;
-    }
-
-    /* fetch current refcount block content */
-    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
-    if (ret < 0) {
-        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
-        goto fail_free_cluster;
-    }
-
-    /* new block has not yet been entered into refcount table, therefore it is
-     * no refcount block yet (regarding this check) */
-    ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size);
-    if (ret < 0) {
-        fprintf(stderr, "Could not write refcount block; metadata overlap "
-                "check failed: %s\n", strerror(-ret));
-        /* the image will be marked corrupt, so don't even attempt on freeing
-         * the cluster */
-        goto done;
-    }
-
-    /* write to new block */
-    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
-            s->cluster_sectors);
-    if (ret < 0) {
-        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
-        goto fail_free_cluster;
-    }
-
-    /* update refcount table */
-    assert(!offset_into_cluster(s, new_offset));
-    s->refcount_table[reftable_index] = new_offset;
-    ret = write_reftable_entry(bs, reftable_index);
-    if (ret < 0) {
-        fprintf(stderr, "Could not update refcount table: %s\n",
-                strerror(-ret));
-        goto fail_free_cluster;
-    }
-
-    goto done;
-
-fail_free_cluster:
-    qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCARD_OTHER);
-
-done:
-    if (refcount_block) {
-        /* This should never fail, as it would only do so if the given refcount
-         * block cannot be found in the cache. As this is impossible as long as
-         * there are no bugs, assert the success. */
-        int tmp = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
-        assert(tmp == 0);
-    }
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    return new_offset;
-}
-
-/*
  * Checks consistency of refblocks and accounts for each refblock in
  * *refcount_table.
  */
 static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
-                           BdrvCheckMode fix, uint16_t **refcount_table,
-                           int64_t *nb_clusters)
+                           BdrvCheckMode fix, bool *rebuild,
+                           uint16_t **refcount_table, int64_t *nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
     int64_t i, size;
@@ -1517,6 +1402,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
             fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
                     "cluster aligned; refcount table entry corrupted\n", i);
             res->corruptions++;
+            *rebuild = true;
             continue;
         }
 
@@ -1571,47 +1457,12 @@ resize_fail:
         if (offset != 0) {
             inc_refcounts(bs, res, *refcount_table, *nb_clusters,
                           offset, s->cluster_size);
-            if ((*refcount_table)[cluster] != 1) {
-                fprintf(stderr, "%s refcount block %" PRId64
-                        " refcount=%d\n",
-                        fix & BDRV_FIX_ERRORS ? "Repairing" :
-                                                "ERROR",
-                        i, (*refcount_table)[cluster]);
-
-                if (fix & BDRV_FIX_ERRORS) {
-                    int64_t new_offset;
-
-                    new_offset = realloc_refcount_block(bs, i, offset);
-                    if (new_offset < 0) {
-                        res->corruptions++;
-                        continue;
-                    }
-
-                    /* update refcounts */
-                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
-                        /* increase refcount_table size if necessary */
-                        int old_nb_clusters = *nb_clusters;
-                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
-
-                        *refcount_table = g_try_realloc(*refcount_table,
-                                *nb_clusters * sizeof(uint16_t));
-                        if (!*refcount_table) {
-                            res->check_errors++;
-                            return -ENOMEM;
-                        }
-
-                        memset(&(*refcount_table)[old_nb_clusters], 0,
-                               (*nb_clusters - old_nb_clusters) *
-                               sizeof(uint16_t));
-                    }
-                    (*refcount_table)[cluster]--;
-                    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
-                                  new_offset, s->cluster_size);
 
-                    res->corruptions_fixed++;
-                } else {
-                    res->corruptions++;
-                }
+            if ((*refcount_table)[cluster] != 1) {
+                fprintf(stderr, "ERROR refcount block %" PRId64
+                        " refcount=%d\n", i, (*refcount_table)[cluster]);
+                res->corruptions++;
+                *rebuild = true;
             }
         }
     }
@@ -1623,8 +1474,8 @@ resize_fail:
  * Calculates an in-memory refcount table.
  */
 static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                               BdrvCheckMode fix, uint16_t **refcount_table,
-                               int64_t *nb_clusters)
+                               BdrvCheckMode fix, bool *rebuild,
+                               uint16_t **refcount_table, int64_t *nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
     QCowSnapshot *sn;
@@ -1667,7 +1518,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                   s->refcount_table_offset,
                   s->refcount_table_size * sizeof(uint64_t));
 
-    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
+    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
 }
 
 /*
@@ -1675,7 +1526,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
  * refcount as reported by the refcount structures on-disk.
  */
 static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                              BdrvCheckMode fix, int64_t *highest_cluster,
+                              BdrvCheckMode fix, bool *rebuild,
+                              int64_t *highest_cluster,
                               uint16_t *refcount_table, int64_t nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1702,10 +1554,15 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
             int *num_fixed = NULL;
             if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
                 num_fixed = &res->leaks_fixed;
-            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
+            } else if (refcount1 > 0 && refcount1 < refcount2 &&
+                       (fix & BDRV_FIX_ERRORS)) {
                 num_fixed = &res->corruptions_fixed;
             }
 
+            if (refcount1 == 0) {
+                *rebuild = true;
+            }
+
             fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
                     num_fixed != NULL     ? "Repairing" :
                     refcount1 < refcount2 ? "ERROR" :
@@ -1744,6 +1601,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
     BDRVQcowState *s = bs->opaque;
     int64_t size, highest_cluster, nb_clusters;
     uint16_t *refcount_table = NULL;
+    bool rebuild = false;
     int ret;
 
     size = bdrv_getlength(bs->file);
@@ -1761,14 +1619,19 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
     res->bfi.total_clusters =
         size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
 
-    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
+    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
+                              &nb_clusters);
     if (ret < 0) {
         goto fail;
     }
 
-    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
+    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
                       nb_clusters);
 
+    if (rebuild) {
+        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
+    }
+
     /* check OFLAG_COPIED */
     ret = check_oflag_copied(bs, res, fix);
     if (ret < 0) {
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
                   ` (3 preceding siblings ...)
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-14 12:58   ` Benoît Canet
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 6/8] qcow2: Clean up after refcount rebuild Max Reitz
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

The previous commit introduced the "rebuild" variable to qcow2's
implementation of the image consistency check. Now make use of this by
adding a function which creates a completely new refcount structure
based solely on the in-memory information gathered before.

The old refcount structure will be leaked, however.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 259 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6400840..e3ca03a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1590,6 +1590,242 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
 }
 
 /*
+ * Allocates a cluster using an in-memory refcount table (IMRT) in contrast to
+ * the on-disk refcount structures.
+ *
+ * *first_free_cluster does not necessarily point to the first free cluster, but
+ * may point to one cluster as close as possible before it. The offset returned
+ * will never be before that cluster.
+ */
+static int64_t alloc_clusters_imrt(BlockDriverState *bs,
+                                   int cluster_count,
+                                   uint16_t **refcount_table,
+                                   int64_t *nb_clusters,
+                                   int64_t *first_free_cluster)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t cluster = *first_free_cluster, i;
+    bool first_gap = true;
+    int contiguous_clusters;
+
+    for (contiguous_clusters = 0;
+         cluster < *nb_clusters && contiguous_clusters < cluster_count;
+         cluster++)
+    {
+        if (!(*refcount_table)[cluster]) {
+            contiguous_clusters++;
+            if (first_gap) {
+                *first_free_cluster = cluster;
+                first_gap = false;
+            }
+        } else if (contiguous_clusters) {
+            contiguous_clusters = 0;
+        }
+    }
+
+    if (contiguous_clusters < cluster_count) {
+        int64_t old_nb_clusters = *nb_clusters;
+
+        *nb_clusters = cluster + cluster_count - contiguous_clusters;
+        *refcount_table = g_try_realloc(*refcount_table,
+                                        *nb_clusters * sizeof(uint16_t));
+        if (!*refcount_table) {
+            return -ENOMEM;
+        }
+
+        memset(*refcount_table + old_nb_clusters, 0,
+               (*nb_clusters - old_nb_clusters) * sizeof(uint16_t));
+    }
+
+    cluster -= contiguous_clusters;
+    for (i = 0; i < cluster_count; i++) {
+        (*refcount_table)[cluster + i] = 1;
+    }
+
+    return cluster << s->cluster_bits;
+}
+
+/*
+ * Creates a new refcount structure based solely on the in-memory information
+ * given through *refcount_table. All necessary allocations will be reflected
+ * in that array.
+ *
+ * On success, the old refcount structure is leaked (it will be covered by the
+ * new refcount structure).
+ */
+static int rebuild_refcount_structure(BlockDriverState *bs,
+                                      BdrvCheckResult *res,
+                                      uint16_t **refcount_table,
+                                      int64_t *nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t first_free_cluster = 0, rt_ofs = -1, cluster = 0;
+    int64_t rb_ofs, rb_start, rb_index;
+    uint32_t reftable_size = 0;
+    uint64_t *reftable = NULL;
+    uint16_t *on_disk_rb;
+    uint8_t rt_offset_and_clusters[sizeof(uint64_t) + sizeof(uint32_t)];
+    int i, ret = 0;
+
+    qcow2_cache_empty(bs, s->refcount_block_cache);
+
+write_refblocks:
+    for (; cluster < *nb_clusters; cluster++) {
+        if (!(*refcount_table)[cluster]) {
+            continue;
+        }
+
+        rb_index = cluster >> (s->cluster_bits - 1);
+        rb_start = rb_index << (s->cluster_bits - 1);
+
+        /* Don't allocate a cluster in a refblock already written to disk */
+        if (first_free_cluster < rb_start) {
+            first_free_cluster = rb_start;
+        }
+        rb_ofs = alloc_clusters_imrt(bs, 1, refcount_table, nb_clusters,
+                                     &first_free_cluster);
+        if (rb_ofs < 0) {
+            fprintf(stderr, "ERROR allocating refblock: %s\n", strerror(-ret));
+            res->check_errors++;
+            ret = rb_ofs;
+            goto fail;
+        }
+
+        if (reftable_size <= rb_index) {
+            uint32_t old_rt_size = reftable_size;
+            reftable_size = ROUND_UP((rb_index + 1) * sizeof(uint64_t),
+                                     s->cluster_size) / sizeof(uint64_t);
+            reftable = g_try_realloc(reftable,
+                                     reftable_size * sizeof(uint64_t));
+            if (!reftable) {
+                res->check_errors++;
+                ret = -ENOMEM;
+                goto fail;
+            }
+
+            memset(reftable + old_rt_size, 0,
+                   (reftable_size - old_rt_size) * sizeof(uint64_t));
+
+            /* The offset we have for the reftable is now no longer valid;
+             * this will leak that range, but we can easily fix that by running
+             * a leak-fixing check after this rebuild operation */
+            rt_ofs = -1;
+        }
+        reftable[rb_index] = rb_ofs;
+
+        /* If this is apparently the last refblock (for now), try to squeeze the
+         * reftable in */
+        if (rb_index == (*nb_clusters - 1) >> (s->cluster_bits - 1) &&
+            rt_ofs < 0)
+        {
+            rt_ofs = alloc_clusters_imrt(bs, size_to_clusters(s, reftable_size *
+                                                              sizeof(uint64_t)),
+                                         refcount_table, nb_clusters,
+                                         &first_free_cluster);
+            if (rt_ofs < 0) {
+                fprintf(stderr, "ERROR allocating reftable: %s\n",
+                        strerror(-ret));
+                res->check_errors++;
+                ret = rt_ofs;
+                goto fail;
+            }
+        }
+
+        ret = qcow2_pre_write_overlap_check(bs, 0, rb_ofs, s->cluster_size);
+        if (ret < 0) {
+            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+            goto fail;
+        }
+
+        on_disk_rb = g_malloc0(s->cluster_size);
+        for (i = 0; i < s->cluster_size / sizeof(uint16_t) &&
+                    rb_start + i < *nb_clusters; i++)
+        {
+            on_disk_rb[i] = cpu_to_be16((*refcount_table)[rb_start + i]);
+        }
+
+        ret = bdrv_write(bs->file, rb_ofs / BDRV_SECTOR_SIZE,
+                         (void *)on_disk_rb, s->cluster_sectors);
+        g_free(on_disk_rb);
+        if (ret < 0) {
+            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+            goto fail;
+        }
+
+        /* Go to the end of this refblock */
+        cluster = rb_start + s->cluster_size / sizeof(uint16_t) - 1;
+    }
+
+    if (rt_ofs < 0) {
+        int64_t post_rb_start = ROUND_UP(*nb_clusters,
+                                         s->cluster_size / sizeof(uint16_t));
+
+        /* Not pretty but simple */
+        if (first_free_cluster < post_rb_start) {
+            first_free_cluster = post_rb_start;
+        }
+        rt_ofs = alloc_clusters_imrt(bs, size_to_clusters(s, reftable_size *
+                                                          sizeof(uint64_t)),
+                                     refcount_table, nb_clusters,
+                                     &first_free_cluster);
+        if (rt_ofs < 0) {
+            fprintf(stderr, "ERROR allocating reftable: %s\n", strerror(-ret));
+            res->check_errors++;
+            ret = rt_ofs;
+            goto fail;
+        }
+
+        goto write_refblocks;
+    }
+
+    assert(reftable);
+
+    for (rb_index = 0; rb_index < reftable_size; rb_index++) {
+        cpu_to_be64s(&reftable[rb_index]);
+    }
+
+    ret = qcow2_pre_write_overlap_check(bs, 0, rt_ofs,
+                                        reftable_size * sizeof(uint64_t));
+    if (ret < 0) {
+        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        goto fail;
+    }
+
+    ret = bdrv_write(bs->file, rt_ofs / BDRV_SECTOR_SIZE, (void *)reftable,
+                     reftable_size * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
+    if (ret < 0) {
+        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        goto fail;
+    }
+
+    /* Enter new reftable into the image header */
+    cpu_to_be64w((uint64_t *)&rt_offset_and_clusters[0], rt_ofs);
+    cpu_to_be32w((uint32_t *)&rt_offset_and_clusters[sizeof(uint64_t)],
+                 size_to_clusters(s, reftable_size * sizeof(uint64_t)));
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
+                                              refcount_table_offset),
+                           rt_offset_and_clusters,
+                           sizeof(rt_offset_and_clusters));
+    if (ret < 0) {
+        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
+        goto fail;
+    }
+
+    for (rb_index = 0; rb_index < reftable_size; rb_index++) {
+        be64_to_cpus(&reftable[rb_index]);
+    }
+    s->refcount_table = reftable;
+    s->refcount_table_offset = rt_ofs;
+    s->refcount_table_size = reftable_size;
+
+    return 0;
+
+fail:
+    g_free(reftable);
+    return ret;
+}
+
+/*
  * Checks an image for refcount consistency.
  *
  * Returns 0 if no errors are found, the number of errors in case the image is
@@ -1599,6 +1835,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                           BdrvCheckMode fix)
 {
     BDRVQcowState *s = bs->opaque;
+    BdrvCheckResult pre_compare_res;
     int64_t size, highest_cluster, nb_clusters;
     uint16_t *refcount_table = NULL;
     bool rebuild = false;
@@ -1625,11 +1862,30 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         goto fail;
     }
 
-    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
+    /* In case we don't need to rebuild the refcount structure (but want to fix
+     * something), this function is immediately called again, in which case the
+     * result should be ignored */
+    pre_compare_res = *res;
+    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
                       nb_clusters);
 
-    if (rebuild) {
-        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
+    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
+        fprintf(stderr, "Rebuilding refcount structure\n");
+        ret = rebuild_refcount_structure(bs, res, &refcount_table,
+                                         &nb_clusters);
+        if (ret < 0) {
+            goto fail;
+        }
+    } else if (fix) {
+        if (rebuild) {
+            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
+        }
+
+        if (res->leaks || res->corruptions) {
+            *res = pre_compare_res;
+            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
+                              refcount_table, nb_clusters);
+        }
     }
 
     /* check OFLAG_COPIED */
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 6/8] qcow2: Clean up after refcount rebuild
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
                   ` (4 preceding siblings ...)
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 7/8] iotests: Fix test outputs Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 8/8] iotests: Add test for potentially damaging repairs Max Reitz
  7 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

Because the old refcount structure will be leaked after having rebuilt
it, we need to recalculate the refcounts and run a leak-fixing operation
afterwards.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index e3ca03a..1518f06 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1870,12 +1870,45 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                       nb_clusters);
 
     if (rebuild && (fix & BDRV_FIX_ERRORS)) {
+        BdrvCheckResult old_res = *res;
+
         fprintf(stderr, "Rebuilding refcount structure\n");
         ret = rebuild_refcount_structure(bs, res, &refcount_table,
                                          &nb_clusters);
         if (ret < 0) {
             goto fail;
         }
+
+        res->corruptions = 0;
+        res->leaks = 0;
+
+        /* Because the old reftable has been exchanged for a new one the
+         * references have to be recalculated */
+        rebuild = false;
+        memset(refcount_table, 0, nb_clusters * sizeof(uint16_t));
+        ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
+                                  &nb_clusters);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        /* The old refcount structures are now leaked, fix it; the result can be
+         * ignored */
+        pre_compare_res = *res;
+        compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild, &highest_cluster,
+                          refcount_table, nb_clusters);
+        if (rebuild) {
+            fprintf(stderr, "ERROR rebuilt refcount structure is still "
+                    "broken\n");
+        }
+        *res = pre_compare_res;
+
+        if (res->corruptions < old_res.corruptions) {
+            res->corruptions_fixed += old_res.corruptions - res->corruptions;
+        }
+        if (res->leaks < old_res.leaks) {
+            res->leaks_fixed += old_res.leaks - res->leaks;
+        }
     } else if (fix) {
         if (rebuild) {
             fprintf(stderr, "ERROR need to rebuild refcount structures\n");
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 7/8] iotests: Fix test outputs
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
                   ` (5 preceding siblings ...)
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 6/8] qcow2: Clean up after refcount rebuild Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 8/8] iotests: Add test for potentially damaging repairs Max Reitz
  7 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

039, 060 and 061 all create images with referenced clusters having a
refcount of 0. Because previous commits changed handling of such errors,
these tests now have a different output. Fix it.

Furthermore, 060 created a refblock with a refcount greater than one
which now results in having to rebuild the refcount structure as well.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/039.out | 10 ++++++++--
 tests/qemu-iotests/060.out | 10 ++++++++--
 tests/qemu-iotests/061.out | 18 ++++++++++++------
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out
index 67e7744..0adf153 100644
--- a/tests/qemu-iotests/039.out
+++ b/tests/qemu-iotests/039.out
@@ -25,7 +25,10 @@ read 512/512 bytes at offset 0
 incompatible_features     0x1
 
 == Repairing the image file must succeed ==
-Repairing cluster 5 refcount=0 reference=1
+ERROR cluster 5 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
 The following inconsistencies were found and repaired:
 
     0 leaked clusters
@@ -45,7 +48,10 @@ wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 ./039: Aborted                 ( ulimit -c 0; exec "$@" )
 incompatible_features     0x1
-Repairing cluster 5 refcount=0 reference=1
+ERROR cluster 5 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 incompatible_features     0x0
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index c27c952..41ecbc7 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -27,11 +27,15 @@ incompatible_features     0x0
 qcow2: Preventing invalid write on metadata (overlaps with refcount block); image marked as corrupt.
 write failed: Input/output error
 incompatible_features     0x2
-Repairing refcount block 0 refcount=2
+ERROR refcount block 0 refcount=2
+ERROR cluster 2 refcount=1 reference=2
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=2 reference=1
 The following inconsistencies were found and repaired:
 
     0 leaked clusters
-    1 corruptions
+    2 corruptions
 
 Double checking the fixed image now...
 No errors were found on the image.
@@ -59,6 +63,8 @@ incompatible_features     0x0
 qcow2: Preventing invalid write on metadata (overlaps with inactive L2 table); image marked as corrupt.
 write failed: Input/output error
 incompatible_features     0x2
+ERROR cluster 4 refcount=1 reference=2
+Leaked cluster 9 refcount=1 reference=0
 Repairing cluster 4 refcount=1 reference=2
 Repairing cluster 9 refcount=1 reference=0
 Repairing OFLAG_COPIED data cluster: l2_entry=8000000000040000 refcount=2
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index e372470..4ae6472 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
@@ -76,8 +76,11 @@ autoclear_features        0x0
 refcount_order            4
 header_length             104
 
-Repairing cluster 5 refcount=0 reference=1
-Repairing cluster 6 refcount=0 reference=1
+ERROR cluster 5 refcount=0 reference=1
+ERROR cluster 6 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
 magic                     0x514649fb
 version                   2
 backing_file_offset       0x0
@@ -87,7 +90,7 @@ size                      67108864
 crypt_method              0
 l1_size                   1
 l1_table_offset           0x30000
-refcount_table_offset     0x10000
+refcount_table_offset     0x80000
 refcount_table_clusters   1
 nb_snapshots              0
 snapshot_offset           0x0
@@ -230,8 +233,11 @@ autoclear_features        0x0
 refcount_order            4
 header_length             104
 
-Repairing cluster 5 refcount=0 reference=1
-Repairing cluster 6 refcount=0 reference=1
+ERROR cluster 5 refcount=0 reference=1
+ERROR cluster 6 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
 magic                     0x514649fb
 version                   3
 backing_file_offset       0x0
@@ -241,7 +247,7 @@ size                      67108864
 crypt_method              0
 l1_size                   1
 l1_table_offset           0x30000
-refcount_table_offset     0x10000
+refcount_table_offset     0x80000
 refcount_table_clusters   1
 nb_snapshots              0
 snapshot_offset           0x0
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 8/8] iotests: Add test for potentially damaging repairs
  2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
                   ` (6 preceding siblings ...)
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 7/8] iotests: Fix test outputs Max Reitz
@ 2014-08-13 21:01 ` Max Reitz
  7 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-13 21:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz

There are certain cases where repairing a qcow2 image might actually
damage it further (or rather, where repairing it has in fact damaged it
further with the old qcow2 check implementation). This should not
happen, so add a test for these cases.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/101     | 98 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/101.out | 46 ++++++++++++++++++++++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 145 insertions(+)
 create mode 100755 tests/qemu-iotests/101
 create mode 100644 tests/qemu-iotests/101.out

diff --git a/tests/qemu-iotests/101 b/tests/qemu-iotests/101
new file mode 100755
index 0000000..cc6d0b3
--- /dev/null
+++ b/tests/qemu-iotests/101
@@ -0,0 +1,98 @@
+#!/bin/bash
+#
+# Test case for repairing qcow2 images which cannot be repaired using
+# the on-disk refcount structures
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+# This tests qocw2-specific low-level functionality
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+echo
+echo '=== Repairing an image without any refcount table ==='
+echo
+
+_make_test_img 64M
+# just write some data
+$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# refcount_table_offset
+poke_file "$TEST_IMG" 48 "\x00\x00\x00\x00\x00\x00\x00\x00"
+# refcount_table_clusters
+poke_file "$TEST_IMG" 56 "\x00\x00\x00\x00"
+
+_check_test_img -r all
+
+$QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo '=== Repairing unreferenced data cluster in new refblock area ==='
+echo
+
+IMGOPTS='cluster_size=512' _make_test_img 64M
+# Allocate the first 128 kB in the image (first refblock)
+$QEMU_IO -c 'write 0 111104' "$TEST_IMG" | _filter_qemu_io
+# should be 131072
+stat -c '%s' "$TEST_IMG"
+
+# Enter a cluster at 128 kB (0x20000)
+# XXX: This (0x1ccc8) should be the first free entry in the last L2 table, but
+# we cannot be sure
+poke_file "$TEST_IMG" 117960 "\x80\x00\x00\x00\x00\x02\x00\x00"
+
+# Fill the cluster
+truncate -s 131584 "$TEST_IMG"
+$QEMU_IO -c "open -o driver=raw $TEST_IMG" -c 'write -P 42 128k 512' \
+    | _filter_qemu_io
+
+# The data should now appear at this guest offset
+$QEMU_IO -c 'read -P 42 111104 512' "$TEST_IMG" | _filter_qemu_io
+
+# This cluster is unallocated; fix it
+_check_test_img -r all
+
+# This repair operation must have allocated a new refblock; and that refblock
+# should not overlap with the unallocated data cluster. If it does, the data
+# will be damaged, so check it.
+$QEMU_IO -c 'read -P 42 111104 512' "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/101.out b/tests/qemu-iotests/101.out
new file mode 100644
index 0000000..d0e2149
--- /dev/null
+++ b/tests/qemu-iotests/101.out
@@ -0,0 +1,46 @@
+QA output created by 101
+
+=== Repairing an image without any refcount table ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+ERROR cluster 0 refcount=0 reference=1
+ERROR cluster 3 refcount=0 reference=1
+ERROR cluster 4 refcount=0 reference=1
+ERROR cluster 5 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    4 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Repairing unreferenced data cluster in new refblock area ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 111104/111104 bytes at offset 0
+108.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+131072
+wrote 512/512 bytes at offset 131072
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 111104
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+ERROR cluster 256 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+read 512/512 bytes at offset 111104
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 6e67f61..e25e992 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -100,3 +100,4 @@
 091 rw auto quick
 092 rw auto quick
 095 rw auto quick
+101 rw auto quick
-- 
2.0.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check Max Reitz
@ 2014-08-14 11:56   ` Benoît Canet
  0 siblings, 0 replies; 19+ messages in thread
From: Benoît Canet @ 2014-08-14 11:56 UTC (permalink / raw)
  To: Max Reitz; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

The Wednesday 13 Aug 2014 à 23:01:43 (+0200), Max Reitz wrote :
> Put the code for calculating the reference counts during qemu-img check
> into an own function.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-refcount.c | 202 +++++++++++++++++++++++++++++--------------------
>  1 file changed, 122 insertions(+), 80 deletions(-)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index d60e2fe..9793c27 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -1496,71 +1496,17 @@ done:
>  }
>  
>  /*
> - * Checks an image for refcount consistency.
> - *
> - * Returns 0 if no errors are found, the number of errors in case the image is
> - * detected as corrupted, and -errno when an internal error occurred.
> + * Checks consistency of refblocks and accounts for each refblock in
> + * *refcount_table.
>   */
> -int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> -                          BdrvCheckMode fix)
> +static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
> +                           BdrvCheckMode fix, uint16_t **refcount_table,
> +                           int64_t *nb_clusters)
>  {
>      BDRVQcowState *s = bs->opaque;
> -    int64_t size, i, highest_cluster, nb_clusters;
> -    int refcount1, refcount2;
> -    QCowSnapshot *sn;
> -    uint16_t *refcount_table;
> -    int ret;
> -
> -    size = bdrv_getlength(bs->file);
> -    if (size < 0) {
> -        res->check_errors++;
> -        return size;
> -    }
> -
> -    nb_clusters = size_to_clusters(s, size);
> -    if (nb_clusters > INT_MAX) {
> -        res->check_errors++;
> -        return -EFBIG;
> -    }
> -
> -    refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t));
> -    if (nb_clusters && refcount_table == NULL) {
> -        res->check_errors++;
> -        return -ENOMEM;
> -    }
> -
> -    res->bfi.total_clusters =
> -        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
> -
> -    /* header */
> -    inc_refcounts(bs, res, refcount_table, nb_clusters,
> -        0, s->cluster_size);
> -
> -    /* current L1 table */
> -    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
> -                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO);
> -    if (ret < 0) {
> -        goto fail;
> -    }
> +    int64_t i;
>  
> -    /* snapshots */
> -    for(i = 0; i < s->nb_snapshots; i++) {
> -        sn = s->snapshots + i;
> -        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
> -            sn->l1_table_offset, sn->l1_size, 0);
> -        if (ret < 0) {
> -            goto fail;
> -        }
> -    }
> -    inc_refcounts(bs, res, refcount_table, nb_clusters,
> -        s->snapshots_offset, s->snapshots_size);
> -
> -    /* refcount data */
> -    inc_refcounts(bs, res, refcount_table, nb_clusters,
> -        s->refcount_table_offset,
> -        s->refcount_table_size * sizeof(uint64_t));
> -
> -    for(i = 0; i < s->refcount_table_size; i++) {
> +    for (i = 0; i < s->refcount_table_size; i++) {
>          uint64_t offset, cluster;
>          offset = s->refcount_table[i];
>          cluster = offset >> s->cluster_bits;
> @@ -1568,12 +1514,12 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>          /* Refcount blocks are cluster aligned */
>          if (offset_into_cluster(s, offset)) {
>              fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
> -                "cluster aligned; refcount table entry corrupted\n", i);
> +                    "cluster aligned; refcount table entry corrupted\n", i);
>              res->corruptions++;
>              continue;
>          }
>  
> -        if (cluster >= nb_clusters) {
> +        if (cluster >= *nb_clusters) {
>              fprintf(stderr, "ERROR refcount block %" PRId64
>                      " is outside image\n", i);
>              res->corruptions++;
> @@ -1581,14 +1527,14 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>          }
>  
>          if (offset != 0) {
> -            inc_refcounts(bs, res, refcount_table, nb_clusters,
> -                offset, s->cluster_size);
> -            if (refcount_table[cluster] != 1) {
> +            inc_refcounts(bs, res, *refcount_table, *nb_clusters,
> +                          offset, s->cluster_size);
> +            if ((*refcount_table)[cluster] != 1) {
>                  fprintf(stderr, "%s refcount block %" PRId64
> -                    " refcount=%d\n",
> -                    fix & BDRV_FIX_ERRORS ? "Repairing" :
> -                                            "ERROR",
> -                    i, refcount_table[cluster]);
> +                        " refcount=%d\n",
> +                        fix & BDRV_FIX_ERRORS ? "Repairing" :
> +                                                "ERROR",
> +                        i, (*refcount_table)[cluster]);
>  
>                  if (fix & BDRV_FIX_ERRORS) {
>                      int64_t new_offset;
> @@ -1600,18 +1546,25 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>                      }
>  
>                      /* update refcounts */
> -                    if ((new_offset >> s->cluster_bits) >= nb_clusters) {
> +                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
>                          /* increase refcount_table size if necessary */
> -                        int old_nb_clusters = nb_clusters;
> -                        nb_clusters = (new_offset >> s->cluster_bits) + 1;
> -                        refcount_table = g_realloc(refcount_table,
> -                                nb_clusters * sizeof(uint16_t));
> -                        memset(&refcount_table[old_nb_clusters], 0, (nb_clusters
> -                                - old_nb_clusters) * sizeof(uint16_t));
> +                        int old_nb_clusters = *nb_clusters;
> +                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
> +
> +                        *refcount_table = g_try_realloc(*refcount_table,
> +                                *nb_clusters * sizeof(uint16_t));
> +                        if (!*refcount_table) {
> +                            res->check_errors++;
> +                            return -ENOMEM;
> +                        }
> +
> +                        memset(&(*refcount_table)[old_nb_clusters], 0,
> +                               (*nb_clusters - old_nb_clusters) *
> +                               sizeof(uint16_t));
>                      }
> -                    refcount_table[cluster]--;
> -                    inc_refcounts(bs, res, refcount_table, nb_clusters,
> -                            new_offset, s->cluster_size);
> +                    (*refcount_table)[cluster]--;
> +                    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
> +                                  new_offset, s->cluster_size);
>  
>                      res->corruptions_fixed++;
>                  } else {
> @@ -1621,6 +1574,95 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>          }
>      }
>  
> +    return 0;
> +}
> +
> +/*
> + * Calculates an in-memory refcount table.
> + */
> +static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> +                               BdrvCheckMode fix, uint16_t **refcount_table,
> +                               int64_t *nb_clusters)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowSnapshot *sn;
> +    int64_t i;
> +    int ret;
> +
> +    if (!*refcount_table) {
> +        *refcount_table = g_try_malloc0(*nb_clusters * sizeof(uint16_t));
> +        if (*nb_clusters && !*refcount_table) {
> +            res->check_errors++;
> +            return -ENOMEM;
> +        }
> +    }
> +
> +    /* header */
> +    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
> +                  0, s->cluster_size);
> +
> +    /* current L1 table */
> +    ret = check_refcounts_l1(bs, res, *refcount_table, *nb_clusters,
> +                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    /* snapshots */
> +    for (i = 0; i < s->nb_snapshots; i++) {
> +        sn = s->snapshots + i;
> +        ret = check_refcounts_l1(bs, res, *refcount_table, *nb_clusters,
> +                                 sn->l1_table_offset, sn->l1_size, 0);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
> +                  s->snapshots_offset, s->snapshots_size);
> +
> +    /* refcount data */
> +    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
> +                  s->refcount_table_offset,
> +                  s->refcount_table_size * sizeof(uint64_t));
> +
> +    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
> +}
> +
> +/*
> + * Checks an image for refcount consistency.
> + *
> + * Returns 0 if no errors are found, the number of errors in case the image is
> + * detected as corrupted, and -errno when an internal error occurred.
> + */
> +int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> +                          BdrvCheckMode fix)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int64_t size, i, highest_cluster, nb_clusters;
> +    int refcount1, refcount2;
> +    uint16_t *refcount_table = NULL;
> +    int ret;
> +
> +    size = bdrv_getlength(bs->file);
> +    if (size < 0) {
> +        res->check_errors++;
> +        return size;
> +    }
> +
> +    nb_clusters = size_to_clusters(s, size);
> +    if (nb_clusters > INT_MAX) {
> +        res->check_errors++;
> +        return -EFBIG;
> +    }
> +
> +    res->bfi.total_clusters =
> +        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
> +
> +    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
>      /* compare ref counts */
>      for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
>          refcount1 = get_refcount(bs, i);
> -- 
> 2.0.3
> 
> 

The diff is impressive but the patch is mostly moving stuff around.
So
Reviewed-by: Benoit Canet <benoit.canet@nodalink.com>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison for check
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison " Max Reitz
@ 2014-08-14 12:02   ` Benoît Canet
  2014-08-15 12:31     ` Max Reitz
  0 siblings, 1 reply; 19+ messages in thread
From: Benoît Canet @ 2014-08-14 12:02 UTC (permalink / raw)
  To: Max Reitz; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

The Wednesday 13 Aug 2014 à 23:01:44 (+0200), Max Reitz wrote :
> Put the code for comparing the calculated refcounts against the on-disk
> refcounts during qemu-img check into an own function.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-refcount.c | 91 ++++++++++++++++++++++++++++----------------------
>  1 file changed, 52 insertions(+), 39 deletions(-)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 9793c27..d1da8d5 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -1629,46 +1629,22 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>  }
>  
>  /*
> - * Checks an image for refcount consistency.
> - *
> - * Returns 0 if no errors are found, the number of errors in case the image is
> - * detected as corrupted, and -errno when an internal error occurred.
> + * Compares the actual reference count for each cluster in the image against the
> + * refcount as reported by the refcount structures on-disk.
>   */
> -int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> -                          BdrvCheckMode fix)
> +static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> +                              BdrvCheckMode fix, int64_t *highest_cluster,
> +                              uint16_t *refcount_table, int64_t nb_clusters)
>  {
>      BDRVQcowState *s = bs->opaque;
> -    int64_t size, i, highest_cluster, nb_clusters;
> -    int refcount1, refcount2;
> -    uint16_t *refcount_table = NULL;
> -    int ret;
> -
> -    size = bdrv_getlength(bs->file);
> -    if (size < 0) {
> -        res->check_errors++;
> -        return size;
> -    }
> -
> -    nb_clusters = size_to_clusters(s, size);
> -    if (nb_clusters > INT_MAX) {
> -        res->check_errors++;
> -        return -EFBIG;
> -    }
> -
> -    res->bfi.total_clusters =
> -        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
> -
> -    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
> -    if (ret < 0) {
> -        goto fail;
> -    }
> +    int64_t i;
> +    int refcount1, refcount2, ret;
>  
> -    /* compare ref counts */
> -    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
> +    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
>          refcount1 = get_refcount(bs, i);
>          if (refcount1 < 0) {
>              fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
> -                i, strerror(-refcount1));
> +                    i, strerror(-refcount1));

Free coding fix.

>              res->check_errors++;
>              continue;
>          }
> @@ -1676,11 +1652,10 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>          refcount2 = refcount_table[i];
>  
>          if (refcount1 > 0 || refcount2 > 0) {
> -            highest_cluster = i;
> +            *highest_cluster = i;

idem.
>          }
>  
>          if (refcount1 != refcount2) {
> -

Spurious blank line removal.

>              /* Check if we're allowed to fix the mismatch */
>              int *num_fixed = NULL;
>              if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
> @@ -1690,10 +1665,10 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>              }
>  
>              fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
> -                   num_fixed != NULL     ? "Repairing" :
> -                   refcount1 < refcount2 ? "ERROR" :
> -                                           "Leaked",
> -                   i, refcount1, refcount2);
> +                    num_fixed != NULL     ? "Repairing" :
> +                    refcount1 < refcount2 ? "ERROR" :
> +                                            "Leaked",
> +                    i, refcount1, refcount2);

Gratuitous coding style fix.

Patch 1 have similar issues.
I think the patchs would be smaller and cleaner without these spurious coding style fixes.

>  
>              if (num_fixed) {
>                  ret = update_refcount(bs, i << s->cluster_bits, 1,
> @@ -1713,6 +1688,44 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>              }
>          }
>      }
> +}
> +
> +/*
> + * Checks an image for refcount consistency.
> + *
> + * Returns 0 if no errors are found, the number of errors in case the image is
> + * detected as corrupted, and -errno when an internal error occurred.
> + */
> +int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> +                          BdrvCheckMode fix)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int64_t size, highest_cluster, nb_clusters;
> +    uint16_t *refcount_table = NULL;
> +    int ret;
> +
> +    size = bdrv_getlength(bs->file);
> +    if (size < 0) {
> +        res->check_errors++;
> +        return size;
> +    }
> +
> +    nb_clusters = size_to_clusters(s, size);
> +    if (nb_clusters > INT_MAX) {
> +        res->check_errors++;
> +        return -EFBIG;
> +    }
> +
> +    res->bfi.total_clusters =
> +        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
> +
> +    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
> +                      nb_clusters);
>  
>      /* check OFLAG_COPIED */
>      ret = check_oflag_copied(bs, res, fix);
> -- 
> 2.0.3
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end Max Reitz
@ 2014-08-14 12:11   ` Benoît Canet
  2014-08-15 12:36     ` Max Reitz
  0 siblings, 1 reply; 19+ messages in thread
From: Benoît Canet @ 2014-08-14 12:11 UTC (permalink / raw)
  To: Max Reitz; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

The Wednesday 13 Aug 2014 à 23:01:45 (+0200), Max Reitz wrote :
> If the qcow2 check function detects a refcount block located beyond the
> image end, grow the image appropriately. This cannot break anything and
> is the logical fix for such a case.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-refcount.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index d1da8d5..a1d93e5 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -1504,7 +1504,8 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>                             int64_t *nb_clusters)
>  {
>      BDRVQcowState *s = bs->opaque;
> -    int64_t i;
> +    int64_t i, size;
> +    int ret;
>  
>      for (i = 0; i < s->refcount_table_size; i++) {
>          uint64_t offset, cluster;
> @@ -1520,9 +1521,50 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>          }
>  
>          if (cluster >= *nb_clusters) {
> -            fprintf(stderr, "ERROR refcount block %" PRId64
> -                    " is outside image\n", i);
> -            res->corruptions++;
> +            fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
> +
> +            if (fix & BDRV_FIX_ERRORS) {
> +                int64_t old_nb_clusters = *nb_clusters;
> +
> +                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
> +                if (ret < 0) {
> +                    goto resize_fail;
> +                }
> +                size = bdrv_getlength(bs->file);
> +                if (size < 0) {
> +                    ret = size;
> +                    goto resize_fail;
> +                }
> +
> +                *nb_clusters = size_to_clusters(s, size);
> +                assert(*nb_clusters >= old_nb_clusters);
> +
> +                *refcount_table = g_try_realloc(*refcount_table,
> +                        *nb_clusters * sizeof(uint16_t));
> +                if (!*refcount_table) {
> +                    res->check_errors++;
> +                    return -ENOMEM;

So you really want to make sure the code is not trying anything more
by directly returning -ENOMEM and not doing goto resize_fail.

This makes sense though.

> +                }
> +
> +                memset(*refcount_table + old_nb_clusters, 0,
> +                       (*nb_clusters - old_nb_clusters) * sizeof(uint16_t));
> +
> +                if (cluster >= *nb_clusters) {
> +                    ret = -EINVAL;
> +                    goto resize_fail;
> +                }
> +
> +                res->corruptions_fixed++;
> +                continue;
> +
> +resize_fail:
> +                res->corruptions++;
> +                fprintf(stderr, "ERROR could not resize image: %s\n",
> +                        strerror(-ret));

Isn't a "return ret;" missing here ?
the code will fall in the continue statement without it.

> +            } else {
> +                res->corruptions++;
> +            }
>              continue;
>          }
>  
> -- 
> 2.0.3
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs Max Reitz
@ 2014-08-14 12:33   ` Benoît Canet
  2014-08-15 12:42     ` Max Reitz
  0 siblings, 1 reply; 19+ messages in thread
From: Benoît Canet @ 2014-08-14 12:33 UTC (permalink / raw)
  To: Max Reitz; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

The Wednesday 13 Aug 2014 à 23:01:46 (+0200), Max Reitz wrote :
> If a referenced cluster has a refcount of 0, increasing its refcount may
> result in clusters being allocated for the refcount structures. This may
> overwrite the referenced cluster, therefore we cannot simply increase
> the refcount then.
> 
> In such cases, we can either try to replicate all the refcount
> operations solely for the check operation, basing the allocations on the
> in-memory refcount table; or we can simply rebuild the whole refcount
> structure based on the in-memory refcount table. Since the latter will
> be much easier, do that.
> 
> To prepare for this, introduce a "rebuild" boolean which should be set
> to true whenever a fix is rather dangerous or too complicated using the
> current refcount structures. Another example for this is refcount blocks
> being referenced more than once.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-refcount.c | 191 +++++++------------------------------------------
>  1 file changed, 27 insertions(+), 164 deletions(-)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index a1d93e5..6400840 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -1381,127 +1381,12 @@ fail:
>  }
>  
>  /*
> - * Writes one sector of the refcount table to the disk
> - */
> -#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
> -static int write_reftable_entry(BlockDriverState *bs, int rt_index)
> -{
> -    BDRVQcowState *s = bs->opaque;
> -    uint64_t buf[RT_ENTRIES_PER_SECTOR];
> -    int rt_start_index;
> -    int i, ret;
> -
> -    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
> -    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
> -        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
> -    }
> -
> -    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE,
> -            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
> -            sizeof(buf));
> -    if (ret < 0) {
> -        return ret;
> -    }
> -
> -    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
> -    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
> -            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
> -    if (ret < 0) {
> -        return ret;
> -    }
> -
> -    return 0;
> -}
> -
> -/*
> - * Allocates a new cluster for the given refcount block (represented by its
> - * offset in the image file) and copies the current content there. This function
> - * does _not_ decrement the reference count for the currently occupied cluster.
> - *
> - * This function prints an informative message to stderr on error (and returns
> - * -errno); on success, the offset of the newly allocated cluster is returned.
> - */
> -static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
> -                                      uint64_t offset)
> -{
> -    BDRVQcowState *s = bs->opaque;
> -    int64_t new_offset = 0;
> -    void *refcount_block = NULL;
> -    int ret;
> -
> -    /* allocate new refcount block */
> -    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
> -    if (new_offset < 0) {
> -        fprintf(stderr, "Could not allocate new cluster: %s\n",
> -                strerror(-new_offset));
> -        ret = new_offset;
> -        goto done;
> -    }
> -
> -    /* fetch current refcount block content */
> -    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
> -        goto fail_free_cluster;
> -    }
> -
> -    /* new block has not yet been entered into refcount table, therefore it is
> -     * no refcount block yet (regarding this check) */
> -    ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not write refcount block; metadata overlap "
> -                "check failed: %s\n", strerror(-ret));
> -        /* the image will be marked corrupt, so don't even attempt on freeing
> -         * the cluster */
> -        goto done;
> -    }
> -
> -    /* write to new block */
> -    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
> -            s->cluster_sectors);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
> -        goto fail_free_cluster;
> -    }
> -
> -    /* update refcount table */
> -    assert(!offset_into_cluster(s, new_offset));
> -    s->refcount_table[reftable_index] = new_offset;
> -    ret = write_reftable_entry(bs, reftable_index);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not update refcount table: %s\n",
> -                strerror(-ret));
> -        goto fail_free_cluster;
> -    }
> -
> -    goto done;
> -
> -fail_free_cluster:
> -    qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCARD_OTHER);
> -
> -done:
> -    if (refcount_block) {
> -        /* This should never fail, as it would only do so if the given refcount
> -         * block cannot be found in the cache. As this is impossible as long as
> -         * there are no bugs, assert the success. */
> -        int tmp = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
> -        assert(tmp == 0);
> -    }
> -
> -    if (ret < 0) {
> -        return ret;
> -    }
> -
> -    return new_offset;
> -}
> -
> -/*
>   * Checks consistency of refblocks and accounts for each refblock in
>   * *refcount_table.
>   */
>  static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
> -                           BdrvCheckMode fix, uint16_t **refcount_table,
> -                           int64_t *nb_clusters)
> +                           BdrvCheckMode fix, bool *rebuild,
> +                           uint16_t **refcount_table, int64_t *nb_clusters)
>  {
>      BDRVQcowState *s = bs->opaque;
>      int64_t i, size;
> @@ -1517,6 +1402,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>              fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
>                      "cluster aligned; refcount table entry corrupted\n", i);
>              res->corruptions++;
> +            *rebuild = true;
>              continue;
>          }
>  
> @@ -1571,47 +1457,12 @@ resize_fail:
>          if (offset != 0) {
>              inc_refcounts(bs, res, *refcount_table, *nb_clusters,
>                            offset, s->cluster_size);
> -            if ((*refcount_table)[cluster] != 1) {
> -                fprintf(stderr, "%s refcount block %" PRId64
> -                        " refcount=%d\n",
> -                        fix & BDRV_FIX_ERRORS ? "Repairing" :
> -                                                "ERROR",
> -                        i, (*refcount_table)[cluster]);
> -
> -                if (fix & BDRV_FIX_ERRORS) {
> -                    int64_t new_offset;
> -
> -                    new_offset = realloc_refcount_block(bs, i, offset);
> -                    if (new_offset < 0) {
> -                        res->corruptions++;
> -                        continue;
> -                    }
> -
> -                    /* update refcounts */
> -                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
> -                        /* increase refcount_table size if necessary */
> -                        int old_nb_clusters = *nb_clusters;
> -                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
> -
> -                        *refcount_table = g_try_realloc(*refcount_table,
> -                                *nb_clusters * sizeof(uint16_t));
> -                        if (!*refcount_table) {
> -                            res->check_errors++;
> -                            return -ENOMEM;
> -                        }
> -
> -                        memset(&(*refcount_table)[old_nb_clusters], 0,
> -                               (*nb_clusters - old_nb_clusters) *
> -                               sizeof(uint16_t));
> -                    }
> -                    (*refcount_table)[cluster]--;
> -                    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
> -                                  new_offset, s->cluster_size);
>  
> -                    res->corruptions_fixed++;
> -                } else {
> -                    res->corruptions++;
> -                }
> +            if ((*refcount_table)[cluster] != 1) {
> +                fprintf(stderr, "ERROR refcount block %" PRId64
> +                        " refcount=%d\n", i, (*refcount_table)[cluster]);
> +                res->corruptions++;
> +                *rebuild = true;
>              }
>          }
>      }
> @@ -1623,8 +1474,8 @@ resize_fail:
>   * Calculates an in-memory refcount table.
>   */
>  static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> -                               BdrvCheckMode fix, uint16_t **refcount_table,
> -                               int64_t *nb_clusters)
> +                               BdrvCheckMode fix, bool *rebuild,
> +                               uint16_t **refcount_table, int64_t *nb_clusters)
>  {
>      BDRVQcowState *s = bs->opaque;
>      QCowSnapshot *sn;
> @@ -1667,7 +1518,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>                    s->refcount_table_offset,
>                    s->refcount_table_size * sizeof(uint64_t));
>  
> -    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
> +    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
>  }
>  
>  /*
> @@ -1675,7 +1526,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>   * refcount as reported by the refcount structures on-disk.
>   */
>  static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
> -                              BdrvCheckMode fix, int64_t *highest_cluster,
> +                              BdrvCheckMode fix, bool *rebuild,
> +                              int64_t *highest_cluster,
>                                uint16_t *refcount_table, int64_t nb_clusters)
>  {
>      BDRVQcowState *s = bs->opaque;
> @@ -1702,10 +1554,15 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>              int *num_fixed = NULL;
>              if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
>                  num_fixed = &res->leaks_fixed;
> -            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
> +            } else if (refcount1 > 0 && refcount1 < refcount2 &&
> +                       (fix & BDRV_FIX_ERRORS)) {
>                  num_fixed = &res->corruptions_fixed;
>              }
>  
> +            if (refcount1 == 0) {
> +                *rebuild = true;
> +            }
> +
>              fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
>                      num_fixed != NULL     ? "Repairing" :
>                      refcount1 < refcount2 ? "ERROR" :
> @@ -1744,6 +1601,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>      BDRVQcowState *s = bs->opaque;
>      int64_t size, highest_cluster, nb_clusters;
>      uint16_t *refcount_table = NULL;
> +    bool rebuild = false;
>      int ret;
>  
>      size = bdrv_getlength(bs->file);
> @@ -1761,14 +1619,19 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>      res->bfi.total_clusters =
>          size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
>  
> -    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
> +    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
> +                              &nb_clusters);
>      if (ret < 0) {
>          goto fail;
>      }
>  
> -    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
> +    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
>                        nb_clusters);
>  
> +    if (rebuild) {
> +        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
> +    }

Aren't you afraid that introducing rebuild in this way could
break git bisect ?

Why not introducing the new rebuild code in the previous patch and
use it in this one ?

Best regards

Benoît

> +
>      /* check OFLAG_COPIED */
>      ret = check_oflag_copied(bs, res, fix);
>      if (ret < 0) {
> -- 
> 2.0.3
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check
  2014-08-13 21:01 ` [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check Max Reitz
@ 2014-08-14 12:58   ` Benoît Canet
  2014-08-15 12:49     ` Max Reitz
  0 siblings, 1 reply; 19+ messages in thread
From: Benoît Canet @ 2014-08-14 12:58 UTC (permalink / raw)
  To: Max Reitz; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

The Wednesday 13 Aug 2014 à 23:01:47 (+0200), Max Reitz wrote :
> The previous commit introduced the "rebuild" variable to qcow2's
> implementation of the image consistency check. Now make use of this by
> adding a function which creates a completely new refcount structure
> based solely on the in-memory information gathered before.
> 
> The old refcount structure will be leaked, however.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-refcount.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 259 insertions(+), 3 deletions(-)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 6400840..e3ca03a 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -1590,6 +1590,242 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>  }
>  
>  /*
> + * Allocates a cluster using an in-memory refcount table (IMRT) in contrast to
> + * the on-disk refcount structures.
> + *
> + * *first_free_cluster does not necessarily point to the first free cluster, but
> + * may point to one cluster as close as possible before it. The offset returned
> + * will never be before that cluster.
> + */
> +static int64_t alloc_clusters_imrt(BlockDriverState *bs,
> +                                   int cluster_count,
> +                                   uint16_t **refcount_table,

> +                                   int64_t *nb_clusters,

I don't understand this parameters name.
nb_ and the plural seems to imply that it's a cluster count.
While the int64_t imply it's large.

> +                                   int64_t *first_free_cluster)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int64_t cluster = *first_free_cluster, i;

Here cluster is a cluster offset.

> +    bool first_gap = true;
> +    int contiguous_clusters;
> +
> +    for (contiguous_clusters = 0;
> +         cluster < *nb_clusters && contiguous_clusters < cluster_count;

And here we compare a cluster offset (cluster) with something named like a
cluster count (nb_clusters).

> +         cluster++)
> +    {
> +        if (!(*refcount_table)[cluster]) {
> +            contiguous_clusters++;
> +            if (first_gap) {
> +                *first_free_cluster = cluster;
> +                first_gap = false;
> +            }
> +        } else if (contiguous_clusters) {
> +            contiguous_clusters = 0;
> +        }
> +    }
> +
> +    if (contiguous_clusters < cluster_count) {
> +        int64_t old_nb_clusters = *nb_clusters;
> +
> +        *nb_clusters = cluster + cluster_count - contiguous_clusters;

Here we compute a cluster offset (nb_cluster) by adding and removing
clusters counts (cluster_count, continuous_cluster) to a cluster offset (cluster)
while (nb_clusters) is named like a cluster count.

> +        *refcount_table = g_try_realloc(*refcount_table,
> +                                        *nb_clusters * sizeof(uint16_t));
This confuse me further.

> +        if (!*refcount_table) {
> +            return -ENOMEM;
> +        }
> +
> +        memset(*refcount_table + old_nb_clusters, 0,
> +               (*nb_clusters - old_nb_clusters) * sizeof(uint16_t));
> +    }
> +
> +    cluster -= contiguous_clusters;
> +    for (i = 0; i < cluster_count; i++) {
> +        (*refcount_table)[cluster + i] = 1;
> +    }
> +
> +    return cluster << s->cluster_bits;
> +}
> +
> +/*
> + * Creates a new refcount structure based solely on the in-memory information
> + * given through *refcount_table. All necessary allocations will be reflected
> + * in that array.
> + *
> + * On success, the old refcount structure is leaked (it will be covered by the
> + * new refcount structure).
> + */
> +static int rebuild_refcount_structure(BlockDriverState *bs,
> +                                      BdrvCheckResult *res,
> +                                      uint16_t **refcount_table,
> +                                      int64_t *nb_clusters)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int64_t first_free_cluster = 0, rt_ofs = -1, cluster = 0;
> +    int64_t rb_ofs, rb_start, rb_index;
> +    uint32_t reftable_size = 0;
> +    uint64_t *reftable = NULL;
> +    uint16_t *on_disk_rb;
> +    uint8_t rt_offset_and_clusters[sizeof(uint64_t) + sizeof(uint32_t)];
> +    int i, ret = 0;
> +
> +    qcow2_cache_empty(bs, s->refcount_block_cache);
> +
> +write_refblocks:
> +    for (; cluster < *nb_clusters; cluster++) {
> +        if (!(*refcount_table)[cluster]) {
> +            continue;
> +        }
> +
> +        rb_index = cluster >> (s->cluster_bits - 1);
> +        rb_start = rb_index << (s->cluster_bits - 1);
> +
> +        /* Don't allocate a cluster in a refblock already written to disk */
> +        if (first_free_cluster < rb_start) {
> +            first_free_cluster = rb_start;
> +        }
> +        rb_ofs = alloc_clusters_imrt(bs, 1, refcount_table, nb_clusters,
> +                                     &first_free_cluster);
> +        if (rb_ofs < 0) {
> +            fprintf(stderr, "ERROR allocating refblock: %s\n", strerror(-ret));
> +            res->check_errors++;
> +            ret = rb_ofs;
> +            goto fail;
> +        }
> +
> +        if (reftable_size <= rb_index) {
> +            uint32_t old_rt_size = reftable_size;
> +            reftable_size = ROUND_UP((rb_index + 1) * sizeof(uint64_t),
> +                                     s->cluster_size) / sizeof(uint64_t);
> +            reftable = g_try_realloc(reftable,
> +                                     reftable_size * sizeof(uint64_t));
> +            if (!reftable) {
> +                res->check_errors++;
> +                ret = -ENOMEM;
> +                goto fail;
> +            }
> +
> +            memset(reftable + old_rt_size, 0,
> +                   (reftable_size - old_rt_size) * sizeof(uint64_t));
> +
> +            /* The offset we have for the reftable is now no longer valid;
> +             * this will leak that range, but we can easily fix that by running
> +             * a leak-fixing check after this rebuild operation */
> +            rt_ofs = -1;
> +        }
> +        reftable[rb_index] = rb_ofs;
> +
> +        /* If this is apparently the last refblock (for now), try to squeeze the
> +         * reftable in */
> +        if (rb_index == (*nb_clusters - 1) >> (s->cluster_bits - 1) &&
> +            rt_ofs < 0)
> +        {
> +            rt_ofs = alloc_clusters_imrt(bs, size_to_clusters(s, reftable_size *
> +                                                              sizeof(uint64_t)),
> +                                         refcount_table, nb_clusters,
> +                                         &first_free_cluster);
> +            if (rt_ofs < 0) {
> +                fprintf(stderr, "ERROR allocating reftable: %s\n",
> +                        strerror(-ret));
> +                res->check_errors++;
> +                ret = rt_ofs;
> +                goto fail;
> +            }
> +        }
> +
> +        ret = qcow2_pre_write_overlap_check(bs, 0, rb_ofs, s->cluster_size);
> +        if (ret < 0) {
> +            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
> +            goto fail;
> +        }
> +
> +        on_disk_rb = g_malloc0(s->cluster_size);
> +        for (i = 0; i < s->cluster_size / sizeof(uint16_t) &&
> +                    rb_start + i < *nb_clusters; i++)
> +        {
> +            on_disk_rb[i] = cpu_to_be16((*refcount_table)[rb_start + i]);
> +        }
> +
> +        ret = bdrv_write(bs->file, rb_ofs / BDRV_SECTOR_SIZE,
> +                         (void *)on_disk_rb, s->cluster_sectors);
> +        g_free(on_disk_rb);
> +        if (ret < 0) {
> +            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
> +            goto fail;
> +        }
> +
> +        /* Go to the end of this refblock */
> +        cluster = rb_start + s->cluster_size / sizeof(uint16_t) - 1;
> +    }
> +
> +    if (rt_ofs < 0) {
> +        int64_t post_rb_start = ROUND_UP(*nb_clusters,
> +                                         s->cluster_size / sizeof(uint16_t));
> +
> +        /* Not pretty but simple */
> +        if (first_free_cluster < post_rb_start) {
> +            first_free_cluster = post_rb_start;
> +        }
> +        rt_ofs = alloc_clusters_imrt(bs, size_to_clusters(s, reftable_size *
> +                                                          sizeof(uint64_t)),
> +                                     refcount_table, nb_clusters,
> +                                     &first_free_cluster);
> +        if (rt_ofs < 0) {
> +            fprintf(stderr, "ERROR allocating reftable: %s\n", strerror(-ret));
> +            res->check_errors++;
> +            ret = rt_ofs;
> +            goto fail;
> +        }
> +
> +        goto write_refblocks;
> +    }
> +
> +    assert(reftable);
> +
> +    for (rb_index = 0; rb_index < reftable_size; rb_index++) {
> +        cpu_to_be64s(&reftable[rb_index]);
> +    }
> +
> +    ret = qcow2_pre_write_overlap_check(bs, 0, rt_ofs,
> +                                        reftable_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
> +        goto fail;
> +    }
> +
> +    ret = bdrv_write(bs->file, rt_ofs / BDRV_SECTOR_SIZE, (void *)reftable,
> +                     reftable_size * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
> +    if (ret < 0) {
> +        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
> +        goto fail;
> +    }
> +
> +    /* Enter new reftable into the image header */
> +    cpu_to_be64w((uint64_t *)&rt_offset_and_clusters[0], rt_ofs);
> +    cpu_to_be32w((uint32_t *)&rt_offset_and_clusters[sizeof(uint64_t)],
> +                 size_to_clusters(s, reftable_size * sizeof(uint64_t)));
> +    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
> +                                              refcount_table_offset),
> +                           rt_offset_and_clusters,
> +                           sizeof(rt_offset_and_clusters));
> +    if (ret < 0) {
> +        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
> +        goto fail;
> +    }
> +
> +    for (rb_index = 0; rb_index < reftable_size; rb_index++) {
> +        be64_to_cpus(&reftable[rb_index]);
> +    }
> +    s->refcount_table = reftable;
> +    s->refcount_table_offset = rt_ofs;
> +    s->refcount_table_size = reftable_size;
> +
> +    return 0;
> +
> +fail:
> +    g_free(reftable);
> +    return ret;
> +}
> +
> +/*
>   * Checks an image for refcount consistency.
>   *
>   * Returns 0 if no errors are found, the number of errors in case the image is
> @@ -1599,6 +1835,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>                            BdrvCheckMode fix)
>  {
>      BDRVQcowState *s = bs->opaque;
> +    BdrvCheckResult pre_compare_res;
>      int64_t size, highest_cluster, nb_clusters;
>      uint16_t *refcount_table = NULL;
>      bool rebuild = false;
> @@ -1625,11 +1862,30 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>          goto fail;
>      }
>  
> -    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
> +    /* In case we don't need to rebuild the refcount structure (but want to fix
> +     * something), this function is immediately called again, in which case the
> +     * result should be ignored */
> +    pre_compare_res = *res;
> +    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
>                        nb_clusters);
>  
> -    if (rebuild) {
> -        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
> +    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
> +        fprintf(stderr, "Rebuilding refcount structure\n");
> +        ret = rebuild_refcount_structure(bs, res, &refcount_table,
> +                                         &nb_clusters);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +    } else if (fix) {
> +        if (rebuild) {
> +            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
> +        }
> +
> +        if (res->leaks || res->corruptions) {
> +            *res = pre_compare_res;
> +            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
> +                              refcount_table, nb_clusters);
> +        }
>      }
>  
>      /* check OFLAG_COPIED */
> -- 
> 2.0.3
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison for check
  2014-08-14 12:02   ` Benoît Canet
@ 2014-08-15 12:31     ` Max Reitz
  2014-08-15 13:47       ` Max Reitz
  0 siblings, 1 reply; 19+ messages in thread
From: Max Reitz @ 2014-08-15 12:31 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On 14.08.2014 14:02, Benoît Canet wrote:
> The Wednesday 13 Aug 2014 à 23:01:44 (+0200), Max Reitz wrote :
>> Put the code for comparing the calculated refcounts against the on-disk
>> refcounts during qemu-img check into an own function.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-refcount.c | 91 ++++++++++++++++++++++++++++----------------------
>>   1 file changed, 52 insertions(+), 39 deletions(-)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 9793c27..d1da8d5 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -1629,46 +1629,22 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>   }
>>   
>>   /*
>> - * Checks an image for refcount consistency.
>> - *
>> - * Returns 0 if no errors are found, the number of errors in case the image is
>> - * detected as corrupted, and -errno when an internal error occurred.
>> + * Compares the actual reference count for each cluster in the image against the
>> + * refcount as reported by the refcount structures on-disk.
>>    */
>> -int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>> -                          BdrvCheckMode fix)
>> +static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>> +                              BdrvCheckMode fix, int64_t *highest_cluster,
>> +                              uint16_t *refcount_table, int64_t nb_clusters)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>> -    int64_t size, i, highest_cluster, nb_clusters;
>> -    int refcount1, refcount2;
>> -    uint16_t *refcount_table = NULL;
>> -    int ret;
>> -
>> -    size = bdrv_getlength(bs->file);
>> -    if (size < 0) {
>> -        res->check_errors++;
>> -        return size;
>> -    }
>> -
>> -    nb_clusters = size_to_clusters(s, size);
>> -    if (nb_clusters > INT_MAX) {
>> -        res->check_errors++;
>> -        return -EFBIG;
>> -    }
>> -
>> -    res->bfi.total_clusters =
>> -        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
>> -
>> -    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
>> -    if (ret < 0) {
>> -        goto fail;
>> -    }
>> +    int64_t i;
>> +    int refcount1, refcount2, ret;
>>   
>> -    /* compare ref counts */
>> -    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
>> +    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
>>           refcount1 = get_refcount(bs, i);
>>           if (refcount1 < 0) {
>>               fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
>> -                i, strerror(-refcount1));
>> +                    i, strerror(-refcount1));
> Free coding fix.
>
>>               res->check_errors++;
>>               continue;
>>           }
>> @@ -1676,11 +1652,10 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>           refcount2 = refcount_table[i];
>>   
>>           if (refcount1 > 0 || refcount2 > 0) {
>> -            highest_cluster = i;
>> +            *highest_cluster = i;
> idem.
>>           }
>>   
>>           if (refcount1 != refcount2) {
>> -
> Spurious blank line removal.
>
>>               /* Check if we're allowed to fix the mismatch */
>>               int *num_fixed = NULL;
>>               if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
>> @@ -1690,10 +1665,10 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>               }
>>   
>>               fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
>> -                   num_fixed != NULL     ? "Repairing" :
>> -                   refcount1 < refcount2 ? "ERROR" :
>> -                                           "Leaked",
>> -                   i, refcount1, refcount2);
>> +                    num_fixed != NULL     ? "Repairing" :
>> +                    refcount1 < refcount2 ? "ERROR" :
>> +                                            "Leaked",
>> +                    i, refcount1, refcount2);
> Gratuitous coding style fix.
>
> Patch 1 have similar issues.
> I think the patchs would be smaller and cleaner without these spurious coding style fixes.

Well, I did them on purpose to avoid an additional patch. I'll rework it 
for v2.

Max

>>   
>>               if (num_fixed) {
>>                   ret = update_refcount(bs, i << s->cluster_bits, 1,
>> @@ -1713,6 +1688,44 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>               }
>>           }
>>       }
>> +}
>> +
>> +/*
>> + * Checks an image for refcount consistency.
>> + *
>> + * Returns 0 if no errors are found, the number of errors in case the image is
>> + * detected as corrupted, and -errno when an internal error occurred.
>> + */
>> +int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>> +                          BdrvCheckMode fix)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int64_t size, highest_cluster, nb_clusters;
>> +    uint16_t *refcount_table = NULL;
>> +    int ret;
>> +
>> +    size = bdrv_getlength(bs->file);
>> +    if (size < 0) {
>> +        res->check_errors++;
>> +        return size;
>> +    }
>> +
>> +    nb_clusters = size_to_clusters(s, size);
>> +    if (nb_clusters > INT_MAX) {
>> +        res->check_errors++;
>> +        return -EFBIG;
>> +    }
>> +
>> +    res->bfi.total_clusters =
>> +        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
>> +
>> +    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
>> +                      nb_clusters);
>>   
>>       /* check OFLAG_COPIED */
>>       ret = check_oflag_copied(bs, res, fix);
>> -- 
>> 2.0.3
>>
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end
  2014-08-14 12:11   ` Benoît Canet
@ 2014-08-15 12:36     ` Max Reitz
  0 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-15 12:36 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On 14.08.2014 14:11, Benoît Canet wrote:
> The Wednesday 13 Aug 2014 à 23:01:45 (+0200), Max Reitz wrote :
>> If the qcow2 check function detects a refcount block located beyond the
>> image end, grow the image appropriately. This cannot break anything and
>> is the logical fix for such a case.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-refcount.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
>>   1 file changed, 46 insertions(+), 4 deletions(-)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index d1da8d5..a1d93e5 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -1504,7 +1504,8 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>>                              int64_t *nb_clusters)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>> -    int64_t i;
>> +    int64_t i, size;
>> +    int ret;
>>   
>>       for (i = 0; i < s->refcount_table_size; i++) {
>>           uint64_t offset, cluster;
>> @@ -1520,9 +1521,50 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>>           }
>>   
>>           if (cluster >= *nb_clusters) {
>> -            fprintf(stderr, "ERROR refcount block %" PRId64
>> -                    " is outside image\n", i);
>> -            res->corruptions++;
>> +            fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
>> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
>> +
>> +            if (fix & BDRV_FIX_ERRORS) {
>> +                int64_t old_nb_clusters = *nb_clusters;
>> +
>> +                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
>> +                if (ret < 0) {
>> +                    goto resize_fail;
>> +                }
>> +                size = bdrv_getlength(bs->file);
>> +                if (size < 0) {
>> +                    ret = size;
>> +                    goto resize_fail;
>> +                }
>> +
>> +                *nb_clusters = size_to_clusters(s, size);
>> +                assert(*nb_clusters >= old_nb_clusters);
>> +
>> +                *refcount_table = g_try_realloc(*refcount_table,
>> +                        *nb_clusters * sizeof(uint16_t));
>> +                if (!*refcount_table) {
>> +                    res->check_errors++;
>> +                    return -ENOMEM;
> So you really want to make sure the code is not trying anything more
> by directly returning -ENOMEM and not doing goto resize_fail.
>
> This makes sense though.
>
>> +                }
>> +
>> +                memset(*refcount_table + old_nb_clusters, 0,
>> +                       (*nb_clusters - old_nb_clusters) * sizeof(uint16_t));
>> +
>> +                if (cluster >= *nb_clusters) {
>> +                    ret = -EINVAL;
>> +                    goto resize_fail;
>> +                }
>> +
>> +                res->corruptions_fixed++;
>> +                continue;
>> +
>> +resize_fail:
>> +                res->corruptions++;
>> +                fprintf(stderr, "ERROR could not resize image: %s\n",
>> +                        strerror(-ret));
> Isn't a "return ret;" missing here ?
> the code will fall in the continue statement without it.

And that it should. A corruption is reported to stderr, res->corruptions 
is incremented and that's it - just as it was without this patch. The 
only reason I see why we should completely abort here is because 
resizing the file should always work; if it doesn't, something may be 
completely wrong. But even that is no real reason to jump the shark; we 
can still continue with the check and if everything is indeed completely 
broken, we'll receive EIOs soon enough.

Perhaps I should add a *rebuild = true; here and in the else branch in 
the next patch, though.

Max

>> +            } else {
>> +                res->corruptions++;
>> +            }
>>               continue;
>>           }
>>   
>> -- 
>> 2.0.3
>>
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs
  2014-08-14 12:33   ` Benoît Canet
@ 2014-08-15 12:42     ` Max Reitz
  0 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-15 12:42 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On 14.08.2014 14:33, Benoît Canet wrote:
> The Wednesday 13 Aug 2014 à 23:01:46 (+0200), Max Reitz wrote :
>> If a referenced cluster has a refcount of 0, increasing its refcount may
>> result in clusters being allocated for the refcount structures. This may
>> overwrite the referenced cluster, therefore we cannot simply increase
>> the refcount then.
>>
>> In such cases, we can either try to replicate all the refcount
>> operations solely for the check operation, basing the allocations on the
>> in-memory refcount table; or we can simply rebuild the whole refcount
>> structure based on the in-memory refcount table. Since the latter will
>> be much easier, do that.
>>
>> To prepare for this, introduce a "rebuild" boolean which should be set
>> to true whenever a fix is rather dangerous or too complicated using the
>> current refcount structures. Another example for this is refcount blocks
>> being referenced more than once.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-refcount.c | 191 +++++++------------------------------------------
>>   1 file changed, 27 insertions(+), 164 deletions(-)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index a1d93e5..6400840 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -1381,127 +1381,12 @@ fail:
>>   }
>>   
>>   /*
>> - * Writes one sector of the refcount table to the disk
>> - */
>> -#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
>> -static int write_reftable_entry(BlockDriverState *bs, int rt_index)
>> -{
>> -    BDRVQcowState *s = bs->opaque;
>> -    uint64_t buf[RT_ENTRIES_PER_SECTOR];
>> -    int rt_start_index;
>> -    int i, ret;
>> -
>> -    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
>> -    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
>> -        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
>> -    }
>> -
>> -    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE,
>> -            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
>> -            sizeof(buf));
>> -    if (ret < 0) {
>> -        return ret;
>> -    }
>> -
>> -    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
>> -    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
>> -            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
>> -    if (ret < 0) {
>> -        return ret;
>> -    }
>> -
>> -    return 0;
>> -}
>> -
>> -/*
>> - * Allocates a new cluster for the given refcount block (represented by its
>> - * offset in the image file) and copies the current content there. This function
>> - * does _not_ decrement the reference count for the currently occupied cluster.
>> - *
>> - * This function prints an informative message to stderr on error (and returns
>> - * -errno); on success, the offset of the newly allocated cluster is returned.
>> - */
>> -static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
>> -                                      uint64_t offset)
>> -{
>> -    BDRVQcowState *s = bs->opaque;
>> -    int64_t new_offset = 0;
>> -    void *refcount_block = NULL;
>> -    int ret;
>> -
>> -    /* allocate new refcount block */
>> -    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
>> -    if (new_offset < 0) {
>> -        fprintf(stderr, "Could not allocate new cluster: %s\n",
>> -                strerror(-new_offset));
>> -        ret = new_offset;
>> -        goto done;
>> -    }
>> -
>> -    /* fetch current refcount block content */
>> -    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
>> -    if (ret < 0) {
>> -        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
>> -        goto fail_free_cluster;
>> -    }
>> -
>> -    /* new block has not yet been entered into refcount table, therefore it is
>> -     * no refcount block yet (regarding this check) */
>> -    ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size);
>> -    if (ret < 0) {
>> -        fprintf(stderr, "Could not write refcount block; metadata overlap "
>> -                "check failed: %s\n", strerror(-ret));
>> -        /* the image will be marked corrupt, so don't even attempt on freeing
>> -         * the cluster */
>> -        goto done;
>> -    }
>> -
>> -    /* write to new block */
>> -    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
>> -            s->cluster_sectors);
>> -    if (ret < 0) {
>> -        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
>> -        goto fail_free_cluster;
>> -    }
>> -
>> -    /* update refcount table */
>> -    assert(!offset_into_cluster(s, new_offset));
>> -    s->refcount_table[reftable_index] = new_offset;
>> -    ret = write_reftable_entry(bs, reftable_index);
>> -    if (ret < 0) {
>> -        fprintf(stderr, "Could not update refcount table: %s\n",
>> -                strerror(-ret));
>> -        goto fail_free_cluster;
>> -    }
>> -
>> -    goto done;
>> -
>> -fail_free_cluster:
>> -    qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCARD_OTHER);
>> -
>> -done:
>> -    if (refcount_block) {
>> -        /* This should never fail, as it would only do so if the given refcount
>> -         * block cannot be found in the cache. As this is impossible as long as
>> -         * there are no bugs, assert the success. */
>> -        int tmp = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
>> -        assert(tmp == 0);
>> -    }
>> -
>> -    if (ret < 0) {
>> -        return ret;
>> -    }
>> -
>> -    return new_offset;
>> -}
>> -
>> -/*
>>    * Checks consistency of refblocks and accounts for each refblock in
>>    * *refcount_table.
>>    */
>>   static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>> -                           BdrvCheckMode fix, uint16_t **refcount_table,
>> -                           int64_t *nb_clusters)
>> +                           BdrvCheckMode fix, bool *rebuild,
>> +                           uint16_t **refcount_table, int64_t *nb_clusters)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>>       int64_t i, size;
>> @@ -1517,6 +1402,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
>>               fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
>>                       "cluster aligned; refcount table entry corrupted\n", i);
>>               res->corruptions++;
>> +            *rebuild = true;
>>               continue;
>>           }
>>   
>> @@ -1571,47 +1457,12 @@ resize_fail:
>>           if (offset != 0) {
>>               inc_refcounts(bs, res, *refcount_table, *nb_clusters,
>>                             offset, s->cluster_size);
>> -            if ((*refcount_table)[cluster] != 1) {
>> -                fprintf(stderr, "%s refcount block %" PRId64
>> -                        " refcount=%d\n",
>> -                        fix & BDRV_FIX_ERRORS ? "Repairing" :
>> -                                                "ERROR",
>> -                        i, (*refcount_table)[cluster]);
>> -
>> -                if (fix & BDRV_FIX_ERRORS) {
>> -                    int64_t new_offset;
>> -
>> -                    new_offset = realloc_refcount_block(bs, i, offset);
>> -                    if (new_offset < 0) {
>> -                        res->corruptions++;
>> -                        continue;
>> -                    }
>> -
>> -                    /* update refcounts */
>> -                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
>> -                        /* increase refcount_table size if necessary */
>> -                        int old_nb_clusters = *nb_clusters;
>> -                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
>> -
>> -                        *refcount_table = g_try_realloc(*refcount_table,
>> -                                *nb_clusters * sizeof(uint16_t));
>> -                        if (!*refcount_table) {
>> -                            res->check_errors++;
>> -                            return -ENOMEM;
>> -                        }
>> -
>> -                        memset(&(*refcount_table)[old_nb_clusters], 0,
>> -                               (*nb_clusters - old_nb_clusters) *
>> -                               sizeof(uint16_t));
>> -                    }
>> -                    (*refcount_table)[cluster]--;
>> -                    inc_refcounts(bs, res, *refcount_table, *nb_clusters,
>> -                                  new_offset, s->cluster_size);
>>   
>> -                    res->corruptions_fixed++;
>> -                } else {
>> -                    res->corruptions++;
>> -                }
>> +            if ((*refcount_table)[cluster] != 1) {
>> +                fprintf(stderr, "ERROR refcount block %" PRId64
>> +                        " refcount=%d\n", i, (*refcount_table)[cluster]);
>> +                res->corruptions++;
>> +                *rebuild = true;
>>               }
>>           }
>>       }
>> @@ -1623,8 +1474,8 @@ resize_fail:
>>    * Calculates an in-memory refcount table.
>>    */
>>   static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>> -                               BdrvCheckMode fix, uint16_t **refcount_table,
>> -                               int64_t *nb_clusters)
>> +                               BdrvCheckMode fix, bool *rebuild,
>> +                               uint16_t **refcount_table, int64_t *nb_clusters)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>>       QCowSnapshot *sn;
>> @@ -1667,7 +1518,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>                     s->refcount_table_offset,
>>                     s->refcount_table_size * sizeof(uint64_t));
>>   
>> -    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
>> +    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
>>   }
>>   
>>   /*
>> @@ -1675,7 +1526,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>    * refcount as reported by the refcount structures on-disk.
>>    */
>>   static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>> -                              BdrvCheckMode fix, int64_t *highest_cluster,
>> +                              BdrvCheckMode fix, bool *rebuild,
>> +                              int64_t *highest_cluster,
>>                                 uint16_t *refcount_table, int64_t nb_clusters)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>> @@ -1702,10 +1554,15 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>               int *num_fixed = NULL;
>>               if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
>>                   num_fixed = &res->leaks_fixed;
>> -            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
>> +            } else if (refcount1 > 0 && refcount1 < refcount2 &&
>> +                       (fix & BDRV_FIX_ERRORS)) {
>>                   num_fixed = &res->corruptions_fixed;
>>               }
>>   
>> +            if (refcount1 == 0) {
>> +                *rebuild = true;
>> +            }
>> +
>>               fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
>>                       num_fixed != NULL     ? "Repairing" :
>>                       refcount1 < refcount2 ? "ERROR" :
>> @@ -1744,6 +1601,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>       BDRVQcowState *s = bs->opaque;
>>       int64_t size, highest_cluster, nb_clusters;
>>       uint16_t *refcount_table = NULL;
>> +    bool rebuild = false;
>>       int ret;
>>   
>>       size = bdrv_getlength(bs->file);
>> @@ -1761,14 +1619,19 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>       res->bfi.total_clusters =
>>           size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
>>   
>> -    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
>> +    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
>> +                              &nb_clusters);
>>       if (ret < 0) {
>>           goto fail;
>>       }
>>   
>> -    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
>> +    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
>>                         nb_clusters);
>>   
>> +    if (rebuild) {
>> +        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
>> +    }
> Aren't you afraid that introducing rebuild in this way could
> break git bisect ?
>
> Why not introducing the new rebuild code in the previous patch and
> use it in this one ?

Because it's a static function and an unused static function really 
breaks git bisect.

The worst this patch does is that it prevents qemu from repairing files 
in a way which could actually break the file. It does not fix the repair 
operation (that's the next patch), but it at least prevents it, which is 
in my opinion not really breaking things (because things are already 
broken).

If one runs some tests against some expected output (like the 
qemu-iotests do), bisect is broken anyway because the output from after 
this series differs from before (that's why patch 7 exists).

The best I could probably do is to pull patch 5 before this one (like 
you suggest) while creating the rebuild function as a non-static one in 
that patch and making it static here so that gcc does not complain.

Max

> Best regards
>
> Benoît
>
>> +
>>       /* check OFLAG_COPIED */
>>       ret = check_oflag_copied(bs, res, fix);
>>       if (ret < 0) {
>> -- 
>> 2.0.3
>>
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check
  2014-08-14 12:58   ` Benoît Canet
@ 2014-08-15 12:49     ` Max Reitz
  0 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-15 12:49 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On 14.08.2014 14:58, Benoît Canet wrote:
> The Wednesday 13 Aug 2014 à 23:01:47 (+0200), Max Reitz wrote :
>> The previous commit introduced the "rebuild" variable to qcow2's
>> implementation of the image consistency check. Now make use of this by
>> adding a function which creates a completely new refcount structure
>> based solely on the in-memory information gathered before.
>>
>> The old refcount structure will be leaked, however.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-refcount.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 259 insertions(+), 3 deletions(-)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 6400840..e3ca03a 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -1590,6 +1590,242 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>   }
>>   
>>   /*
>> + * Allocates a cluster using an in-memory refcount table (IMRT) in contrast to
>> + * the on-disk refcount structures.
>> + *
>> + * *first_free_cluster does not necessarily point to the first free cluster, but
>> + * may point to one cluster as close as possible before it. The offset returned
>> + * will never be before that cluster.
>> + */
>> +static int64_t alloc_clusters_imrt(BlockDriverState *bs,
>> +                                   int cluster_count,
>> +                                   uint16_t **refcount_table,
>> +                                   int64_t *nb_clusters,
> I don't understand this parameters name.
> nb_ and the plural seems to imply that it's a cluster count.
> While the int64_t imply it's large.

It certainly is. It's the total cluster count of the file (which is what 
"nb_clusters" is always used for in the check code).

>> +                                   int64_t *first_free_cluster)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int64_t cluster = *first_free_cluster, i;
> Here cluster is a cluster offset.

A cluster index.


>> +    bool first_gap = true;
>> +    int contiguous_clusters;
>> +
>> +    for (contiguous_clusters = 0;
>> +         cluster < *nb_clusters && contiguous_clusters < cluster_count;
> And here we compare a cluster offset (cluster) with something named like a
> cluster count (nb_clusters).

Because cluster is an index.

>> +         cluster++)
>> +    {
>> +        if (!(*refcount_table)[cluster]) {
>> +            contiguous_clusters++;
>> +            if (first_gap) {
>> +                *first_free_cluster = cluster;
>> +                first_gap = false;
>> +            }
>> +        } else if (contiguous_clusters) {
>> +            contiguous_clusters = 0;
>> +        }
>> +    }
>> +
>> +    if (contiguous_clusters < cluster_count) {
>> +        int64_t old_nb_clusters = *nb_clusters;
>> +
>> +        *nb_clusters = cluster + cluster_count - contiguous_clusters;
> Here we compute a cluster offset (nb_cluster) by adding and removing
> clusters counts (cluster_count, continuous_cluster) to a cluster offset (cluster)
> while (nb_clusters) is named like a cluster count.

*nb_clusters is a cluster count. cluster is a cluster index.

>
>> +        *refcount_table = g_try_realloc(*refcount_table,
>> +                                        *nb_clusters * sizeof(uint16_t));
> This confuse me further.

*nb_clusters is the number of clusters in the file. The refcount table 
has one uint16_t entry per cluster.


I'll state more clearly in the comment above this function that 
"first_free_cluster" is a cluster index albeit the function returns an 
offset. Without further protest, I won't change this difference, though; 
it's much more convenient to work with first_free_cluster being an index 
and the returned value being an offset than both having the same unit.

Max

>> +        if (!*refcount_table) {
>> +            return -ENOMEM;
>> +        }
>> +
>> +        memset(*refcount_table + old_nb_clusters, 0,
>> +               (*nb_clusters - old_nb_clusters) * sizeof(uint16_t));
>> +    }
>> +
>> +    cluster -= contiguous_clusters;
>> +    for (i = 0; i < cluster_count; i++) {
>> +        (*refcount_table)[cluster + i] = 1;
>> +    }
>> +
>> +    return cluster << s->cluster_bits;
>> +}
>> +
>> +/*
>> + * Creates a new refcount structure based solely on the in-memory information
>> + * given through *refcount_table. All necessary allocations will be reflected
>> + * in that array.
>> + *
>> + * On success, the old refcount structure is leaked (it will be covered by the
>> + * new refcount structure).
>> + */
>> +static int rebuild_refcount_structure(BlockDriverState *bs,
>> +                                      BdrvCheckResult *res,
>> +                                      uint16_t **refcount_table,
>> +                                      int64_t *nb_clusters)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int64_t first_free_cluster = 0, rt_ofs = -1, cluster = 0;
>> +    int64_t rb_ofs, rb_start, rb_index;
>> +    uint32_t reftable_size = 0;
>> +    uint64_t *reftable = NULL;
>> +    uint16_t *on_disk_rb;
>> +    uint8_t rt_offset_and_clusters[sizeof(uint64_t) + sizeof(uint32_t)];
>> +    int i, ret = 0;
>> +
>> +    qcow2_cache_empty(bs, s->refcount_block_cache);
>> +
>> +write_refblocks:
>> +    for (; cluster < *nb_clusters; cluster++) {
>> +        if (!(*refcount_table)[cluster]) {
>> +            continue;
>> +        }
>> +
>> +        rb_index = cluster >> (s->cluster_bits - 1);
>> +        rb_start = rb_index << (s->cluster_bits - 1);
>> +
>> +        /* Don't allocate a cluster in a refblock already written to disk */
>> +        if (first_free_cluster < rb_start) {
>> +            first_free_cluster = rb_start;
>> +        }
>> +        rb_ofs = alloc_clusters_imrt(bs, 1, refcount_table, nb_clusters,
>> +                                     &first_free_cluster);
>> +        if (rb_ofs < 0) {
>> +            fprintf(stderr, "ERROR allocating refblock: %s\n", strerror(-ret));
>> +            res->check_errors++;
>> +            ret = rb_ofs;
>> +            goto fail;
>> +        }
>> +
>> +        if (reftable_size <= rb_index) {
>> +            uint32_t old_rt_size = reftable_size;
>> +            reftable_size = ROUND_UP((rb_index + 1) * sizeof(uint64_t),
>> +                                     s->cluster_size) / sizeof(uint64_t);
>> +            reftable = g_try_realloc(reftable,
>> +                                     reftable_size * sizeof(uint64_t));
>> +            if (!reftable) {
>> +                res->check_errors++;
>> +                ret = -ENOMEM;
>> +                goto fail;
>> +            }
>> +
>> +            memset(reftable + old_rt_size, 0,
>> +                   (reftable_size - old_rt_size) * sizeof(uint64_t));
>> +
>> +            /* The offset we have for the reftable is now no longer valid;
>> +             * this will leak that range, but we can easily fix that by running
>> +             * a leak-fixing check after this rebuild operation */
>> +            rt_ofs = -1;
>> +        }
>> +        reftable[rb_index] = rb_ofs;
>> +
>> +        /* If this is apparently the last refblock (for now), try to squeeze the
>> +         * reftable in */
>> +        if (rb_index == (*nb_clusters - 1) >> (s->cluster_bits - 1) &&
>> +            rt_ofs < 0)
>> +        {
>> +            rt_ofs = alloc_clusters_imrt(bs, size_to_clusters(s, reftable_size *
>> +                                                              sizeof(uint64_t)),
>> +                                         refcount_table, nb_clusters,
>> +                                         &first_free_cluster);
>> +            if (rt_ofs < 0) {
>> +                fprintf(stderr, "ERROR allocating reftable: %s\n",
>> +                        strerror(-ret));
>> +                res->check_errors++;
>> +                ret = rt_ofs;
>> +                goto fail;
>> +            }
>> +        }
>> +
>> +        ret = qcow2_pre_write_overlap_check(bs, 0, rb_ofs, s->cluster_size);
>> +        if (ret < 0) {
>> +            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
>> +            goto fail;
>> +        }
>> +
>> +        on_disk_rb = g_malloc0(s->cluster_size);
>> +        for (i = 0; i < s->cluster_size / sizeof(uint16_t) &&
>> +                    rb_start + i < *nb_clusters; i++)
>> +        {
>> +            on_disk_rb[i] = cpu_to_be16((*refcount_table)[rb_start + i]);
>> +        }
>> +
>> +        ret = bdrv_write(bs->file, rb_ofs / BDRV_SECTOR_SIZE,
>> +                         (void *)on_disk_rb, s->cluster_sectors);
>> +        g_free(on_disk_rb);
>> +        if (ret < 0) {
>> +            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
>> +            goto fail;
>> +        }
>> +
>> +        /* Go to the end of this refblock */
>> +        cluster = rb_start + s->cluster_size / sizeof(uint16_t) - 1;
>> +    }
>> +
>> +    if (rt_ofs < 0) {
>> +        int64_t post_rb_start = ROUND_UP(*nb_clusters,
>> +                                         s->cluster_size / sizeof(uint16_t));
>> +
>> +        /* Not pretty but simple */
>> +        if (first_free_cluster < post_rb_start) {
>> +            first_free_cluster = post_rb_start;
>> +        }
>> +        rt_ofs = alloc_clusters_imrt(bs, size_to_clusters(s, reftable_size *
>> +                                                          sizeof(uint64_t)),
>> +                                     refcount_table, nb_clusters,
>> +                                     &first_free_cluster);
>> +        if (rt_ofs < 0) {
>> +            fprintf(stderr, "ERROR allocating reftable: %s\n", strerror(-ret));
>> +            res->check_errors++;
>> +            ret = rt_ofs;
>> +            goto fail;
>> +        }
>> +
>> +        goto write_refblocks;
>> +    }
>> +
>> +    assert(reftable);
>> +
>> +    for (rb_index = 0; rb_index < reftable_size; rb_index++) {
>> +        cpu_to_be64s(&reftable[rb_index]);
>> +    }
>> +
>> +    ret = qcow2_pre_write_overlap_check(bs, 0, rt_ofs,
>> +                                        reftable_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
>> +        goto fail;
>> +    }
>> +
>> +    ret = bdrv_write(bs->file, rt_ofs / BDRV_SECTOR_SIZE, (void *)reftable,
>> +                     reftable_size * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
>> +    if (ret < 0) {
>> +        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
>> +        goto fail;
>> +    }
>> +
>> +    /* Enter new reftable into the image header */
>> +    cpu_to_be64w((uint64_t *)&rt_offset_and_clusters[0], rt_ofs);
>> +    cpu_to_be32w((uint32_t *)&rt_offset_and_clusters[sizeof(uint64_t)],
>> +                 size_to_clusters(s, reftable_size * sizeof(uint64_t)));
>> +    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
>> +                                              refcount_table_offset),
>> +                           rt_offset_and_clusters,
>> +                           sizeof(rt_offset_and_clusters));
>> +    if (ret < 0) {
>> +        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
>> +        goto fail;
>> +    }
>> +
>> +    for (rb_index = 0; rb_index < reftable_size; rb_index++) {
>> +        be64_to_cpus(&reftable[rb_index]);
>> +    }
>> +    s->refcount_table = reftable;
>> +    s->refcount_table_offset = rt_ofs;
>> +    s->refcount_table_size = reftable_size;
>> +
>> +    return 0;
>> +
>> +fail:
>> +    g_free(reftable);
>> +    return ret;
>> +}
>> +
>> +/*
>>    * Checks an image for refcount consistency.
>>    *
>>    * Returns 0 if no errors are found, the number of errors in case the image is
>> @@ -1599,6 +1835,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>                             BdrvCheckMode fix)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>> +    BdrvCheckResult pre_compare_res;
>>       int64_t size, highest_cluster, nb_clusters;
>>       uint16_t *refcount_table = NULL;
>>       bool rebuild = false;
>> @@ -1625,11 +1862,30 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>           goto fail;
>>       }
>>   
>> -    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
>> +    /* In case we don't need to rebuild the refcount structure (but want to fix
>> +     * something), this function is immediately called again, in which case the
>> +     * result should be ignored */
>> +    pre_compare_res = *res;
>> +    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
>>                         nb_clusters);
>>   
>> -    if (rebuild) {
>> -        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
>> +    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
>> +        fprintf(stderr, "Rebuilding refcount structure\n");
>> +        ret = rebuild_refcount_structure(bs, res, &refcount_table,
>> +                                         &nb_clusters);
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +    } else if (fix) {
>> +        if (rebuild) {
>> +            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
>> +        }
>> +
>> +        if (res->leaks || res->corruptions) {
>> +            *res = pre_compare_res;
>> +            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
>> +                              refcount_table, nb_clusters);
>> +        }
>>       }
>>   
>>       /* check OFLAG_COPIED */
>> -- 
>> 2.0.3
>>
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison for check
  2014-08-15 12:31     ` Max Reitz
@ 2014-08-15 13:47       ` Max Reitz
  0 siblings, 0 replies; 19+ messages in thread
From: Max Reitz @ 2014-08-15 13:47 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On 15.08.2014 14:31, Max Reitz wrote:
> On 14.08.2014 14:02, Benoît Canet wrote:
>> The Wednesday 13 Aug 2014 à 23:01:44 (+0200), Max Reitz wrote :
>>> Put the code for comparing the calculated refcounts against the on-disk
>>> refcounts during qemu-img check into an own function.
>>>
>>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>>> ---
>>>   block/qcow2-refcount.c | 91 
>>> ++++++++++++++++++++++++++++----------------------
>>>   1 file changed, 52 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>>> index 9793c27..d1da8d5 100644
>>> --- a/block/qcow2-refcount.c
>>> +++ b/block/qcow2-refcount.c
>>> @@ -1629,46 +1629,22 @@ static int 
>>> calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>>   }
>>>     /*
>>> - * Checks an image for refcount consistency.
>>> - *
>>> - * Returns 0 if no errors are found, the number of errors in case 
>>> the image is
>>> - * detected as corrupted, and -errno when an internal error occurred.
>>> + * Compares the actual reference count for each cluster in the 
>>> image against the
>>> + * refcount as reported by the refcount structures on-disk.
>>>    */
>>> -int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>> -                          BdrvCheckMode fix)
>>> +static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult 
>>> *res,
>>> +                              BdrvCheckMode fix, int64_t 
>>> *highest_cluster,
>>> +                              uint16_t *refcount_table, int64_t 
>>> nb_clusters)
>>>   {
>>>       BDRVQcowState *s = bs->opaque;
>>> -    int64_t size, i, highest_cluster, nb_clusters;
>>> -    int refcount1, refcount2;
>>> -    uint16_t *refcount_table = NULL;
>>> -    int ret;
>>> -
>>> -    size = bdrv_getlength(bs->file);
>>> -    if (size < 0) {
>>> -        res->check_errors++;
>>> -        return size;
>>> -    }
>>> -
>>> -    nb_clusters = size_to_clusters(s, size);
>>> -    if (nb_clusters > INT_MAX) {
>>> -        res->check_errors++;
>>> -        return -EFBIG;
>>> -    }
>>> -
>>> -    res->bfi.total_clusters =
>>> -        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
>>> -
>>> -    ret = calculate_refcounts(bs, res, fix, &refcount_table, 
>>> &nb_clusters);
>>> -    if (ret < 0) {
>>> -        goto fail;
>>> -    }
>>> +    int64_t i;
>>> +    int refcount1, refcount2, ret;
>>>   -    /* compare ref counts */
>>> -    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
>>> +    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
>>>           refcount1 = get_refcount(bs, i);
>>>           if (refcount1 < 0) {
>>>               fprintf(stderr, "Can't get refcount for cluster %" 
>>> PRId64 ": %s\n",
>>> -                i, strerror(-refcount1));
>>> +                    i, strerror(-refcount1));
>> Free coding fix.
>>
>>>               res->check_errors++;
>>>               continue;
>>>           }
>>> @@ -1676,11 +1652,10 @@ int qcow2_check_refcounts(BlockDriverState 
>>> *bs, BdrvCheckResult *res,
>>>           refcount2 = refcount_table[i];
>>>             if (refcount1 > 0 || refcount2 > 0) {
>>> -            highest_cluster = i;
>>> +            *highest_cluster = i;
>> idem.
>>>           }
>>>             if (refcount1 != refcount2) {
>>> -
>> Spurious blank line removal.
>>
>>>               /* Check if we're allowed to fix the mismatch */
>>>               int *num_fixed = NULL;
>>>               if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
>>> @@ -1690,10 +1665,10 @@ int qcow2_check_refcounts(BlockDriverState 
>>> *bs, BdrvCheckResult *res,
>>>               }
>>>                 fprintf(stderr, "%s cluster %" PRId64 " refcount=%d 
>>> reference=%d\n",
>>> -                   num_fixed != NULL     ? "Repairing" :
>>> -                   refcount1 < refcount2 ? "ERROR" :
>>> -                                           "Leaked",
>>> -                   i, refcount1, refcount2);
>>> +                    num_fixed != NULL     ? "Repairing" :
>>> +                    refcount1 < refcount2 ? "ERROR" :
>>> +                                            "Leaked",
>>> +                    i, refcount1, refcount2);
>> Gratuitous coding style fix.
>>
>> Patch 1 have similar issues.
>> I think the patchs would be smaller and cleaner without these 
>> spurious coding style fixes.
>
> Well, I did them on purpose to avoid an additional patch. I'll rework 
> it for v2.

Okay, if I add an explicit patch for fixing the coding style issues, I 
probably need to fix all of qcow2-refcount.c. But looking at it, this 
patch would probably be rather large, so I guess I'll just keep the 
issues and don't fix them at all.

Max

> Max
>
>>>                 if (num_fixed) {
>>>                   ret = update_refcount(bs, i << s->cluster_bits, 1,
>>> @@ -1713,6 +1688,44 @@ int qcow2_check_refcounts(BlockDriverState 
>>> *bs, BdrvCheckResult *res,
>>>               }
>>>           }
>>>       }
>>> +}
>>> +
>>> +/*
>>> + * Checks an image for refcount consistency.
>>> + *
>>> + * Returns 0 if no errors are found, the number of errors in case 
>>> the image is
>>> + * detected as corrupted, and -errno when an internal error occurred.
>>> + */
>>> +int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>> +                          BdrvCheckMode fix)
>>> +{
>>> +    BDRVQcowState *s = bs->opaque;
>>> +    int64_t size, highest_cluster, nb_clusters;
>>> +    uint16_t *refcount_table = NULL;
>>> +    int ret;
>>> +
>>> +    size = bdrv_getlength(bs->file);
>>> +    if (size < 0) {
>>> +        res->check_errors++;
>>> +        return size;
>>> +    }
>>> +
>>> +    nb_clusters = size_to_clusters(s, size);
>>> +    if (nb_clusters > INT_MAX) {
>>> +        res->check_errors++;
>>> +        return -EFBIG;
>>> +    }
>>> +
>>> +    res->bfi.total_clusters =
>>> +        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
>>> +
>>> +    ret = calculate_refcounts(bs, res, fix, &refcount_table, 
>>> &nb_clusters);
>>> +    if (ret < 0) {
>>> +        goto fail;
>>> +    }
>>> +
>>> +    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
>>> +                      nb_clusters);
>>>         /* check OFLAG_COPIED */
>>>       ret = check_oflag_copied(bs, res, fix);
>>> -- 
>>> 2.0.3
>>>
>>>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2014-08-15 13:47 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-13 21:01 [Qemu-devel] [PATCH 0/8] qcow2: Fix image repairing Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 1/8] qcow2: Factor out refcount accounting for check Max Reitz
2014-08-14 11:56   ` Benoît Canet
2014-08-13 21:01 ` [Qemu-devel] [PATCH 2/8] qcow2: Factor out refcount comparison " Max Reitz
2014-08-14 12:02   ` Benoît Canet
2014-08-15 12:31     ` Max Reitz
2014-08-15 13:47       ` Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 3/8] qcow2: Fix refcount blocks beyond image end Max Reitz
2014-08-14 12:11   ` Benoît Canet
2014-08-15 12:36     ` Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs Max Reitz
2014-08-14 12:33   ` Benoît Canet
2014-08-15 12:42     ` Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 5/8] qcow2: Rebuild refcount structure during check Max Reitz
2014-08-14 12:58   ` Benoît Canet
2014-08-15 12:49     ` Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 6/8] qcow2: Clean up after refcount rebuild Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 7/8] iotests: Fix test outputs Max Reitz
2014-08-13 21:01 ` [Qemu-devel] [PATCH 8/8] iotests: Add test for potentially damaging repairs Max Reitz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.