All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v7 0/3] Quorum maintainance operations
@ 2014-06-09 20:45 Benoît Canet
  2014-06-09 20:45 ` [Qemu-devel] [PATCH v7 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Benoît Canet @ 2014-06-09 20:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, stefanha, mreitz

in v7:
        rebase on latest Stefan's block branch
        Revert to extending drive-mirror by adding two parameters [Stefan]

Benoît Canet (3):
  quorum: Add the rewrite-corrupted parameter to quorum.
  block: Add node-name and to-replace-node-name arguments to
    drive-mirror
  qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror
    node-name mode.

 block.c                    |  27 +++++++
 block/mirror.c             |  62 +++++++++++---
 block/quorum.c             |  97 ++++++++++++++++++++--
 blockdev.c                 |  51 +++++++++++-
 hmp.c                      |   1 +
 include/block/block.h      |   5 ++
 include/block/block_int.h  |   4 +-
 qapi/block-core.json       |  14 +++-
 qmp-commands.hx            |   5 ++
 tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/qemu-iotests/041.out |   4 +-
 tests/qemu-iotests/081     |  15 +++-
 tests/qemu-iotests/081.out |  10 +++
 13 files changed, 460 insertions(+), 31 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH v7 1/3] quorum: Add the rewrite-corrupted parameter to quorum.
  2014-06-09 20:45 [Qemu-devel] [PATCH v7 0/3] Quorum maintainance operations Benoît Canet
@ 2014-06-09 20:45 ` Benoît Canet
  2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror Benoît Canet
  2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 3/3] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
  2 siblings, 0 replies; 6+ messages in thread
From: Benoît Canet @ 2014-06-09 20:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

On read operations when this parameter is set and some replicas are corrupted
while quorum can be reached quorum will proceed to rewrite the correct version
of the data to fix the corrupted replicas.

This will shine with SSD where the FTL will remap the same block at another
place on rewrite.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/quorum.c             | 97 +++++++++++++++++++++++++++++++++++++++++++---
 qapi/block-core.json       |  5 ++-
 tests/qemu-iotests/081     | 15 ++++++-
 tests/qemu-iotests/081.out | 10 +++++
 4 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/block/quorum.c b/block/quorum.c
index 426077a..1a7a01e 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -22,6 +22,7 @@
 
 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
+#define QUORUM_OPT_REWRITE        "rewrite-corrupted"
 
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
                             * It is useful to debug other block drivers by
                             * comparing them with a reference one.
                             */
+    bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+                            * block if Quorum is reached.
+                            */
 } BDRVQuorumState;
 
 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -104,13 +108,17 @@ struct QuorumAIOCB {
     int count;                  /* number of completed AIOCB */
     int success_count;          /* number of successfully completed AIOCB */
 
+    int rewrite_count;          /* number of replica to rewrite: count down to
+                                 * zero once writes are fired
+                                 */
+
     QuorumVotes votes;
 
     bool is_read;
     int vote_ret;
 };
 
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
 
 static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
     acb->count = 0;
     acb->success_count = 0;
+    acb->rewrite_count = 0;
     acb->votes.compare = quorum_sha256_compare;
     QLIST_INIT(&acb->votes.vote_list);
     acb->is_read = false;
@@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
     return false;
 }
 
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+    QuorumAIOCB *acb = opaque;
+
+    /* one less rewrite to do */
+    acb->rewrite_count--;
+
+    /* wait until all rewrite callbacks have completed */
+    if (acb->rewrite_count) {
+        return;
+    }
+
+    quorum_aio_finalize(acb);
+}
+
 static void quorum_aio_cb(void *opaque, int ret)
 {
     QuorumChildRequest *sacb = opaque;
     QuorumAIOCB *acb = sacb->parent;
     BDRVQuorumState *s = acb->common.bs->opaque;
+    bool rewrite = false;
 
     sacb->ret = ret;
     acb->count++;
@@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
 
     /* Do the vote on read */
     if (acb->is_read) {
-        quorum_vote(acb);
+        rewrite = quorum_vote(acb);
     } else {
         quorum_has_too_much_io_failed(acb);
     }
 
-    quorum_aio_finalize(acb);
+    /* if no rewrite is done the code will finish right away */
+    if (!rewrite) {
+        quorum_aio_finalize(acb);
+    }
 }
 
 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
     }
 }
 
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+                                        QuorumVoteValue *value)
+{
+    QuorumVoteVersion *version;
+    QuorumVoteItem *item;
+    int count = 0;
+
+    /* first count the number of bad versions: done first to avoid concurrency
+     * issues.
+     */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            count++;
+        }
+    }
+
+    /* quorum_rewrite_aio_cb will count down this to zero */
+    acb->rewrite_count = count;
+
+    /* now fire the correcting rewrites */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+        }
+    }
+
+    /* return true if any rewrite is done else false */
+    return count;
+}
+
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
     int i;
@@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
     return ret;
 }
 
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
 {
     bool quorum = true;
+    bool rewrite = false;
     int i, j, ret;
     QuorumVoteValue hash;
     BDRVQuorumState *s = acb->common.bs->opaque;
     QuorumVoteVersion *winner;
 
     if (quorum_has_too_much_io_failed(acb)) {
-        return;
+        return false;
     }
 
     /* get the index of the first successful read */
@@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
     /* Every successful read agrees */
     if (quorum) {
         quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return;
+        return false;
     }
 
     /* compute hashes for each successful read, also store indexes */
@@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
     /* some versions are bad print them */
     quorum_report_bad_versions(s, acb, &winner->value);
 
+    /* corruption correction is enabled */
+    if (s->rewrite_corrupted) {
+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+    }
+
 free_exit:
     /* free lists */
     quorum_free_vote_list(&acb->votes);
+    return rewrite;
 }
 
 static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -714,6 +786,11 @@ static QemuOptsList quorum_runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Trigger block verify mode if set",
         },
+        {
+            .name = QUORUM_OPT_REWRITE,
+            .type = QEMU_OPT_BOOL,
+            .help = "Rewrite corrupted block on read quorum",
+        },
         { /* end of list */ }
     },
 };
@@ -775,6 +852,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                 "and using two files with vote_threshold=2\n");
     }
 
+    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+    if (s->rewrite_corrupted && s->is_blkverify) {
+        error_setg(&local_err,
+                   "rewrite-corrupted=on cannot be used with blkverify=on");
+        ret = -EINVAL;
+        goto exit;
+    }
+
     /* allocate the children BlockDriverState array */
     s->bs = g_new0(BlockDriverState *, s->num_children);
     opened = g_new0(bool, s->num_children);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 33bd93f..eec881b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1333,12 +1333,15 @@
 #
 # @vote-threshold: the vote limit under which a read will fail
 #
+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
+#                     (Since 2.1)
+#
 # Since: 2.0
 ##
 { 'type': 'BlockdevOptionsQuorum',
   'data': { '*blkverify': 'bool',
             'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int' } }
+            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
 
 ##
 # @BlockdevOptions
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index b512d00..7ae4be2 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -134,15 +134,28 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
 EOF
 
 echo
+echo "== using quorum rewrite corrupted mode =="
+
+quorum="$quorum,file.rewrite-corrupted=on"
+
+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
+
+echo
+echo "== checking that quorum has corrected the corrupted file =="
+
+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
+echo
 echo "== breaking quorum =="
 
 $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
 echo
 echo "== checking that quorum is broken =="
 
 $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
 
-
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 84aeb0c..1629d2a 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
 
 
+== using quorum rewrite corrupted mode ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking that quorum has corrected the corrupted file ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
 == breaking quorum ==
 wrote 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 == checking that quorum is broken ==
 qemu-io: can't open device (null): Could not read image for determining its format: Input/output error
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror
  2014-06-09 20:45 [Qemu-devel] [PATCH v7 0/3] Quorum maintainance operations Benoît Canet
  2014-06-09 20:45 ` [Qemu-devel] [PATCH v7 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
@ 2014-06-09 20:46 ` Benoît Canet
  2014-06-09 21:08   ` Eric Blake
  2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 3/3] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
  2 siblings, 1 reply; 6+ messages in thread
From: Benoît Canet @ 2014-06-09 20:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

node-name gives a name to the created BDS and registers it in the node graph.

to-replace-node-name can be used when drive-mirror is called with sync=full.

The purpose of these fields is to be able to reconstruct and replace a broken
quorum file.

drive-mirror will bdrv_swap the new BDS named node-name with the one
pointed by to-replace-node-name when the mirroring is finished.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block.c                   | 27 +++++++++++++++++++++
 block/mirror.c            | 62 +++++++++++++++++++++++++++++++++++++----------
 blockdev.c                | 51 +++++++++++++++++++++++++++++++++++---
 hmp.c                     |  1 +
 include/block/block.h     |  5 ++++
 include/block/block_int.h |  4 ++-
 qapi/block-core.json      |  9 +++++++
 qmp-commands.hx           |  5 ++++
 8 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/block.c b/block.c
index 17f763d..5c03c2b 100644
--- a/block.c
+++ b/block.c
@@ -5795,3 +5795,30 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 
     return false;
 }
+
+BlockDriverState *check_to_replace_node(const char *to_replace_node_name,
+                                        Error **errp)
+{
+    BlockDriverState *to_replace_bs = bdrv_find_node(to_replace_node_name);
+    if (!to_replace_bs) {
+        error_setg(errp, "to-replace-node-name=%s not found",
+                   to_replace_node_name);
+        return NULL;
+    }
+
+    /* the code should only be able to replace the top first non filter
+     * node of the graph. For example the first BDS under a quorum.
+     */
+    if (!bdrv_is_first_non_filter(to_replace_bs)) {
+        error_set(errp, QERR_FEATURE_DISABLED,
+                  "drive-mirror and replace node-name");
+        return NULL;
+    }
+
+    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+        return NULL;
+    }
+
+    return to_replace_bs;
+}
+
diff --git a/block/mirror.c b/block/mirror.c
index 94c8661..68526ae 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
     RateLimit limit;
     BlockDriverState *target;
     BlockDriverState *base;
+    /* The name of the graph node to replace */
+    char *to_replace_node_name;
+    /* The block BDS to replace */
+    BlockDriverState *to_replace;
+    /* Used to block operations on the drive-mirror-replace target */
+    Error *replace_blocker;
     bool is_none_mode;
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
@@ -490,10 +496,14 @@ immediate_exit:
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        BlockDriverState *to_replace = s->common.bs;
+        if (s->to_replace) {
+            to_replace = s->to_replace;
         }
-        bdrv_swap(s->target, s->common.bs);
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+        }
+        bdrv_swap(s->target, to_replace);
         if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
             /* drop the bs loop chain formed by the swap: break the loop then
              * trigger the unref from the top one */
@@ -502,6 +512,11 @@ immediate_exit:
             bdrv_unref(p);
         }
     }
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
     bdrv_unref(s->target);
     block_job_completed(&s->common, ret);
 }
@@ -540,6 +555,23 @@ static void mirror_complete(BlockJob *job, Error **errp)
         return;
     }
 
+    /* check the target bs is not block and block all operations on it */
+    if (s->to_replace_node_name) {
+        s->to_replace = check_to_replace_node(s->to_replace_node_name, errp);
+
+        if (!s->to_replace) {
+            return;
+        }
+
+        error_setg(&s->replace_blocker,
+                   "block device is in use by block-job-complete");
+        bdrv_op_block_all(s->to_replace, s->replace_blocker);
+        bdrv_ref(s->to_replace);
+
+        g_free(s->to_replace_node_name);
+        s->to_replace_node_name = NULL;
+    }
+
     s->should_complete = true;
     block_job_resume(job);
 }
@@ -562,14 +594,15 @@ static const BlockJobDriver commit_active_job_driver = {
 };
 
 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
-                            int64_t speed, int64_t granularity,
-                            int64_t buf_size,
-                            BlockdevOnError on_source_error,
-                            BlockdevOnError on_target_error,
-                            BlockDriverCompletionFunc *cb,
-                            void *opaque, Error **errp,
-                            const BlockJobDriver *driver,
-                            bool is_none_mode, BlockDriverState *base)
+                             const char *to_replace_node_name,
+                             int64_t speed, int64_t granularity,
+                             int64_t buf_size,
+                             BlockdevOnError on_source_error,
+                             BlockdevOnError on_target_error,
+                             BlockDriverCompletionFunc *cb,
+                             void *opaque, Error **errp,
+                             const BlockJobDriver *driver,
+                             bool is_none_mode, BlockDriverState *base)
 {
     MirrorBlockJob *s;
 
@@ -600,6 +633,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    s->to_replace_node_name = g_strdup(to_replace_node_name);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
     s->target = target;
@@ -621,6 +655,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 }
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *to_replace_node_name,
                   int64_t speed, int64_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
@@ -632,7 +667,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
-    mirror_start_job(bs, target, speed, granularity, buf_size,
+    mirror_start_job(bs, target, to_replace_node_name,
+                     speed, granularity, buf_size,
                      on_source_error, on_target_error, cb, opaque, errp,
                      &mirror_job_driver, is_none_mode, base);
 }
@@ -680,7 +716,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
     }
 
     bdrv_ref(base);
-    mirror_start_job(bs, base, speed, 0, 0,
+    mirror_start_job(bs, base, NULL, speed, 0, 0,
                      on_error, on_error, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
     if (local_err) {
diff --git a/blockdev.c b/blockdev.c
index 4cbcc56..bccc2f6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2106,6 +2106,9 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 
 void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
+                      bool has_new_node_name, const char *new_node_name,
+                      bool has_to_replace_node_name,
+                      const char *to_replace_node_name,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
@@ -2119,6 +2122,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     BlockDriverState *source, *target_bs;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
+    QDict *options = NULL;
     int flags;
     int64_t size;
     int ret;
@@ -2192,6 +2196,28 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    if (has_to_replace_node_name) {
+        BlockDriverState *to_replace_bs;
+
+        if (!has_new_node_name) {
+            error_setg(errp, "a new-node-name must be provided when replacing a"
+                             " named node of the graph");
+            return;
+        }
+
+        to_replace_bs = check_to_replace_node(to_replace_node_name, errp);
+
+        if (!to_replace_bs) {
+            return;
+        }
+
+        if (size < bdrv_getlength(to_replace_bs)) {
+            error_setg(errp, "cannot replace to-replace-node-name image with a "
+                             "mirror image that would be smaller in size");
+            return;
+        }
+    }
+
     if ((sync == MIRROR_SYNC_MODE_FULL || !source)
         && mode != NEW_IMAGE_MODE_EXISTING)
     {
@@ -2220,18 +2246,37 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    /* if we are planning to replace a graph node name the code should do a full
+     * mirror of the source image
+     */
+    if (has_to_replace_node_name && sync != MIRROR_SYNC_MODE_FULL) {
+        error_setg(errp,
+                   "to-replace-node-name can only be used with sync=full");
+        return;
+    }
+
+    if (has_new_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name", qstring_from_str(new_node_name));
+    }
+
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
     target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
-                    drv, &local_err);
+    ret = bdrv_open(&target_bs, target, NULL, options,
+                    flags | BDRV_O_NO_BACKING, drv, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
     }
 
-    mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+    /* pass the node name to replace to mirror start since it's loose coupling
+     * and will allow to check whether the node still exist at mirror completion
+     */
+    mirror_start(bs, target_bs,
+                 has_to_replace_node_name ? to_replace_node_name : NULL,
+                 speed, granularity, buf_size, sync,
                  on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
diff --git a/hmp.c b/hmp.c
index ccc35d4..9a4b8da 100644
--- a/hmp.c
+++ b/hmp.c
@@ -930,6 +930,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
     }
 
     qmp_drive_mirror(device, filename, !!format, format,
+                     false, NULL, false, NULL,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                      true, mode, false, 0, false, 0, false, 0,
                      false, 0, false, 0, &err);
diff --git a/include/block/block.h b/include/block/block.h
index 7d86e29..96afd01 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -179,6 +179,7 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_MIRROR,
     BLOCK_OP_TYPE_RESIZE,
     BLOCK_OP_TYPE_STREAM,
+    BLOCK_OP_TYPE_REPLACE,
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
@@ -319,6 +320,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                       BlockDriverState *candidate);
 bool bdrv_is_first_non_filter(BlockDriverState *candidate);
 
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(const char *to_replace_node_name,
+                                        Error **errp);
+
 /* async block I/O */
 typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
                                      int sector_num);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8d58334..ce65ed8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -86,7 +86,6 @@ struct BlockDriver {
      */
     bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
                                              BlockDriverState *candidate);
-
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
 
@@ -492,6 +491,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * mirror_start:
  * @bs: Block device to operate on.
  * @target: Block device to write to.
+ * @to_replace_node_name: Block graph node name to replace once the mirror is
+ *              done. Can only be used when full mirroring is selected.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @granularity: The chosen granularity for the dirty bitmap.
  * @buf_size: The amount of data that can be in flight at one time.
@@ -508,6 +509,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs will be switched to read from @target.
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *to_replace_node_name,
                   int64_t speed, int64_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index eec881b..23dcecd 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -769,6 +769,14 @@
 # @format: #optional the format of the new destination, default is to
 #          probe if @mode is 'existing', else the format of the source
 #
+# @new-node-name: #optional the new block driver state node name in the graph
+#                 (Since 2.1)
+#
+# @to-replace-node-name: #optional with sync=full graph node name to be
+#                        replaced by the new image when a whole image copy is
+#                        done. This can be used to repair broken Quorum files.
+#                        (Since 2.1)
+#
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
@@ -801,6 +809,7 @@
 ##
 { 'command': 'drive-mirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            '*new-node-name': 'str', '*to-replace-node-name': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d8aa4ed..bae74f5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1293,6 +1293,7 @@ EQMP
     {
         .name       = "drive-mirror",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
+                      "new-node-name:s?,to-replace-node-name:s?,"
                       "on-source-error:s?,on-target-error:s?,"
                       "granularity:i?,buf-size:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
@@ -1314,6 +1315,10 @@ Arguments:
 - "device": device name to operate on (json-string)
 - "target": name of new image file (json-string)
 - "format": format of new image (json-string, optional)
+- "new-node-name": the name of the new block driver state in the node graph
+                   (json-string, optional)
+- "to-replace-node-name": the block driver node name to replace when finished
+                          (json-string, optional)
 - "mode": how an image file should be created into the target
   file/device (NewImageMode, optional, default 'absolute-paths')
 - "speed": maximum speed of the streaming job, in bytes per second
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH v7 3/3] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode.
  2014-06-09 20:45 [Qemu-devel] [PATCH v7 0/3] Quorum maintainance operations Benoît Canet
  2014-06-09 20:45 ` [Qemu-devel] [PATCH v7 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
  2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror Benoît Canet
@ 2014-06-09 20:46 ` Benoît Canet
  2 siblings, 0 replies; 6+ messages in thread
From: Benoît Canet @ 2014-06-09 20:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

The to-replace-node-name is designed to allow repairing of broken Quorum file.
This patch introduce a new class TestRepairQuorum testing that the feature
works.
Some further work will be done on QEMU to improve the robutness of the tests.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/qemu-iotests/041.out |   4 +-
 2 files changed, 194 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index ec470b2..866df87 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -28,6 +28,12 @@ target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
 
+quorum_img1 = os.path.join(iotests.test_dir, 'quorum1.img')
+quorum_img2 = os.path.join(iotests.test_dir, 'quorum2.img')
+quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
+quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
+quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
+
 class ImageMirroringTestCase(iotests.QMPTestCase):
     '''Abstract base class for image mirroring test cases'''
 
@@ -42,8 +48,8 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
                     ready = True
 
     def wait_ready_and_cancel(self, drive='drive0'):
-        self.wait_ready(drive)
-        event = self.cancel_and_wait()
+        self.wait_ready(drive=drive)
+        event = self.cancel_and_wait(drive=drive)
         self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
         self.assert_qmp(event, 'data/type', 'mirror')
         self.assert_qmp(event, 'data/offset', self.image_len)
@@ -52,12 +58,12 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
     def complete_and_wait(self, drive='drive0', wait_ready=True):
         '''Complete a block job and wait for it to finish'''
         if wait_ready:
-            self.wait_ready()
+            self.wait_ready(drive=drive)
 
         result = self.vm.qmp('block-job-complete', device=drive)
         self.assert_qmp(result, 'return', {})
 
-        event = self.wait_until_completed()
+        event = self.wait_until_completed(drive=drive)
         self.assert_qmp(event, 'data/type', 'mirror')
 
 class TestSingleDrive(ImageMirroringTestCase):
@@ -718,5 +724,187 @@ class TestUnbackedSource(ImageMirroringTestCase):
         self.complete_and_wait()
         self.assert_no_active_block_jobs()
 
+class TestRepairQuorum(ImageMirroringTestCase):
+    """ This class test quorum file repair using drive-mirror.
+        It's mostly a fork of TestSingleDrive """
+    image_len = 1 * 1024 * 1024 # MB
+    IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
+
+    def setUp(self):
+        self.vm = iotests.VM()
+
+        # Add each individual quorum images
+        for i in self.IMAGES:
+            qemu_img('create', '-f', iotests.imgfmt, i,
+                     str(TestSingleDrive.image_len))
+            # Assign a node name to each quorum image in order to manipulate
+            # them
+            opts = "node-name=img%i" % self.IMAGES.index(i)
+            self.vm = self.vm.add_drive(i, opts)
+
+        self.vm.launch()
+
+        #assemble the quorum block device from the individual files
+        args = { "options" : { "driver": "quorum", "id": "quorum0",
+                 "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } }
+        result = self.vm.qmp("blockdev-add", **args)
+        self.assert_qmp(result, 'return', {})
+
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for i in self.IMAGES + [ quorum_repair_img ]:
+            # Do a try/except because the test may have deleted some images
+            try:
+                os.remove(i)
+            except OSError:
+                pass
+
+    def test_complete(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name="repair0",
+                             to_replace_node_name="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+        # TODO: a better test requiring some QEMU infrastructure will be added
+        #       to check that this file is really driven by quorum
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_cancel(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name="repair0",
+                             to_replace_node_name="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.cancel_and_wait(drive="quorum0", force=True)
+        # here we check that the last registered quorum file has not been
+        # swapped out and unref
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.vm.shutdown()
+
+    def test_cancel_after_ready(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name="repair0",
+                             to_replace_node_name="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.wait_ready_and_cancel(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        # here we check that the last registered quorum file has not been
+        # swapped out and unref
+        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_pause(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name="repair0",
+                             to_replace_node_name="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='quorum0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='quorum0')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_medium_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full',
+                             new_node_name='repair0',
+                             to_replace_node_name='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_image_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name='repair0',
+                             to_replace_node_name='img1',
+                             mode='existing',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full',
+                             new_node_name='repair0',
+                             to_replace_node_name='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_wrong_sync_mode(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0',
+                             new_node_name='repair0',
+                             to_replace_node_name='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_no_new_node_name(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             to_replace_node_name='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_unexistant_to_replace_node_name(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name='repair0',
+                             to_replace_node_name='img77',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_after_a_quorum_snapshot(self):
+        result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1',
+                             snapshot_file=quorum_snapshot_file,
+                             snapshot_node_name="snap1");
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name='repair0',
+                             to_replace_node_name="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             new_node_name='repair0',
+                             to_replace_node_name="snap1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+        # TODO: a better test requiring some QEMU infrastructure will be added
+        #       to check that this file is really driven by quorum
+        self.vm.shutdown()
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 6d9bee1..73e375a 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-...........................
+......................................
 ----------------------------------------------------------------------
-Ran 27 tests
+Ran 38 tests
 
 OK
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror
  2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror Benoît Canet
@ 2014-06-09 21:08   ` Eric Blake
  2014-06-10  6:12     ` Benoît Canet
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Blake @ 2014-06-09 21:08 UTC (permalink / raw)
  To: Benoît Canet, qemu-devel; +Cc: kwolf, Benoit Canet, stefanha, mreitz

[-- Attachment #1: Type: text/plain, Size: 3980 bytes --]

On 06/09/2014 02:46 PM, Benoît Canet wrote:
> node-name gives a name to the created BDS and registers it in the node graph.
> 
> to-replace-node-name can be used when drive-mirror is called with sync=full.

Why can't it work with other modes?  That is, if I have:

base1 <- snap1 \
base2 <- snap2  > quorum
base3 <- snap3 /

and want to replace the 'base3 <- snap3' arm of the quorum with 'base4
<- snap4', where base3 and base4 are identical, the fact that you are
forcing sync=full will not let me do so.  There's a lot of things where
if management does something stupid, then the guest will see data
instantly corrupted; but that doesn't mean that we necessarily have to
cripple the power of the command.

> 
> The purpose of these fields is to be able to reconstruct and replace a broken
> quorum file.
> 
> drive-mirror will bdrv_swap the new BDS named node-name with the one
> pointed by to-replace-node-name when the mirroring is finished.
> 
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---

> +
> +        if (size < bdrv_getlength(to_replace_bs)) {
> +            error_setg(errp, "cannot replace to-replace-node-name image with a "
> +                             "mirror image that would be smaller in size");

Should this be enforcing equality in size, rather than just prohibiting
shrinking?  Doesn't the quorum code already require that all quorum
members have equal size?


>  
> +    /* if we are planning to replace a graph node name the code should do a full
> +     * mirror of the source image
> +     */
> +    if (has_to_replace_node_name && sync != MIRROR_SYNC_MODE_FULL) {
> +        error_setg(errp,
> +                   "to-replace-node-name can only be used with sync=full");
> +        return;
> +    }

Again, I'm not sure if this restriction makes sense.

> +++ b/qapi/block-core.json
> @@ -769,6 +769,14 @@
>  # @format: #optional the format of the new destination, default is to
>  #          probe if @mode is 'existing', else the format of the source
>  #
> +# @new-node-name: #optional the new block driver state node name in the graph
> +#                 (Since 2.1)

Is it worth splitting this patch into two? The ability to name the new
node of a drive-mirror makes sense as an independent patch, which might
be applied sooner even while worrying about the semantics of how
replacement will work.

> +#
> +# @to-replace-node-name: #optional with sync=full graph node name to be
> +#                        replaced by the new image when a whole image copy is
> +#                        done. This can be used to repair broken Quorum files.
> +#                        (Since 2.1)

So if I understand correctly, the point of this command is that if I
have a quorum with three backing named nodes, and want to hotswap out
one of those modes, then I create a drive-mirror that names which of the
three nodes is the victim, and on completion, the quorum now has the
remaining two nodes and my new mirror as its new three node setup.

Am I correct that to-replace-node-name can only be used on a node that
is already composed of other nodes, and that the replacement must be one
of those nodes?

What if I have a 3/5 quorum - can I replace 2 nodes at once?

> +#
>  # @mode: #optional whether and how QEMU should create a new image, default is
>  #        'absolute-paths'.
>  #
> @@ -801,6 +809,7 @@
>  ##
>  { 'command': 'drive-mirror',
>    'data': { 'device': 'str', 'target': 'str', '*format': 'str',
> +            '*new-node-name': 'str', '*to-replace-node-name': 'str',

Bikeshedding: those are some long names.  Is it sufficient to go with
something shorter, '*node-name' for what to name the new mirror (again,
might be worth splitting that into its own patch), and '*replaces' for
the name of a node to be replaced?

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror
  2014-06-09 21:08   ` Eric Blake
@ 2014-06-10  6:12     ` Benoît Canet
  0 siblings, 0 replies; 6+ messages in thread
From: Benoît Canet @ 2014-06-10  6:12 UTC (permalink / raw)
  To: Eric Blake; +Cc: Benoît Canet, kwolf, qemu-devel, stefanha, mreitz

The Monday 09 Jun 2014 à 15:08:29 (-0600), Eric Blake wrote :
> On 06/09/2014 02:46 PM, Benoît Canet wrote:
> > node-name gives a name to the created BDS and registers it in the node graph.
> > 
> > to-replace-node-name can be used when drive-mirror is called with sync=full.
> 
> Why can't it work with other modes?  That is, if I have:
> 
> base1 <- snap1 \
> base2 <- snap2  > quorum
> base3 <- snap3 /
> 
> and want to replace the 'base3 <- snap3' arm of the quorum with 'base4
> <- snap4', where base3 and base4 are identical, the fact that you are
> forcing sync=full will not let me do so.  There's a lot of things where
> if management does something stupid, then the guest will see data
> instantly corrupted; but that doesn't mean that we necessarily have to
> cripple the power of the command.

I am affraid that the user could form loop in the graph.

> 
> > 
> > The purpose of these fields is to be able to reconstruct and replace a broken
> > quorum file.
> > 
> > drive-mirror will bdrv_swap the new BDS named node-name with the one
> > pointed by to-replace-node-name when the mirroring is finished.
> > 
> > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > ---
> 
> > +
> > +        if (size < bdrv_getlength(to_replace_bs)) {
> > +            error_setg(errp, "cannot replace to-replace-node-name image with a "
> > +                             "mirror image that would be smaller in size");
> 
> Should this be enforcing equality in size, rather than just prohibiting
> shrinking?  Doesn't the quorum code already require that all quorum
> members have equal size?

I though that quorum was still returning the smallest image size for getlength
and that it would be a way to grow the quorum by replacing with bigger image.
But it's not the case I will enforce equality in size.

> 
> 
> >  
> > +    /* if we are planning to replace a graph node name the code should do a full
> > +     * mirror of the source image
> > +     */
> > +    if (has_to_replace_node_name && sync != MIRROR_SYNC_MODE_FULL) {
> > +        error_setg(errp,
> > +                   "to-replace-node-name can only be used with sync=full");
> > +        return;
> > +    }
> 
> Again, I'm not sure if this restriction makes sense.
> 
> > +++ b/qapi/block-core.json
> > @@ -769,6 +769,14 @@
> >  # @format: #optional the format of the new destination, default is to
> >  #          probe if @mode is 'existing', else the format of the source
> >  #
> > +# @new-node-name: #optional the new block driver state node name in the graph
> > +#                 (Since 2.1)
> 
> Is it worth splitting this patch into two? The ability to name the new
> node of a drive-mirror makes sense as an independent patch, which might
> be applied sooner even while worrying about the semantics of how
> replacement will work.

ok

> 
> > +#
> > +# @to-replace-node-name: #optional with sync=full graph node name to be
> > +#                        replaced by the new image when a whole image copy is
> > +#                        done. This can be used to repair broken Quorum files.
> > +#                        (Since 2.1)
> 
> So if I understand correctly, the point of this command is that if I
> have a quorum with three backing named nodes, and want to hotswap out
> one of those modes, then I create a drive-mirror that names which of the
> three nodes is the victim, and on completion, the quorum now has the
> remaining two nodes and my new mirror as its new three node setup.
> 
> Am I correct that to-replace-node-name can only be used on a node that
> is already composed of other nodes, and that the replacement must be one
> of those nodes?
> 
> What if I have a 3/5 quorum - can I replace 2 nodes at once?

No you can't.
The first drive-mirror will lock the quorum device.

> 
> > +#
> >  # @mode: #optional whether and how QEMU should create a new image, default is
> >  #        'absolute-paths'.
> >  #
> > @@ -801,6 +809,7 @@
> >  ##
> >  { 'command': 'drive-mirror',
> >    'data': { 'device': 'str', 'target': 'str', '*format': 'str',
> > +            '*new-node-name': 'str', '*to-replace-node-name': 'str',
> 
> Bikeshedding: those are some long names.  Is it sufficient to go with
> something shorter, '*node-name' for what to name the new mirror (again,
> might be worth splitting that into its own patch), and '*replaces' for
> the name of a node to be replaced?

ok

> 
> -- 
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-06-10  6:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-09 20:45 [Qemu-devel] [PATCH v7 0/3] Quorum maintainance operations Benoît Canet
2014-06-09 20:45 ` [Qemu-devel] [PATCH v7 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 2/3] block: Add node-name and to-replace-node-name arguments to drive-mirror Benoît Canet
2014-06-09 21:08   ` Eric Blake
2014-06-10  6:12     ` Benoît Canet
2014-06-09 20:46 ` [Qemu-devel] [PATCH v7 3/3] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.