All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations
@ 2014-06-16 10:00 Benoît Canet
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 1/4] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-16 10:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, stefanha, mreitz

in v10:
        address max comments

Benoît Canet (4):
  quorum: Add the rewrite-corrupted parameter to quorum
  block: Add node-name argument to drive-mirror
  block: Add replaces argument to drive-mirror
  qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror
    node-name mode.

 block.c                    |  17 ++++
 block/mirror.c             |  60 +++++++++++---
 block/quorum.c             |  97 ++++++++++++++++++++--
 blockdev.c                 |  41 +++++++++-
 hmp.c                      |   1 +
 include/block/block.h      |   4 +
 include/block/block_int.h  |   3 +
 qapi/block-core.json       |  13 ++-
 qmp-commands.hx            |   5 ++
 tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/qemu-iotests/041.out |   4 +-
 tests/qemu-iotests/081     |  15 +++-
 tests/qemu-iotests/081.out |  10 +++
 13 files changed, 436 insertions(+), 30 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v10 1/4] quorum: Add the rewrite-corrupted parameter to quorum
  2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
@ 2014-06-16 10:00 ` Benoît Canet
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 2/4] block: Add node-name argument to drive-mirror Benoît Canet
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-16 10:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

On read operations when this parameter is set and some replicas are corrupted
while quorum can be reached quorum will proceed to rewrite the correct version
of the data to fix the corrupted replicas.

This will shine with SSD where the FTL will remap the same block at another
place on rewrite.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/quorum.c             | 97 +++++++++++++++++++++++++++++++++++++++++++---
 qapi/block-core.json       |  5 ++-
 tests/qemu-iotests/081     | 15 ++++++-
 tests/qemu-iotests/081.out | 10 +++++
 4 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/block/quorum.c b/block/quorum.c
index 426077a..1a7a01e 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -22,6 +22,7 @@
 
 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
+#define QUORUM_OPT_REWRITE        "rewrite-corrupted"
 
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
                             * It is useful to debug other block drivers by
                             * comparing them with a reference one.
                             */
+    bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+                            * block if Quorum is reached.
+                            */
 } BDRVQuorumState;
 
 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -104,13 +108,17 @@ struct QuorumAIOCB {
     int count;                  /* number of completed AIOCB */
     int success_count;          /* number of successfully completed AIOCB */
 
+    int rewrite_count;          /* number of replica to rewrite: count down to
+                                 * zero once writes are fired
+                                 */
+
     QuorumVotes votes;
 
     bool is_read;
     int vote_ret;
 };
 
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
 
 static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
     acb->count = 0;
     acb->success_count = 0;
+    acb->rewrite_count = 0;
     acb->votes.compare = quorum_sha256_compare;
     QLIST_INIT(&acb->votes.vote_list);
     acb->is_read = false;
@@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
     return false;
 }
 
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+    QuorumAIOCB *acb = opaque;
+
+    /* one less rewrite to do */
+    acb->rewrite_count--;
+
+    /* wait until all rewrite callbacks have completed */
+    if (acb->rewrite_count) {
+        return;
+    }
+
+    quorum_aio_finalize(acb);
+}
+
 static void quorum_aio_cb(void *opaque, int ret)
 {
     QuorumChildRequest *sacb = opaque;
     QuorumAIOCB *acb = sacb->parent;
     BDRVQuorumState *s = acb->common.bs->opaque;
+    bool rewrite = false;
 
     sacb->ret = ret;
     acb->count++;
@@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
 
     /* Do the vote on read */
     if (acb->is_read) {
-        quorum_vote(acb);
+        rewrite = quorum_vote(acb);
     } else {
         quorum_has_too_much_io_failed(acb);
     }
 
-    quorum_aio_finalize(acb);
+    /* if no rewrite is done the code will finish right away */
+    if (!rewrite) {
+        quorum_aio_finalize(acb);
+    }
 }
 
 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
     }
 }
 
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+                                        QuorumVoteValue *value)
+{
+    QuorumVoteVersion *version;
+    QuorumVoteItem *item;
+    int count = 0;
+
+    /* first count the number of bad versions: done first to avoid concurrency
+     * issues.
+     */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            count++;
+        }
+    }
+
+    /* quorum_rewrite_aio_cb will count down this to zero */
+    acb->rewrite_count = count;
+
+    /* now fire the correcting rewrites */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+        }
+    }
+
+    /* return true if any rewrite is done else false */
+    return count;
+}
+
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
     int i;
@@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
     return ret;
 }
 
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
 {
     bool quorum = true;
+    bool rewrite = false;
     int i, j, ret;
     QuorumVoteValue hash;
     BDRVQuorumState *s = acb->common.bs->opaque;
     QuorumVoteVersion *winner;
 
     if (quorum_has_too_much_io_failed(acb)) {
-        return;
+        return false;
     }
 
     /* get the index of the first successful read */
@@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
     /* Every successful read agrees */
     if (quorum) {
         quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return;
+        return false;
     }
 
     /* compute hashes for each successful read, also store indexes */
@@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
     /* some versions are bad print them */
     quorum_report_bad_versions(s, acb, &winner->value);
 
+    /* corruption correction is enabled */
+    if (s->rewrite_corrupted) {
+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+    }
+
 free_exit:
     /* free lists */
     quorum_free_vote_list(&acb->votes);
+    return rewrite;
 }
 
 static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -714,6 +786,11 @@ static QemuOptsList quorum_runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Trigger block verify mode if set",
         },
+        {
+            .name = QUORUM_OPT_REWRITE,
+            .type = QEMU_OPT_BOOL,
+            .help = "Rewrite corrupted block on read quorum",
+        },
         { /* end of list */ }
     },
 };
@@ -775,6 +852,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                 "and using two files with vote_threshold=2\n");
     }
 
+    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+    if (s->rewrite_corrupted && s->is_blkverify) {
+        error_setg(&local_err,
+                   "rewrite-corrupted=on cannot be used with blkverify=on");
+        ret = -EINVAL;
+        goto exit;
+    }
+
     /* allocate the children BlockDriverState array */
     s->bs = g_new0(BlockDriverState *, s->num_children);
     opened = g_new0(bool, s->num_children);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 33bd93f..eec881b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1333,12 +1333,15 @@
 #
 # @vote-threshold: the vote limit under which a read will fail
 #
+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
+#                     (Since 2.1)
+#
 # Since: 2.0
 ##
 { 'type': 'BlockdevOptionsQuorum',
   'data': { '*blkverify': 'bool',
             'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int' } }
+            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
 
 ##
 # @BlockdevOptions
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index b512d00..7ae4be2 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -134,15 +134,28 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
 EOF
 
 echo
+echo "== using quorum rewrite corrupted mode =="
+
+quorum="$quorum,file.rewrite-corrupted=on"
+
+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
+
+echo
+echo "== checking that quorum has corrected the corrupted file =="
+
+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
+echo
 echo "== breaking quorum =="
 
 $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
 echo
 echo "== checking that quorum is broken =="
 
 $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
 
-
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 84aeb0c..1629d2a 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
 
 
+== using quorum rewrite corrupted mode ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking that quorum has corrected the corrupted file ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
 == breaking quorum ==
 wrote 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 == checking that quorum is broken ==
 qemu-io: can't open device (null): Could not read image for determining its format: Input/output error
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v10 2/4] block: Add node-name argument to drive-mirror
  2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 1/4] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
@ 2014-06-16 10:00 ` Benoît Canet
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 3/4] block: Add replaces " Benoît Canet
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-16 10:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

This new argument can be used to specify the node-name of the new mirrored BDS.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 blockdev.c           | 11 +++++++++--
 hmp.c                |  1 +
 qapi/block-core.json |  4 ++++
 qmp-commands.hx      |  3 +++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 4cbcc56..06b14f2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2106,6 +2106,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 
 void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
+                      bool has_node_name, const char *node_name,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
@@ -2119,6 +2120,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     BlockDriverState *source, *target_bs;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
+    QDict *options = NULL;
     int flags;
     int64_t size;
     int ret;
@@ -2220,12 +2222,17 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    if (has_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name", qstring_from_str(node_name));
+    }
+
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
     target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
-                    drv, &local_err);
+    ret = bdrv_open(&target_bs, target, NULL, options,
+                    flags | BDRV_O_NO_BACKING, drv, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
diff --git a/hmp.c b/hmp.c
index ccc35d4..ef0d583 100644
--- a/hmp.c
+++ b/hmp.c
@@ -930,6 +930,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
     }
 
     qmp_drive_mirror(device, filename, !!format, format,
+                     false, NULL,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                      true, mode, false, 0, false, 0, false, 0,
                      false, 0, false, 0, &err);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index eec881b..4c28d7b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -769,6 +769,9 @@
 # @format: #optional the format of the new destination, default is to
 #          probe if @mode is 'existing', else the format of the source
 #
+# @node-name: #optional the new block driver state node name in the graph
+#             (Since 2.1)
+#
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
@@ -801,6 +804,7 @@
 ##
 { 'command': 'drive-mirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            '*node-name': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d8aa4ed..98c28f5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1293,6 +1293,7 @@ EQMP
     {
         .name       = "drive-mirror",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
+                      "node-name:s?,"
                       "on-source-error:s?,on-target-error:s?,"
                       "granularity:i?,buf-size:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
@@ -1314,6 +1315,8 @@ Arguments:
 - "device": device name to operate on (json-string)
 - "target": name of new image file (json-string)
 - "format": format of new image (json-string, optional)
+- "node-name": the name of the new block driver state in the node graph
+               (json-string, optional)
 - "mode": how an image file should be created into the target
   file/device (NewImageMode, optional, default 'absolute-paths')
 - "speed": maximum speed of the streaming job, in bytes per second
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 1/4] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 2/4] block: Add node-name argument to drive-mirror Benoît Canet
@ 2014-06-16 10:00 ` Benoît Canet
  2014-06-19  9:19   ` Stefan Hajnoczi
  2014-06-27 11:57   ` Kevin Wolf
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-16 10:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

drive-mirror will bdrv_swap the new BDS named node-name with the one
pointed by replaces when the mirroring is finished.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block.c                   | 17 ++++++++++++++
 block/mirror.c            | 60 +++++++++++++++++++++++++++++++++++++----------
 blockdev.c                | 30 +++++++++++++++++++++++-
 hmp.c                     |  2 +-
 include/block/block.h     |  4 ++++
 include/block/block_int.h |  3 +++
 qapi/block-core.json      |  6 ++++-
 qmp-commands.hx           |  4 +++-
 8 files changed, 109 insertions(+), 17 deletions(-)

diff --git a/block.c b/block.c
index 17f763d..318f1e6 100644
--- a/block.c
+++ b/block.c
@@ -5795,3 +5795,20 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 
     return false;
 }
+
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
+{
+    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+    if (!to_replace_bs) {
+        error_setg(errp, "Node name '%s' not found",
+                   node_name);
+        return NULL;
+    }
+
+    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+        return NULL;
+    }
+
+    return to_replace_bs;
+}
+
diff --git a/block/mirror.c b/block/mirror.c
index 94c8661..151167e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
     RateLimit limit;
     BlockDriverState *target;
     BlockDriverState *base;
+    /* The name of the graph node to replace */
+    char *replaces;
+    /* The BDS to replace */
+    BlockDriverState *to_replace;
+    /* Used to block operations on the drive-mirror-replace target */
+    Error *replace_blocker;
     bool is_none_mode;
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
@@ -490,10 +496,14 @@ immediate_exit:
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        BlockDriverState *to_replace = s->common.bs;
+        if (s->to_replace) {
+            to_replace = s->to_replace;
         }
-        bdrv_swap(s->target, s->common.bs);
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+        }
+        bdrv_swap(s->target, to_replace);
         if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
             /* drop the bs loop chain formed by the swap: break the loop then
              * trigger the unref from the top one */
@@ -502,6 +512,12 @@ immediate_exit:
             bdrv_unref(p);
         }
     }
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
+    g_free(s->replaces);
     bdrv_unref(s->target);
     block_job_completed(&s->common, ret);
 }
@@ -540,6 +556,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
         return;
     }
 
+    /* check the target bs is not blocked and block all operations on it */
+    if (s->replaces) {
+        s->to_replace = check_to_replace_node(s->replaces, errp);
+
+        if (!s->to_replace) {
+            return;
+        }
+
+        error_setg(&s->replace_blocker,
+                   "block device is in use by block-job-complete");
+        bdrv_op_block_all(s->to_replace, s->replace_blocker);
+        bdrv_ref(s->to_replace);
+    }
+
     s->should_complete = true;
     block_job_resume(job);
 }
@@ -562,14 +592,15 @@ static const BlockJobDriver commit_active_job_driver = {
 };
 
 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
-                            int64_t speed, int64_t granularity,
-                            int64_t buf_size,
-                            BlockdevOnError on_source_error,
-                            BlockdevOnError on_target_error,
-                            BlockDriverCompletionFunc *cb,
-                            void *opaque, Error **errp,
-                            const BlockJobDriver *driver,
-                            bool is_none_mode, BlockDriverState *base)
+                             const char *replaces,
+                             int64_t speed, int64_t granularity,
+                             int64_t buf_size,
+                             BlockdevOnError on_source_error,
+                             BlockdevOnError on_target_error,
+                             BlockDriverCompletionFunc *cb,
+                             void *opaque, Error **errp,
+                             const BlockJobDriver *driver,
+                             bool is_none_mode, BlockDriverState *base)
 {
     MirrorBlockJob *s;
 
@@ -600,6 +631,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
     s->target = target;
@@ -621,6 +653,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 }
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *replaces,
                   int64_t speed, int64_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
@@ -632,7 +665,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
-    mirror_start_job(bs, target, speed, granularity, buf_size,
+    mirror_start_job(bs, target, replaces,
+                     speed, granularity, buf_size,
                      on_source_error, on_target_error, cb, opaque, errp,
                      &mirror_job_driver, is_none_mode, base);
 }
@@ -680,7 +714,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
     }
 
     bdrv_ref(base);
-    mirror_start_job(bs, base, speed, 0, 0,
+    mirror_start_job(bs, base, NULL, speed, 0, 0,
                      on_error, on_error, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
     if (local_err) {
diff --git a/blockdev.c b/blockdev.c
index 06b14f2..237a548 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2107,6 +2107,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
                       bool has_node_name, const char *node_name,
+                      bool has_replaces, const char *replaces,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
@@ -2194,6 +2195,28 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    if (has_replaces) {
+        BlockDriverState *to_replace_bs;
+
+        if (!has_node_name) {
+            error_setg(errp, "a node-name must be provided when replacing a"
+                             " named node of the graph");
+            return;
+        }
+
+        to_replace_bs = check_to_replace_node(replaces, errp);
+
+        if (!to_replace_bs) {
+            return;
+        }
+
+        if (size != bdrv_getlength(to_replace_bs)) {
+            error_setg(errp, "cannot replace image with a mirror image of "
+                             "different size");
+            return;
+        }
+    }
+
     if ((sync == MIRROR_SYNC_MODE_FULL || !source)
         && mode != NEW_IMAGE_MODE_EXISTING)
     {
@@ -2238,7 +2261,12 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
-    mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+    /* pass the node name to replace to mirror start since it's loose coupling
+     * and will allow to check whether the node still exist at mirror completion
+     */
+    mirror_start(bs, target_bs,
+                 has_replaces ? replaces : NULL,
+                 speed, granularity, buf_size, sync,
                  on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
diff --git a/hmp.c b/hmp.c
index ef0d583..9a4b8da 100644
--- a/hmp.c
+++ b/hmp.c
@@ -930,7 +930,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
     }
 
     qmp_drive_mirror(device, filename, !!format, format,
-                     false, NULL,
+                     false, NULL, false, NULL,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                      true, mode, false, 0, false, 0, false, 0,
                      false, 0, false, 0, &err);
diff --git a/include/block/block.h b/include/block/block.h
index 7d86e29..b0ed701 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -179,6 +179,7 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_MIRROR,
     BLOCK_OP_TYPE_RESIZE,
     BLOCK_OP_TYPE_STREAM,
+    BLOCK_OP_TYPE_REPLACE,
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
@@ -319,6 +320,9 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                       BlockDriverState *candidate);
 bool bdrv_is_first_non_filter(BlockDriverState *candidate);
 
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp);
+
 /* async block I/O */
 typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
                                      int sector_num);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8d58334..81ce2ee 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -492,6 +492,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * mirror_start:
  * @bs: Block device to operate on.
  * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ *            only be used when full mirroring is selected.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @granularity: The chosen granularity for the dirty bitmap.
  * @buf_size: The amount of data that can be in flight at one time.
@@ -508,6 +510,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs will be switched to read from @target.
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *replaces,
                   int64_t speed, int64_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4c28d7b..0f8e703 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -772,6 +772,10 @@
 # @node-name: #optional the new block driver state node name in the graph
 #             (Since 2.1)
 #
+# @replaces: #optional with sync=full graph node name to be replaced by the new
+#            image when a whole image copy is done. This can be used to repair
+#            broken Quorum files. (Since 2.1)
+#
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
@@ -804,7 +808,7 @@
 ##
 { 'command': 'drive-mirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
-            '*node-name': 'str',
+            '*node-name': 'str', '*replaces': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 98c28f5..4a76054 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1293,7 +1293,7 @@ EQMP
     {
         .name       = "drive-mirror",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "node-name:s?,"
+                      "node-name:s?,replaces:s?,"
                       "on-source-error:s?,on-target-error:s?,"
                       "granularity:i?,buf-size:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
@@ -1317,6 +1317,8 @@ Arguments:
 - "format": format of new image (json-string, optional)
 - "node-name": the name of the new block driver state in the node graph
                (json-string, optional)
+- "replaces": the block driver node name to replace when finished
+              (json-string, optional)
 - "mode": how an image file should be created into the target
   file/device (NewImageMode, optional, default 'absolute-paths')
 - "speed": maximum speed of the streaming job, in bytes per second
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode.
  2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
                   ` (2 preceding siblings ...)
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 3/4] block: Add replaces " Benoît Canet
@ 2014-06-16 10:00 ` Benoît Canet
  2014-06-17 16:17   ` Max Reitz
  2014-06-17 18:21   ` Max Reitz
  2014-06-19  4:46 ` [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Stefan Hajnoczi
  2014-06-27 12:19 ` Kevin Wolf
  5 siblings, 2 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-16 10:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha

The to-replace-node-name is designed to allow repairing a broken Quorum file.
This patch introduces a new class TestRepairQuorum testing that the feature
works.
Some further work will be done on QEMU to improve the robustness of the tests.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/qemu-iotests/041.out |   4 +-
 2 files changed, 194 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index ec470b2..c0480e1 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -28,6 +28,12 @@ target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
 
+quorum_img1 = os.path.join(iotests.test_dir, 'quorum1.img')
+quorum_img2 = os.path.join(iotests.test_dir, 'quorum2.img')
+quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
+quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
+quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
+
 class ImageMirroringTestCase(iotests.QMPTestCase):
     '''Abstract base class for image mirroring test cases'''
 
@@ -42,8 +48,8 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
                     ready = True
 
     def wait_ready_and_cancel(self, drive='drive0'):
-        self.wait_ready(drive)
-        event = self.cancel_and_wait()
+        self.wait_ready(drive=drive)
+        event = self.cancel_and_wait(drive=drive)
         self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
         self.assert_qmp(event, 'data/type', 'mirror')
         self.assert_qmp(event, 'data/offset', self.image_len)
@@ -52,12 +58,12 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
     def complete_and_wait(self, drive='drive0', wait_ready=True):
         '''Complete a block job and wait for it to finish'''
         if wait_ready:
-            self.wait_ready()
+            self.wait_ready(drive=drive)
 
         result = self.vm.qmp('block-job-complete', device=drive)
         self.assert_qmp(result, 'return', {})
 
-        event = self.wait_until_completed()
+        event = self.wait_until_completed(drive=drive)
         self.assert_qmp(event, 'data/type', 'mirror')
 
 class TestSingleDrive(ImageMirroringTestCase):
@@ -718,5 +724,187 @@ class TestUnbackedSource(ImageMirroringTestCase):
         self.complete_and_wait()
         self.assert_no_active_block_jobs()
 
+class TestRepairQuorum(ImageMirroringTestCase):
+    """ This class test quorum file repair using drive-mirror.
+        It's mostly a fork of TestSingleDrive """
+    image_len = 1 * 1024 * 1024 # MB
+    IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
+
+    def setUp(self):
+        self.vm = iotests.VM()
+
+        # Add each individual quorum images
+        for i in self.IMAGES:
+            qemu_img('create', '-f', iotests.imgfmt, i,
+                     str(TestSingleDrive.image_len))
+            # Assign a node name to each quorum image in order to manipulate
+            # them
+            opts = "node-name=img%i" % self.IMAGES.index(i)
+            self.vm = self.vm.add_drive(i, opts)
+
+        self.vm.launch()
+
+        #assemble the quorum block device from the individual files
+        args = { "options" : { "driver": "quorum", "id": "quorum0",
+                 "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } }
+        result = self.vm.qmp("blockdev-add", **args)
+        self.assert_qmp(result, 'return', {})
+
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for i in self.IMAGES + [ quorum_repair_img ]:
+            # Do a try/except because the test may have deleted some images
+            try:
+                os.remove(i)
+            except OSError:
+                pass
+
+    def test_complete(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+        # TODO: a better test requiring some QEMU infrastructure will be added
+        #       to check that this file is really driven by quorum
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_cancel(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.cancel_and_wait(drive="quorum0", force=True)
+        # here we check that the last registered quorum file has not been
+        # swapped out and unref
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.vm.shutdown()
+
+    def test_cancel_after_ready(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.wait_ready_and_cancel(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        # here we check that the last registered quorum file has not been
+        # swapped out and unref
+        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_pause(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='quorum0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='quorum0')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_medium_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full',
+                             node_name='repair0',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_image_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces='img1',
+                             mode='existing',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full',
+                             node_name='repair0',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_wrong_sync_mode(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0',
+                             node_name='repair0',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_no_node_name(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_unexistant_replaces(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces='img77',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_after_a_quorum_snapshot(self):
+        result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1',
+                             snapshot_file=quorum_snapshot_file,
+                             snapshot_node_name="snap1");
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces="snap1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+        # TODO: a better test requiring some QEMU infrastructure will be added
+        #       to check that this file is really driven by quorum
+        self.vm.shutdown()
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 6d9bee1..73e375a 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-...........................
+......................................
 ----------------------------------------------------------------------
-Ran 27 tests
+Ran 38 tests
 
 OK
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode.
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
@ 2014-06-17 16:17   ` Max Reitz
  2014-06-17 18:19     ` Benoît Canet
  2014-06-17 18:21   ` Max Reitz
  1 sibling, 1 reply; 17+ messages in thread
From: Max Reitz @ 2014-06-17 16:17 UTC (permalink / raw)
  To: Benoît Canet, qemu-devel; +Cc: kwolf, Benoit Canet, stefanha

On 16.06.2014 12:00, Benoît Canet wrote:
> The to-replace-node-name is designed to allow repairing a broken Quorum file.
> This patch introduces a new class TestRepairQuorum testing that the feature
> works.
> Some further work will be done on QEMU to improve the robustness of the tests.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
>   tests/qemu-iotests/041.out |   4 +-
>   2 files changed, 194 insertions(+), 6 deletions(-)

The test looks fine in general and it does work, so I would give it a 
reviewed-by. However, I still have some open questions from v9 about why 
this test works, so I'll still wait. ;-)

Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode.
  2014-06-17 16:17   ` Max Reitz
@ 2014-06-17 18:19     ` Benoît Canet
  2014-06-17 18:20       ` Max Reitz
  0 siblings, 1 reply; 17+ messages in thread
From: Benoît Canet @ 2014-06-17 18:19 UTC (permalink / raw)
  To: Max Reitz; +Cc: Benoît Canet, kwolf, qemu-devel, stefanha

The Tuesday 17 Jun 2014 à 18:17:46 (+0200), Max Reitz wrote :
> On 16.06.2014 12:00, Benoît Canet wrote:
> >The to-replace-node-name is designed to allow repairing a broken Quorum file.
> >This patch introduces a new class TestRepairQuorum testing that the feature
> >works.
> >Some further work will be done on QEMU to improve the robustness of the tests.
> >
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> >  tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
> >  tests/qemu-iotests/041.out |   4 +-
> >  2 files changed, 194 insertions(+), 6 deletions(-)
> 
> The test looks fine in general and it does work, so I would give it
> a reviewed-by. However, I still have some open questions from v9
> about why this test works, so I'll still wait. ;-)

It works because node name is a linked list so the insertion order make the
positions in the json predictible.


> 
> Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode.
  2014-06-17 18:19     ` Benoît Canet
@ 2014-06-17 18:20       ` Max Reitz
  0 siblings, 0 replies; 17+ messages in thread
From: Max Reitz @ 2014-06-17 18:20 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, qemu-devel, stefanha

On 17.06.2014 20:19, Benoît Canet wrote:
> The Tuesday 17 Jun 2014 à 18:17:46 (+0200), Max Reitz wrote :
>> On 16.06.2014 12:00, Benoît Canet wrote:
>>> The to-replace-node-name is designed to allow repairing a broken Quorum file.
>>> This patch introduces a new class TestRepairQuorum testing that the feature
>>> works.
>>> Some further work will be done on QEMU to improve the robustness of the tests.
>>>
>>> Signed-off-by: Benoit Canet <benoit@irqsave.net>
>>> ---
>>>   tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
>>>   tests/qemu-iotests/041.out |   4 +-
>>>   2 files changed, 194 insertions(+), 6 deletions(-)
>> The test looks fine in general and it does work, so I would give it
>> a reviewed-by. However, I still have some open questions from v9
>> about why this test works, so I'll still wait. ;-)
> It works because node name is a linked list so the insertion order make the
> positions in the json predictible.

Oh, sorry, I just saw your reply to my review for v9. Thank you. :-)

Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode.
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
  2014-06-17 16:17   ` Max Reitz
@ 2014-06-17 18:21   ` Max Reitz
  1 sibling, 0 replies; 17+ messages in thread
From: Max Reitz @ 2014-06-17 18:21 UTC (permalink / raw)
  To: Benoît Canet, qemu-devel; +Cc: kwolf, Benoit Canet, stefanha

On 16.06.2014 12:00, Benoît Canet wrote:
> The to-replace-node-name is designed to allow repairing a broken Quorum file.
> This patch introduces a new class TestRepairQuorum testing that the feature
> works.
> Some further work will be done on QEMU to improve the robustness of the tests.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
>   tests/qemu-iotests/041.out |   4 +-
>   2 files changed, 194 insertions(+), 6 deletions(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations
  2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
                   ` (3 preceding siblings ...)
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
@ 2014-06-19  4:46 ` Stefan Hajnoczi
  2014-06-27 12:19 ` Kevin Wolf
  5 siblings, 0 replies; 17+ messages in thread
From: Stefan Hajnoczi @ 2014-06-19  4:46 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, qemu-devel, stefanha, mreitz

[-- Attachment #1: Type: text/plain, Size: 1413 bytes --]

On Mon, Jun 16, 2014 at 12:00:53PM +0200, Benoît Canet wrote:
> in v10:
>         address max comments
> 
> Benoît Canet (4):
>   quorum: Add the rewrite-corrupted parameter to quorum
>   block: Add node-name argument to drive-mirror
>   block: Add replaces argument to drive-mirror
>   qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror
>     node-name mode.
> 
>  block.c                    |  17 ++++
>  block/mirror.c             |  60 +++++++++++---
>  block/quorum.c             |  97 ++++++++++++++++++++--
>  blockdev.c                 |  41 +++++++++-
>  hmp.c                      |   1 +
>  include/block/block.h      |   4 +
>  include/block/block_int.h  |   3 +
>  qapi/block-core.json       |  13 ++-
>  qmp-commands.hx            |   5 ++
>  tests/qemu-iotests/041     | 196 ++++++++++++++++++++++++++++++++++++++++++++-
>  tests/qemu-iotests/041.out |   4 +-
>  tests/qemu-iotests/081     |  15 +++-
>  tests/qemu-iotests/081.out |  10 +++
>  13 files changed, 436 insertions(+), 30 deletions(-)

I'm happy with this but need to review Jeff's series first.  Your series
assumes that nodes are protected by op blockers, which they currently
aren't (only the root node has op blockers).  So we need a solution to
that before it's safe to allow operations on nodes - I think Jeff's
series tackles that.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 3/4] block: Add replaces " Benoît Canet
@ 2014-06-19  9:19   ` Stefan Hajnoczi
  2014-06-27 11:57   ` Kevin Wolf
  1 sibling, 0 replies; 17+ messages in thread
From: Stefan Hajnoczi @ 2014-06-19  9:19 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, mreitz, qemu-devel, stefanha, Benoit Canet

[-- Attachment #1: Type: text/plain, Size: 838 bytes --]

On Mon, Jun 16, 2014 at 12:00:56PM +0200, Benoît Canet wrote:
> +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> +{
> +    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
> +    if (!to_replace_bs) {
> +        error_setg(errp, "Node name '%s' not found",
> +                   node_name);
> +        return NULL;
> +    }
> +
> +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> +        return NULL;
> +    }

Currently bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)
is not effective since child nodes never have op blockers.  Only the
root node/drive has op blockers.

We discussed propagating blockers to child nodes.  Can you send a patch
to address this?  I think Jeff will need to rebase his node-name series
too.

Stefan

[-- Attachment #2: Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 3/4] block: Add replaces " Benoît Canet
  2014-06-19  9:19   ` Stefan Hajnoczi
@ 2014-06-27 11:57   ` Kevin Wolf
  2014-06-27 12:03     ` Benoît Canet
  2014-06-27 12:53     ` Benoît Canet
  1 sibling, 2 replies; 17+ messages in thread
From: Kevin Wolf @ 2014-06-27 11:57 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Benoit Canet, qemu-devel, stefanha, mreitz

Am 16.06.2014 um 12:00 hat Benoît Canet geschrieben:
> drive-mirror will bdrv_swap the new BDS named node-name with the one
> pointed by replaces when the mirroring is finished.
> 
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
> ---
>  block.c                   | 17 ++++++++++++++
>  block/mirror.c            | 60 +++++++++++++++++++++++++++++++++++++----------
>  blockdev.c                | 30 +++++++++++++++++++++++-
>  hmp.c                     |  2 +-
>  include/block/block.h     |  4 ++++
>  include/block/block_int.h |  3 +++
>  qapi/block-core.json      |  6 ++++-
>  qmp-commands.hx           |  4 +++-
>  8 files changed, 109 insertions(+), 17 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 17f763d..318f1e6 100644
> --- a/block.c
> +++ b/block.c
> @@ -5795,3 +5795,20 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
>  
>      return false;
>  }
> +
> +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> +{
> +    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
> +    if (!to_replace_bs) {
> +        error_setg(errp, "Node name '%s' not found",
> +                   node_name);

Unnecessary line break.

> +        return NULL;
> +    }
> +
> +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> +        return NULL;
> +    }
> +
> +    return to_replace_bs;
> +}
> +

Empty line before EOF.

> diff --git a/block/mirror.c b/block/mirror.c
> index 94c8661..151167e 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
>      RateLimit limit;
>      BlockDriverState *target;
>      BlockDriverState *base;
> +    /* The name of the graph node to replace */
> +    char *replaces;
> +    /* The BDS to replace */
> +    BlockDriverState *to_replace;
> +    /* Used to block operations on the drive-mirror-replace target */
> +    Error *replace_blocker;
>      bool is_none_mode;
>      BlockdevOnError on_source_error, on_target_error;
>      bool synced;
> @@ -490,10 +496,14 @@ immediate_exit:
>      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
>      bdrv_iostatus_disable(s->target);
>      if (s->should_complete && ret == 0) {
> -        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> -            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> +        BlockDriverState *to_replace = s->common.bs;
> +        if (s->to_replace) {
> +            to_replace = s->to_replace;
>          }
> -        bdrv_swap(s->target, s->common.bs);
> +        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> +            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> +        }
> +        bdrv_swap(s->target, to_replace);
>          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
>              /* drop the bs loop chain formed by the swap: break the loop then
>               * trigger the unref from the top one */
> @@ -502,6 +512,12 @@ immediate_exit:
>              bdrv_unref(p);
>          }
>      }
> +    if (s->to_replace) {
> +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> +        error_free(s->replace_blocker);
> +        bdrv_unref(s->to_replace);
> +    }
> +    g_free(s->replaces);
>      bdrv_unref(s->target);
>      block_job_completed(&s->common, ret);
>  }
> @@ -540,6 +556,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
>          return;
>      }
>  
> +    /* check the target bs is not blocked and block all operations on it */
> +    if (s->replaces) {
> +        s->to_replace = check_to_replace_node(s->replaces, errp);
> +

This empty line looks unusual.

> +        if (!s->to_replace) {
> +            return;
> +        }

So here is the thing that I really wanted to comment on. In the case of
a REPLACE blocker being set, this is a silent failure. The completion
command will return success, but s->should_complete won't actually be
set, so the completion doesn't happen. The only thing that actually
happens is the bdrv_open_backing_file(s->target) (which looks somewhat
questionable, too...)

Now I would expect that the REPLACE blocker is actually set for any
backing file, because that is what bdrv_set_backing_hd() does. For
quorum it does work as expected because quorum children don't get any
backing_blocker (we need to check whether they should get something
similar from the quorum BDS), so this is probably why it escaped your
testing. We'll need a test case that tries replacing some ordinary
backing file.

Now I think the (accidental?) restriction to only replacing quorum nodes
actually makes this patch pretty safe, so maybe it would be nice to keep
this behaviour; but we need to fix it to not fail silently but return an
explicit error.

> +        error_setg(&s->replace_blocker,
> +                   "block device is in use by block-job-complete");
> +        bdrv_op_block_all(s->to_replace, s->replace_blocker);
> +        bdrv_ref(s->to_replace);
> +    }
> +
>      s->should_complete = true;
>      block_job_resume(job);
>  }
> @@ -562,14 +592,15 @@ static const BlockJobDriver commit_active_job_driver = {
>  };
>  
>  static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> -                            int64_t speed, int64_t granularity,
> -                            int64_t buf_size,
> -                            BlockdevOnError on_source_error,
> -                            BlockdevOnError on_target_error,
> -                            BlockDriverCompletionFunc *cb,
> -                            void *opaque, Error **errp,
> -                            const BlockJobDriver *driver,
> -                            bool is_none_mode, BlockDriverState *base)
> +                             const char *replaces,
> +                             int64_t speed, int64_t granularity,
> +                             int64_t buf_size,
> +                             BlockdevOnError on_source_error,
> +                             BlockdevOnError on_target_error,
> +                             BlockDriverCompletionFunc *cb,
> +                             void *opaque, Error **errp,
> +                             const BlockJobDriver *driver,
> +                             bool is_none_mode, BlockDriverState *base)
>  {
>      MirrorBlockJob *s;
>  
> @@ -600,6 +631,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
>          return;
>      }
>  
> +    s->replaces = g_strdup(replaces);
>      s->on_source_error = on_source_error;
>      s->on_target_error = on_target_error;
>      s->target = target;

One design question that isn't quite clear to me yet is why you resolve
the device name only in mirror_complete() and not here. This means that
the drive-mirror QMP command can refer to one BDS with node-name foo,
which then gets removed and another BDS with node-name foo is added, and
then it would refer to the new BDS on completion time.

I would find it less surprising if we took a reference to the old BDS
here so that you can't remove it. Perhaps setting the replace_blocker
here already would be safer, too.

> @@ -621,6 +653,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
>  }
>  
>  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> +                  const char *replaces,
>                    int64_t speed, int64_t granularity, int64_t buf_size,
>                    MirrorSyncMode mode, BlockdevOnError on_source_error,
>                    BlockdevOnError on_target_error,
> @@ -632,7 +665,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>  
>      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>      base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
> -    mirror_start_job(bs, target, speed, granularity, buf_size,
> +    mirror_start_job(bs, target, replaces,
> +                     speed, granularity, buf_size,
>                       on_source_error, on_target_error, cb, opaque, errp,
>                       &mirror_job_driver, is_none_mode, base);
>  }
> @@ -680,7 +714,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
>      }
>  
>      bdrv_ref(base);
> -    mirror_start_job(bs, base, speed, 0, 0,
> +    mirror_start_job(bs, base, NULL, speed, 0, 0,
>                       on_error, on_error, cb, opaque, &local_err,
>                       &commit_active_job_driver, false, base);
>      if (local_err) {
> diff --git a/blockdev.c b/blockdev.c
> index 06b14f2..237a548 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2107,6 +2107,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
>  void qmp_drive_mirror(const char *device, const char *target,
>                        bool has_format, const char *format,
>                        bool has_node_name, const char *node_name,
> +                      bool has_replaces, const char *replaces,
>                        enum MirrorSyncMode sync,
>                        bool has_mode, enum NewImageMode mode,
>                        bool has_speed, int64_t speed,
> @@ -2194,6 +2195,28 @@ void qmp_drive_mirror(const char *device, const char *target,
>          return;
>      }
>  
> +    if (has_replaces) {
> +        BlockDriverState *to_replace_bs;
> +
> +        if (!has_node_name) {
> +            error_setg(errp, "a node-name must be provided when replacing a"
> +                             " named node of the graph");
> +            return;
> +        }
> +
> +        to_replace_bs = check_to_replace_node(replaces, errp);
> +
> +        if (!to_replace_bs) {
> +            return;
> +        }
> +
> +        if (size != bdrv_getlength(to_replace_bs)) {
> +            error_setg(errp, "cannot replace image with a mirror image of "
> +                             "different size");
> +            return;
> +        }

We may want to loosen some of these restrictions later, but it's good to
start with more restrictions if in doubt.

> +    }
> +
>      if ((sync == MIRROR_SYNC_MODE_FULL || !source)
>          && mode != NEW_IMAGE_MODE_EXISTING)
>      {

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-27 11:57   ` Kevin Wolf
@ 2014-06-27 12:03     ` Benoît Canet
  2014-06-27 12:53     ` Benoît Canet
  1 sibling, 0 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-27 12:03 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Benoît Canet, qemu-devel, stefanha, mreitz

The Friday 27 Jun 2014 à 13:57:02 (+0200), Kevin Wolf wrote :
> Am 16.06.2014 um 12:00 hat Benoît Canet geschrieben:
> > drive-mirror will bdrv_swap the new BDS named node-name with the one
> > pointed by replaces when the mirroring is finished.
> > 
> > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > Reviewed-by: Max Reitz <mreitz@redhat.com>
> > ---
> >  block.c                   | 17 ++++++++++++++
> >  block/mirror.c            | 60 +++++++++++++++++++++++++++++++++++++----------
> >  blockdev.c                | 30 +++++++++++++++++++++++-
> >  hmp.c                     |  2 +-
> >  include/block/block.h     |  4 ++++
> >  include/block/block_int.h |  3 +++
> >  qapi/block-core.json      |  6 ++++-
> >  qmp-commands.hx           |  4 +++-
> >  8 files changed, 109 insertions(+), 17 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 17f763d..318f1e6 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -5795,3 +5795,20 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
> >  
> >      return false;
> >  }
> > +
> > +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> > +{
> > +    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
> > +    if (!to_replace_bs) {
> > +        error_setg(errp, "Node name '%s' not found",
> > +                   node_name);
> 
> Unnecessary line break.
> 
> > +        return NULL;
> > +    }
> > +
> > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > +        return NULL;
> > +    }
> > +
> > +    return to_replace_bs;
> > +}
> > +
> 
> Empty line before EOF.
> 
> > diff --git a/block/mirror.c b/block/mirror.c
> > index 94c8661..151167e 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
> >      RateLimit limit;
> >      BlockDriverState *target;
> >      BlockDriverState *base;
> > +    /* The name of the graph node to replace */
> > +    char *replaces;
> > +    /* The BDS to replace */
> > +    BlockDriverState *to_replace;
> > +    /* Used to block operations on the drive-mirror-replace target */
> > +    Error *replace_blocker;
> >      bool is_none_mode;
> >      BlockdevOnError on_source_error, on_target_error;
> >      bool synced;
> > @@ -490,10 +496,14 @@ immediate_exit:
> >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> >      bdrv_iostatus_disable(s->target);
> >      if (s->should_complete && ret == 0) {
> > -        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> > -            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> > +        BlockDriverState *to_replace = s->common.bs;
> > +        if (s->to_replace) {
> > +            to_replace = s->to_replace;
> >          }
> > -        bdrv_swap(s->target, s->common.bs);
> > +        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> > +            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> > +        }
> > +        bdrv_swap(s->target, to_replace);
> >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> >              /* drop the bs loop chain formed by the swap: break the loop then
> >               * trigger the unref from the top one */
> > @@ -502,6 +512,12 @@ immediate_exit:
> >              bdrv_unref(p);
> >          }
> >      }
> > +    if (s->to_replace) {
> > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > +        error_free(s->replace_blocker);
> > +        bdrv_unref(s->to_replace);
> > +    }
> > +    g_free(s->replaces);
> >      bdrv_unref(s->target);
> >      block_job_completed(&s->common, ret);
> >  }
> > @@ -540,6 +556,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
> >          return;
> >      }
> >  
> > +    /* check the target bs is not blocked and block all operations on it */
> > +    if (s->replaces) {
> > +        s->to_replace = check_to_replace_node(s->replaces, errp);
> > +
> 
> This empty line looks unusual.
> 
> > +        if (!s->to_replace) {
> > +            return;
> > +        }
> 
> So here is the thing that I really wanted to comment on. In the case of
> a REPLACE blocker being set, this is a silent failure. The completion
> command will return success, but s->should_complete won't actually be
> set, so the completion doesn't happen. The only thing that actually
> happens is the bdrv_open_backing_file(s->target) (which looks somewhat
> questionable, too...)
> 
> Now I would expect that the REPLACE blocker is actually set for any
> backing file, because that is what bdrv_set_backing_hd() does. For
> quorum it does work as expected because quorum children don't get any
> backing_blocker (we need to check whether they should get something
> similar from the quorum BDS), so this is probably why it escaped your
> testing. We'll need a test case that tries replacing some ordinary
> backing file.
> 
> Now I think the (accidental?) restriction to only replacing quorum nodes
> actually makes this patch pretty safe, so maybe it would be nice to keep
> this behaviour; but we need to fix it to not fail silently but return an
> explicit error.
> 
> > +        error_setg(&s->replace_blocker,
> > +                   "block device is in use by block-job-complete");
> > +        bdrv_op_block_all(s->to_replace, s->replace_blocker);
> > +        bdrv_ref(s->to_replace);
> > +    }
> > +
> >      s->should_complete = true;
> >      block_job_resume(job);
> >  }
> > @@ -562,14 +592,15 @@ static const BlockJobDriver commit_active_job_driver = {
> >  };
> >  
> >  static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> > -                            int64_t speed, int64_t granularity,
> > -                            int64_t buf_size,
> > -                            BlockdevOnError on_source_error,
> > -                            BlockdevOnError on_target_error,
> > -                            BlockDriverCompletionFunc *cb,
> > -                            void *opaque, Error **errp,
> > -                            const BlockJobDriver *driver,
> > -                            bool is_none_mode, BlockDriverState *base)
> > +                             const char *replaces,
> > +                             int64_t speed, int64_t granularity,
> > +                             int64_t buf_size,
> > +                             BlockdevOnError on_source_error,
> > +                             BlockdevOnError on_target_error,
> > +                             BlockDriverCompletionFunc *cb,
> > +                             void *opaque, Error **errp,
> > +                             const BlockJobDriver *driver,
> > +                             bool is_none_mode, BlockDriverState *base)
> >  {
> >      MirrorBlockJob *s;
> >  
> > @@ -600,6 +631,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> >          return;
> >      }
> >  
> > +    s->replaces = g_strdup(replaces);
> >      s->on_source_error = on_source_error;
> >      s->on_target_error = on_target_error;
> >      s->target = target;
> 
> One design question that isn't quite clear to me yet is why you resolve
> the device name only in mirror_complete() and not here. This means that
> the drive-mirror QMP command can refer to one BDS with node-name foo,
> which then gets removed and another BDS with node-name foo is added, and
> then it would refer to the new BDS on completion time.
> 
> I would find it less surprising if we took a reference to the old BDS
> here so that you can't remove it. Perhaps setting the replace_blocker
> here already would be safer, too.

I have done late binding because this allow the code to block the to replace
BDS late. If I get a reference to it early the code must block it early.

> 
> > @@ -621,6 +653,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> >  }
> >  
> >  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> > +                  const char *replaces,
> >                    int64_t speed, int64_t granularity, int64_t buf_size,
> >                    MirrorSyncMode mode, BlockdevOnError on_source_error,
> >                    BlockdevOnError on_target_error,
> > @@ -632,7 +665,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> >  
> >      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
> >      base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
> > -    mirror_start_job(bs, target, speed, granularity, buf_size,
> > +    mirror_start_job(bs, target, replaces,
> > +                     speed, granularity, buf_size,
> >                       on_source_error, on_target_error, cb, opaque, errp,
> >                       &mirror_job_driver, is_none_mode, base);
> >  }
> > @@ -680,7 +714,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
> >      }
> >  
> >      bdrv_ref(base);
> > -    mirror_start_job(bs, base, speed, 0, 0,
> > +    mirror_start_job(bs, base, NULL, speed, 0, 0,
> >                       on_error, on_error, cb, opaque, &local_err,
> >                       &commit_active_job_driver, false, base);
> >      if (local_err) {
> > diff --git a/blockdev.c b/blockdev.c
> > index 06b14f2..237a548 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -2107,6 +2107,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
> >  void qmp_drive_mirror(const char *device, const char *target,
> >                        bool has_format, const char *format,
> >                        bool has_node_name, const char *node_name,
> > +                      bool has_replaces, const char *replaces,
> >                        enum MirrorSyncMode sync,
> >                        bool has_mode, enum NewImageMode mode,
> >                        bool has_speed, int64_t speed,
> > @@ -2194,6 +2195,28 @@ void qmp_drive_mirror(const char *device, const char *target,
> >          return;
> >      }
> >  
> > +    if (has_replaces) {
> > +        BlockDriverState *to_replace_bs;
> > +
> > +        if (!has_node_name) {
> > +            error_setg(errp, "a node-name must be provided when replacing a"
> > +                             " named node of the graph");
> > +            return;
> > +        }
> > +
> > +        to_replace_bs = check_to_replace_node(replaces, errp);
> > +
> > +        if (!to_replace_bs) {
> > +            return;
> > +        }
> > +
> > +        if (size != bdrv_getlength(to_replace_bs)) {
> > +            error_setg(errp, "cannot replace image with a mirror image of "
> > +                             "different size");
> > +            return;
> > +        }
> 
> We may want to loosen some of these restrictions later, but it's good to
> start with more restrictions if in doubt.
> 
> > +    }
> > +
> >      if ((sync == MIRROR_SYNC_MODE_FULL || !source)
> >          && mode != NEW_IMAGE_MODE_EXISTING)
> >      {
> 
> Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations
  2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
                   ` (4 preceding siblings ...)
  2014-06-19  4:46 ` [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Stefan Hajnoczi
@ 2014-06-27 12:19 ` Kevin Wolf
  5 siblings, 0 replies; 17+ messages in thread
From: Kevin Wolf @ 2014-06-27 12:19 UTC (permalink / raw)
  To: Benoît Canet; +Cc: qemu-devel, stefanha, mreitz

Am 16.06.2014 um 12:00 hat Benoît Canet geschrieben:
> in v10:
>         address max comments
> 
> Benoît Canet (4):
>   quorum: Add the rewrite-corrupted parameter to quorum
>   block: Add node-name argument to drive-mirror
>   block: Add replaces argument to drive-mirror
>   qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror
>     node-name mode.

Thanks, applied patches 1 and 2. There are still some open points for
the node replacement in mirroring.

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-27 11:57   ` Kevin Wolf
  2014-06-27 12:03     ` Benoît Canet
@ 2014-06-27 12:53     ` Benoît Canet
  2014-06-27 13:37       ` Kevin Wolf
  1 sibling, 1 reply; 17+ messages in thread
From: Benoît Canet @ 2014-06-27 12:53 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Benoît Canet, qemu-devel, stefanha, mreitz

The Friday 27 Jun 2014 à 13:57:02 (+0200), Kevin Wolf wrote :
> Am 16.06.2014 um 12:00 hat Benoît Canet geschrieben:
> > drive-mirror will bdrv_swap the new BDS named node-name with the one
> > pointed by replaces when the mirroring is finished.
> > 
> > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > Reviewed-by: Max Reitz <mreitz@redhat.com>
> > ---
> >  block.c                   | 17 ++++++++++++++
> >  block/mirror.c            | 60 +++++++++++++++++++++++++++++++++++++----------
> >  blockdev.c                | 30 +++++++++++++++++++++++-
> >  hmp.c                     |  2 +-
> >  include/block/block.h     |  4 ++++
> >  include/block/block_int.h |  3 +++
> >  qapi/block-core.json      |  6 ++++-
> >  qmp-commands.hx           |  4 +++-
> >  8 files changed, 109 insertions(+), 17 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 17f763d..318f1e6 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -5795,3 +5795,20 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
> >  
> >      return false;
> >  }
> > +
> > +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> > +{
> > +    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
> > +    if (!to_replace_bs) {
> > +        error_setg(errp, "Node name '%s' not found",
> > +                   node_name);
> 
> Unnecessary line break.
> 
> > +        return NULL;
> > +    }
> > +
> > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > +        return NULL;
> > +    }
> > +
> > +    return to_replace_bs;
> > +}
> > +
> 
> Empty line before EOF.
> 
> > diff --git a/block/mirror.c b/block/mirror.c
> > index 94c8661..151167e 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
> >      RateLimit limit;
> >      BlockDriverState *target;
> >      BlockDriverState *base;
> > +    /* The name of the graph node to replace */
> > +    char *replaces;
> > +    /* The BDS to replace */
> > +    BlockDriverState *to_replace;
> > +    /* Used to block operations on the drive-mirror-replace target */
> > +    Error *replace_blocker;
> >      bool is_none_mode;
> >      BlockdevOnError on_source_error, on_target_error;
> >      bool synced;
> > @@ -490,10 +496,14 @@ immediate_exit:
> >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> >      bdrv_iostatus_disable(s->target);
> >      if (s->should_complete && ret == 0) {
> > -        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> > -            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> > +        BlockDriverState *to_replace = s->common.bs;
> > +        if (s->to_replace) {
> > +            to_replace = s->to_replace;
> >          }
> > -        bdrv_swap(s->target, s->common.bs);
> > +        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> > +            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> > +        }
> > +        bdrv_swap(s->target, to_replace);
> >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> >              /* drop the bs loop chain formed by the swap: break the loop then
> >               * trigger the unref from the top one */
> > @@ -502,6 +512,12 @@ immediate_exit:
> >              bdrv_unref(p);
> >          }
> >      }
> > +    if (s->to_replace) {
> > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > +        error_free(s->replace_blocker);
> > +        bdrv_unref(s->to_replace);
> > +    }
> > +    g_free(s->replaces);
> >      bdrv_unref(s->target);
> >      block_job_completed(&s->common, ret);
> >  }
> > @@ -540,6 +556,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
> >          return;
> >      }
> >  
> > +    /* check the target bs is not blocked and block all operations on it */
> > +    if (s->replaces) {
> > +        s->to_replace = check_to_replace_node(s->replaces, errp);
> > +
> 
> This empty line looks unusual.
> 
> > +        if (!s->to_replace) {
> > +            return;
> > +        }
> 
> So here is the thing that I really wanted to comment on. In the case of
> a REPLACE blocker being set, this is a silent failure.

Why would it be silent ?
errp is directly passed to check_to_replace_node.

> The completion
> command will return success, but s->should_complete won't actually be
> set, so the completion doesn't happen. The only thing that actually
> happens is the bdrv_open_backing_file(s->target) (which looks somewhat
> questionable, too...)
> 
> Now I would expect that the REPLACE blocker is actually set for any
> backing file, because that is what bdrv_set_backing_hd() does. For
> quorum it does work as expected because quorum children don't get any
> backing_blocker (we need to check whether they should get something
> similar from the quorum BDS), so this is probably why it escaped your
> testing. We'll need a test case that tries replacing some ordinary
> backing file.
> 
> Now I think the (accidental?) restriction to only replacing quorum nodes
> actually makes this patch pretty safe, so maybe it would be nice to keep
> this behaviour; but we need to fix it to not fail silently but return an
> explicit error.
> 
> > +        error_setg(&s->replace_blocker,
> > +                   "block device is in use by block-job-complete");
> > +        bdrv_op_block_all(s->to_replace, s->replace_blocker);
> > +        bdrv_ref(s->to_replace);
> > +    }
> > +
> >      s->should_complete = true;
> >      block_job_resume(job);
> >  }
> > @@ -562,14 +592,15 @@ static const BlockJobDriver commit_active_job_driver = {
> >  };
> >  
> >  static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> > -                            int64_t speed, int64_t granularity,
> > -                            int64_t buf_size,
> > -                            BlockdevOnError on_source_error,
> > -                            BlockdevOnError on_target_error,
> > -                            BlockDriverCompletionFunc *cb,
> > -                            void *opaque, Error **errp,
> > -                            const BlockJobDriver *driver,
> > -                            bool is_none_mode, BlockDriverState *base)
> > +                             const char *replaces,
> > +                             int64_t speed, int64_t granularity,
> > +                             int64_t buf_size,
> > +                             BlockdevOnError on_source_error,
> > +                             BlockdevOnError on_target_error,
> > +                             BlockDriverCompletionFunc *cb,
> > +                             void *opaque, Error **errp,
> > +                             const BlockJobDriver *driver,
> > +                             bool is_none_mode, BlockDriverState *base)
> >  {
> >      MirrorBlockJob *s;
> >  
> > @@ -600,6 +631,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> >          return;
> >      }
> >  
> > +    s->replaces = g_strdup(replaces);
> >      s->on_source_error = on_source_error;
> >      s->on_target_error = on_target_error;
> >      s->target = target;
> 
> One design question that isn't quite clear to me yet is why you resolve
> the device name only in mirror_complete() and not here. This means that
> the drive-mirror QMP command can refer to one BDS with node-name foo,
> which then gets removed and another BDS with node-name foo is added, and
> then it would refer to the new BDS on completion time.
> 
> I would find it less surprising if we took a reference to the old BDS
> here so that you can't remove it. Perhaps setting the replace_blocker
> here already would be safer, too.
> 
> > @@ -621,6 +653,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> >  }
> >  
> >  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> > +                  const char *replaces,
> >                    int64_t speed, int64_t granularity, int64_t buf_size,
> >                    MirrorSyncMode mode, BlockdevOnError on_source_error,
> >                    BlockdevOnError on_target_error,
> > @@ -632,7 +665,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> >  
> >      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
> >      base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
> > -    mirror_start_job(bs, target, speed, granularity, buf_size,
> > +    mirror_start_job(bs, target, replaces,
> > +                     speed, granularity, buf_size,
> >                       on_source_error, on_target_error, cb, opaque, errp,
> >                       &mirror_job_driver, is_none_mode, base);
> >  }
> > @@ -680,7 +714,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
> >      }
> >  
> >      bdrv_ref(base);
> > -    mirror_start_job(bs, base, speed, 0, 0,
> > +    mirror_start_job(bs, base, NULL, speed, 0, 0,
> >                       on_error, on_error, cb, opaque, &local_err,
> >                       &commit_active_job_driver, false, base);
> >      if (local_err) {
> > diff --git a/blockdev.c b/blockdev.c
> > index 06b14f2..237a548 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -2107,6 +2107,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
> >  void qmp_drive_mirror(const char *device, const char *target,
> >                        bool has_format, const char *format,
> >                        bool has_node_name, const char *node_name,
> > +                      bool has_replaces, const char *replaces,
> >                        enum MirrorSyncMode sync,
> >                        bool has_mode, enum NewImageMode mode,
> >                        bool has_speed, int64_t speed,
> > @@ -2194,6 +2195,28 @@ void qmp_drive_mirror(const char *device, const char *target,
> >          return;
> >      }
> >  
> > +    if (has_replaces) {
> > +        BlockDriverState *to_replace_bs;
> > +
> > +        if (!has_node_name) {
> > +            error_setg(errp, "a node-name must be provided when replacing a"
> > +                             " named node of the graph");
> > +            return;
> > +        }
> > +
> > +        to_replace_bs = check_to_replace_node(replaces, errp);
> > +
> > +        if (!to_replace_bs) {
> > +            return;
> > +        }
> > +
> > +        if (size != bdrv_getlength(to_replace_bs)) {
> > +            error_setg(errp, "cannot replace image with a mirror image of "
> > +                             "different size");
> > +            return;
> > +        }
> 
> We may want to loosen some of these restrictions later, but it's good to
> start with more restrictions if in doubt.
> 
> > +    }
> > +
> >      if ((sync == MIRROR_SYNC_MODE_FULL || !source)
> >          && mode != NEW_IMAGE_MODE_EXISTING)
> >      {
> 
> Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-27 12:53     ` Benoît Canet
@ 2014-06-27 13:37       ` Kevin Wolf
  2014-06-27 14:30         ` Benoît Canet
  0 siblings, 1 reply; 17+ messages in thread
From: Kevin Wolf @ 2014-06-27 13:37 UTC (permalink / raw)
  To: Benoît Canet; +Cc: qemu-devel, stefanha, mreitz

Am 27.06.2014 um 14:53 hat Benoît Canet geschrieben:
> The Friday 27 Jun 2014 à 13:57:02 (+0200), Kevin Wolf wrote :
> > Am 16.06.2014 um 12:00 hat Benoît Canet geschrieben:
> > > drive-mirror will bdrv_swap the new BDS named node-name with the one
> > > pointed by replaces when the mirroring is finished.
> > > 
> > > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > > Reviewed-by: Max Reitz <mreitz@redhat.com>
> > > ---
> > >  block.c                   | 17 ++++++++++++++
> > >  block/mirror.c            | 60 +++++++++++++++++++++++++++++++++++++----------
> > >  blockdev.c                | 30 +++++++++++++++++++++++-
> > >  hmp.c                     |  2 +-
> > >  include/block/block.h     |  4 ++++
> > >  include/block/block_int.h |  3 +++
> > >  qapi/block-core.json      |  6 ++++-
> > >  qmp-commands.hx           |  4 +++-
> > >  8 files changed, 109 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/block.c b/block.c
> > > index 17f763d..318f1e6 100644
> > > --- a/block.c
> > > +++ b/block.c
> > > @@ -5795,3 +5795,20 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
> > >  
> > >      return false;
> > >  }
> > > +
> > > +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> > > +{
> > > +    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
> > > +    if (!to_replace_bs) {
> > > +        error_setg(errp, "Node name '%s' not found",
> > > +                   node_name);
> > 
> > Unnecessary line break.
> > 
> > > +        return NULL;
> > > +    }
> > > +
> > > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > > +        return NULL;
> > > +    }
> > > +
> > > +    return to_replace_bs;
> > > +}
> > > +
> > 
> > Empty line before EOF.
> > 
> > > diff --git a/block/mirror.c b/block/mirror.c
> > > index 94c8661..151167e 100644
> > > --- a/block/mirror.c
> > > +++ b/block/mirror.c
> > > @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
> > >      RateLimit limit;
> > >      BlockDriverState *target;
> > >      BlockDriverState *base;
> > > +    /* The name of the graph node to replace */
> > > +    char *replaces;
> > > +    /* The BDS to replace */
> > > +    BlockDriverState *to_replace;
> > > +    /* Used to block operations on the drive-mirror-replace target */
> > > +    Error *replace_blocker;
> > >      bool is_none_mode;
> > >      BlockdevOnError on_source_error, on_target_error;
> > >      bool synced;
> > > @@ -490,10 +496,14 @@ immediate_exit:
> > >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> > >      bdrv_iostatus_disable(s->target);
> > >      if (s->should_complete && ret == 0) {
> > > -        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> > > -            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> > > +        BlockDriverState *to_replace = s->common.bs;
> > > +        if (s->to_replace) {
> > > +            to_replace = s->to_replace;
> > >          }
> > > -        bdrv_swap(s->target, s->common.bs);
> > > +        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> > > +            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> > > +        }
> > > +        bdrv_swap(s->target, to_replace);
> > >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> > >              /* drop the bs loop chain formed by the swap: break the loop then
> > >               * trigger the unref from the top one */
> > > @@ -502,6 +512,12 @@ immediate_exit:
> > >              bdrv_unref(p);
> > >          }
> > >      }
> > > +    if (s->to_replace) {
> > > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > > +        error_free(s->replace_blocker);
> > > +        bdrv_unref(s->to_replace);
> > > +    }
> > > +    g_free(s->replaces);
> > >      bdrv_unref(s->target);
> > >      block_job_completed(&s->common, ret);
> > >  }
> > > @@ -540,6 +556,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
> > >          return;
> > >      }
> > >  
> > > +    /* check the target bs is not blocked and block all operations on it */
> > > +    if (s->replaces) {
> > > +        s->to_replace = check_to_replace_node(s->replaces, errp);
> > > +
> > 
> > This empty line looks unusual.
> > 
> > > +        if (!s->to_replace) {
> > > +            return;
> > > +        }
> > 
> > So here is the thing that I really wanted to comment on. In the case of
> > a REPLACE blocker being set, this is a silent failure.
> 
> Why would it be silent ?
> errp is directly passed to check_to_replace_node.

Ah, sorry, my bad. I missed that bdrv_op_is_blocked() sets errp. Which
is a bit odd for a function with this name. I had expected that it
simply returns true if the op is blocked and sets an error only if it
can't answer the question.

Not a problem of your patch, though.

Kevin

> > The completion
> > command will return success, but s->should_complete won't actually be
> > set, so the completion doesn't happen. The only thing that actually
> > happens is the bdrv_open_backing_file(s->target) (which looks somewhat
> > questionable, too...)
> > 
> > Now I would expect that the REPLACE blocker is actually set for any
> > backing file, because that is what bdrv_set_backing_hd() does. For
> > quorum it does work as expected because quorum children don't get any
> > backing_blocker (we need to check whether they should get something
> > similar from the quorum BDS), so this is probably why it escaped your
> > testing. We'll need a test case that tries replacing some ordinary
> > backing file.
> > 
> > Now I think the (accidental?) restriction to only replacing quorum nodes
> > actually makes this patch pretty safe, so maybe it would be nice to keep
> > this behaviour; but we need to fix it to not fail silently but return an
> > explicit error.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v10 3/4] block: Add replaces argument to drive-mirror
  2014-06-27 13:37       ` Kevin Wolf
@ 2014-06-27 14:30         ` Benoît Canet
  0 siblings, 0 replies; 17+ messages in thread
From: Benoît Canet @ 2014-06-27 14:30 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Benoît Canet, qemu-devel, stefanha, mreitz

The Friday 27 Jun 2014 à 15:37:00 (+0200), Kevin Wolf wrote :
> Am 27.06.2014 um 14:53 hat Benoît Canet geschrieben:
> > The Friday 27 Jun 2014 à 13:57:02 (+0200), Kevin Wolf wrote :
> > > Am 16.06.2014 um 12:00 hat Benoît Canet geschrieben:
> > > > drive-mirror will bdrv_swap the new BDS named node-name with the one
> > > > pointed by replaces when the mirroring is finished.
> > > > 
> > > > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > > > Reviewed-by: Max Reitz <mreitz@redhat.com>
> > > > ---
> > > >  block.c                   | 17 ++++++++++++++
> > > >  block/mirror.c            | 60 +++++++++++++++++++++++++++++++++++++----------
> > > >  blockdev.c                | 30 +++++++++++++++++++++++-
> > > >  hmp.c                     |  2 +-
> > > >  include/block/block.h     |  4 ++++
> > > >  include/block/block_int.h |  3 +++
> > > >  qapi/block-core.json      |  6 ++++-
> > > >  qmp-commands.hx           |  4 +++-
> > > >  8 files changed, 109 insertions(+), 17 deletions(-)
> > > > 
> > > > diff --git a/block.c b/block.c
> > > > index 17f763d..318f1e6 100644
> > > > --- a/block.c
> > > > +++ b/block.c
> > > > @@ -5795,3 +5795,20 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
> > > >  
> > > >      return false;
> > > >  }
> > > > +
> > > > +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> > > > +{
> > > > +    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
> > > > +    if (!to_replace_bs) {
> > > > +        error_setg(errp, "Node name '%s' not found",
> > > > +                   node_name);
> > > 
> > > Unnecessary line break.
> > > 
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    return to_replace_bs;
> > > > +}
> > > > +
> > > 
> > > Empty line before EOF.
> > > 
> > > > diff --git a/block/mirror.c b/block/mirror.c
> > > > index 94c8661..151167e 100644
> > > > --- a/block/mirror.c
> > > > +++ b/block/mirror.c
> > > > @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
> > > >      RateLimit limit;
> > > >      BlockDriverState *target;
> > > >      BlockDriverState *base;
> > > > +    /* The name of the graph node to replace */
> > > > +    char *replaces;
> > > > +    /* The BDS to replace */
> > > > +    BlockDriverState *to_replace;
> > > > +    /* Used to block operations on the drive-mirror-replace target */
> > > > +    Error *replace_blocker;
> > > >      bool is_none_mode;
> > > >      BlockdevOnError on_source_error, on_target_error;
> > > >      bool synced;
> > > > @@ -490,10 +496,14 @@ immediate_exit:
> > > >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> > > >      bdrv_iostatus_disable(s->target);
> > > >      if (s->should_complete && ret == 0) {
> > > > -        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> > > > -            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> > > > +        BlockDriverState *to_replace = s->common.bs;
> > > > +        if (s->to_replace) {
> > > > +            to_replace = s->to_replace;
> > > >          }
> > > > -        bdrv_swap(s->target, s->common.bs);
> > > > +        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> > > > +            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> > > > +        }
> > > > +        bdrv_swap(s->target, to_replace);
> > > >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> > > >              /* drop the bs loop chain formed by the swap: break the loop then
> > > >               * trigger the unref from the top one */
> > > > @@ -502,6 +512,12 @@ immediate_exit:
> > > >              bdrv_unref(p);
> > > >          }
> > > >      }
> > > > +    if (s->to_replace) {
> > > > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > > > +        error_free(s->replace_blocker);
> > > > +        bdrv_unref(s->to_replace);
> > > > +    }
> > > > +    g_free(s->replaces);
> > > >      bdrv_unref(s->target);
> > > >      block_job_completed(&s->common, ret);
> > > >  }
> > > > @@ -540,6 +556,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
> > > >          return;
> > > >      }
> > > >  
> > > > +    /* check the target bs is not blocked and block all operations on it */
> > > > +    if (s->replaces) {
> > > > +        s->to_replace = check_to_replace_node(s->replaces, errp);
> > > > +
> > > 
> > > This empty line looks unusual.
> > > 
> > > > +        if (!s->to_replace) {
> > > > +            return;
> > > > +        }
> > > 
> > > So here is the thing that I really wanted to comment on. In the case of
> > > a REPLACE blocker being set, this is a silent failure.
> > 
> > Why would it be silent ?
> > errp is directly passed to check_to_replace_node.
> 
> Ah, sorry, my bad. I missed that bdrv_op_is_blocked() sets errp. Which
> is a bit odd for a function with this name. I had expected that it
> simply returns true if the op is blocked and sets an error only if it
> can't answer the question.

I passed &local_err and propagated it to errp to make it clearer in the
new version.

Best regards

Benoît

> 
> Not a problem of your patch, though.
> 
> Kevin
> 
> > > The completion
> > > command will return success, but s->should_complete won't actually be
> > > set, so the completion doesn't happen. The only thing that actually
> > > happens is the bdrv_open_backing_file(s->target) (which looks somewhat
> > > questionable, too...)
> > > 
> > > Now I would expect that the REPLACE blocker is actually set for any
> > > backing file, because that is what bdrv_set_backing_hd() does. For
> > > quorum it does work as expected because quorum children don't get any
> > > backing_blocker (we need to check whether they should get something
> > > similar from the quorum BDS), so this is probably why it escaped your
> > > testing. We'll need a test case that tries replacing some ordinary
> > > backing file.
> > > 
> > > Now I think the (accidental?) restriction to only replacing quorum nodes
> > > actually makes this patch pretty safe, so maybe it would be nice to keep
> > > this behaviour; but we need to fix it to not fail silently but return an
> > > explicit error.
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2014-06-27 14:31 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-16 10:00 [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Benoît Canet
2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 1/4] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 2/4] block: Add node-name argument to drive-mirror Benoît Canet
2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 3/4] block: Add replaces " Benoît Canet
2014-06-19  9:19   ` Stefan Hajnoczi
2014-06-27 11:57   ` Kevin Wolf
2014-06-27 12:03     ` Benoît Canet
2014-06-27 12:53     ` Benoît Canet
2014-06-27 13:37       ` Kevin Wolf
2014-06-27 14:30         ` Benoît Canet
2014-06-16 10:00 ` [Qemu-devel] [PATCH v10 4/4] qemu-iotests: Add TestRepairQuorum to 041 to test drive-mirror node-name mode Benoît Canet
2014-06-17 16:17   ` Max Reitz
2014-06-17 18:19     ` Benoît Canet
2014-06-17 18:20       ` Max Reitz
2014-06-17 18:21   ` Max Reitz
2014-06-19  4:46 ` [Qemu-devel] [PATCH v10 0/4] Quorum maintainance operations Stefan Hajnoczi
2014-06-27 12:19 ` Kevin Wolf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.