All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] Implement recursive op blockers
@ 2014-08-22 16:11 Benoît Canet
  2014-08-22 16:11 ` [Qemu-devel] [PATCH] block: Make op blockers recursive Benoît Canet
  0 siblings, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-08-22 16:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, jcody, famz, Benoît Canet, stefanha

This self contained patch implements recursives op blockers while passing all the
./check -qcow2 tests.

Benoît Canet (1):
  block: Make op blockers recursive

 block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
 block/blkverify.c         | 21 +++++++++++++++
 block/commit.c            |  3 +++
 block/mirror.c            | 17 ++++++++----
 block/quorum.c            | 25 +++++++++++++++++
 block/stream.c            |  3 +++
 block/vmdk.c              | 34 +++++++++++++++++++++++
 include/block/block.h     |  2 +-
 include/block/block_int.h |  6 +++++
 9 files changed, 171 insertions(+), 9 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-22 16:11 [Qemu-devel] [PATCH] Implement recursive op blockers Benoît Canet
@ 2014-08-22 16:11 ` Benoît Canet
  2014-08-25  6:04   ` Fam Zheng
  2014-09-09 11:56   ` Kevin Wolf
  0 siblings, 2 replies; 21+ messages in thread
From: Benoît Canet @ 2014-08-22 16:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, jcody, famz, Benoît Canet, stefanha

Since the block layer code is starting to modify the BDS graph right in the
middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
to properly block and unblock whole BDS subtrees; recursion is a neat way to
achieve this task.

This patch also takes care of modifying the op blockers users.

Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
---
 block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
 block/blkverify.c         | 21 +++++++++++++++
 block/commit.c            |  3 +++
 block/mirror.c            | 17 ++++++++----
 block/quorum.c            | 25 +++++++++++++++++
 block/stream.c            |  3 +++
 block/vmdk.c              | 34 +++++++++++++++++++++++
 include/block/block.h     |  2 +-
 include/block/block_int.h |  6 +++++
 9 files changed, 171 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 6fa0201..d964b6c 100644
--- a/block.c
+++ b/block.c
@@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
     return false;
 }
 
-void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
+/* do the real work of blocking a BDS */
+static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
+                             Error *reason)
 {
     BdrvOpBlocker *blocker;
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
@@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
 }
 
-void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
+/* do the real work of unblocking a BDS */
+static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
+                               Error *reason)
 {
     BdrvOpBlocker *blocker, *next;
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
@@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
     }
 }
 
+static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
+                                  Error *reason)
+{
+    BdrvOpBlocker *blocker, *next;
+    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
+        if (blocker->reason == reason) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* block recursively a BDS */
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
+{
+    if (!bs) {
+        return;
+    }
+
+    /* prevent recursion loop */
+    if (bdrv_op_is_blocked_by(bs, op, reason)) {
+        return;
+    }
+
+    /* block first for recursion loop protection to work */
+    bdrv_do_op_block(bs, op, reason);
+
+    bdrv_op_block(bs->file, op, reason);
+    bdrv_op_block(bs->backing_hd, op, reason);
+
+    if (bs->drv && bs->drv->bdrv_op_recursive_block) {
+        bs->drv->bdrv_op_recursive_block(bs, op, reason);
+    }
+}
+
+/* unblock recursively a BDS */
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
+{
+    if (!bs) {
+        return;
+    }
+
+    /* prevent recursion loop */
+    if (!bdrv_op_is_blocked_by(bs, op, reason)) {
+        return;
+    }
+
+    /* unblock first for recursion loop protection to work */
+    bdrv_do_op_unblock(bs, op, reason);
+
+    bdrv_op_unblock(bs->file, op, reason);
+    bdrv_op_unblock(bs->backing_hd, op, reason);
+
+    if (bs->drv && bs->drv->bdrv_op_recursive_unblock) {
+        bs->drv->bdrv_op_recursive_unblock(bs, op, reason);
+    }
+}
+
 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
 {
     int i;
@@ -5848,7 +5911,7 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
         return NULL;
     }
 
-    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_MIRROR_REPLACE, errp)) {
         return NULL;
     }
 
diff --git a/block/blkverify.c b/block/blkverify.c
index 621b785..75ec3df 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -320,6 +320,24 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
     bdrv_attach_aio_context(s->test_file, new_context);
 }
 
+static void blkverify_op_recursive_block(BlockDriverState *bs, BlockOpType op,
+                                         Error *reason)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bdrv_op_block(bs->file, op, reason);
+    bdrv_op_block(s->test_file, op, reason);
+}
+
+static void blkverify_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
+                                           Error *reason)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bdrv_op_unblock(bs->file, op, reason);
+    bdrv_op_unblock(s->test_file, op, reason);
+}
+
 static BlockDriver bdrv_blkverify = {
     .format_name                      = "blkverify",
     .protocol_name                    = "blkverify",
@@ -337,6 +355,9 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_attach_aio_context          = blkverify_attach_aio_context,
     .bdrv_detach_aio_context          = blkverify_detach_aio_context,
 
+    .bdrv_op_recursive_block          = blkverify_op_recursive_block,
+    .bdrv_op_recursive_unblock        = blkverify_op_recursive_unblock,
+
     .is_filter                        = true,
     .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
 };
diff --git a/block/commit.c b/block/commit.c
index 91517d3..8a122b7 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -142,6 +142,9 @@ wait:
 
     if (!block_job_is_cancelled(&s->common) && sector_num == end) {
         /* success */
+        /* unblock only BDS to be dropped */
+        bdrv_op_unblock_all(top, s->common.blocker);
+        bdrv_op_block_all(base, s->common.blocker);
         ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
     }
 
diff --git a/block/mirror.c b/block/mirror.c
index 5e7a166..28ed47d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -324,6 +324,7 @@ static void coroutine_fn mirror_run(void *opaque)
     uint64_t last_pause_ns;
     BlockDriverInfo bdi;
     char backing_filename[1024];
+    BlockDriverState *to_replace;
     int ret = 0;
     int n;
 
@@ -512,14 +513,16 @@ immediate_exit:
     g_free(s->in_flight_bitmap);
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
     bdrv_iostatus_disable(s->target);
+    to_replace = s->common.bs;
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        to_replace = s->to_replace;
+    }
     if (s->should_complete && ret == 0) {
-        BlockDriverState *to_replace = s->common.bs;
-        if (s->to_replace) {
-            to_replace = s->to_replace;
-        }
         if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
             bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
         }
+        bdrv_op_unblock_all(to_replace, bs->job->blocker);
         bdrv_swap(s->target, to_replace);
         if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
             /* drop the bs loop chain formed by the swap: break the loop then
@@ -530,7 +533,6 @@ immediate_exit:
         }
     }
     if (s->to_replace) {
-        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
         error_free(s->replace_blocker);
         bdrv_unref(s->to_replace);
     }
@@ -648,6 +650,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
+     * blocked operations so the replaces parameter can work
+     */
+    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
+
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
diff --git a/block/quorum.c b/block/quorum.c
index d5ee9c0..c260171 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -945,6 +945,28 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
     }
 }
 
+static void quorum_op_recursive_block(BlockDriverState *bs, BlockOpType op,
+                                      Error *reason)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_op_block(s->bs[i], op, reason);
+    }
+}
+
+static void quorum_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
+                                        Error *reason)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_op_unblock(s->bs[i], op, reason);
+    }
+}
+
 static BlockDriver bdrv_quorum = {
     .format_name                        = "quorum",
     .protocol_name                      = "quorum",
@@ -965,6 +987,9 @@ static BlockDriver bdrv_quorum = {
     .bdrv_detach_aio_context            = quorum_detach_aio_context,
     .bdrv_attach_aio_context            = quorum_attach_aio_context,
 
+    .bdrv_op_recursive_block            = quorum_op_recursive_block,
+    .bdrv_op_recursive_unblock          = quorum_op_recursive_unblock,
+
     .is_filter                          = true,
     .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
diff --git a/block/stream.c b/block/stream.c
index cdea3e8..2c917b7 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -192,6 +192,9 @@ wait:
             }
         }
         ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+        /* unblock only BDS to be dropped */
+        bdrv_op_unblock_all(bs->backing_hd, s->common.blocker);
+        bdrv_op_block_all(base, s->common.blocker);
         close_unused_images(bs, base, base_id);
     }
 
diff --git a/block/vmdk.c b/block/vmdk.c
index 01412a8..bc5b17f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2220,6 +2220,38 @@ static QemuOptsList vmdk_create_opts = {
     }
 };
 
+static void vmdk_op_recursive_block(BlockDriverState *bs, BlockOpType op,
+                                      Error *reason)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    bdrv_op_block(bs->file, op, reason);
+    bdrv_op_block(bs->backing_hd, op, reason);
+
+    for (i = 0; i < s->num_extents; i++) {
+        if (s->extents[i].file) {
+            bdrv_op_block(s->extents[i].file, op, reason);
+        }
+    }
+}
+
+static void vmdk_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
+                                        Error *reason)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    bdrv_op_unblock(bs->file, op, reason);
+    bdrv_op_unblock(bs->backing_hd, op, reason);
+
+    for (i = 0; i < s->num_extents; i++) {
+        if (s->extents[i].file) {
+            bdrv_op_unblock(s->extents[i].file, op, reason);
+        }
+    }
+}
+
 static BlockDriver bdrv_vmdk = {
     .format_name                  = "vmdk",
     .instance_size                = sizeof(BDRVVmdkState),
@@ -2242,6 +2274,8 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_get_info                = vmdk_get_info,
     .bdrv_detach_aio_context      = vmdk_detach_aio_context,
     .bdrv_attach_aio_context      = vmdk_attach_aio_context,
+    .bdrv_op_recursive_block      = vmdk_op_recursive_block,
+    .bdrv_op_recursive_unblock    = vmdk_op_recursive_unblock,
 
     .supports_backing             = true,
     .create_opts                  = &vmdk_create_opts,
diff --git a/include/block/block.h b/include/block/block.h
index e94b701..4729a0a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -175,7 +175,7 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_MIRROR,
     BLOCK_OP_TYPE_RESIZE,
     BLOCK_OP_TYPE_STREAM,
-    BLOCK_OP_TYPE_REPLACE,
+    BLOCK_OP_TYPE_MIRROR_REPLACE,
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7b541a0..a802abc 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -266,6 +266,12 @@ struct BlockDriver {
     void (*bdrv_io_unplug)(BlockDriverState *bs);
     void (*bdrv_flush_io_queue)(BlockDriverState *bs);
 
+    /* helps blockers to propagate recursively */
+    void (*bdrv_op_recursive_block)(BlockDriverState *bs, BlockOpType op,
+                                    Error *reason);
+    void (*bdrv_op_recursive_unblock)(BlockDriverState *bs, BlockOpType op,
+                                      Error *reason);
+
     QLIST_ENTRY(BlockDriver) list;
 };
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-22 16:11 ` [Qemu-devel] [PATCH] block: Make op blockers recursive Benoît Canet
@ 2014-08-25  6:04   ` Fam Zheng
  2014-08-25  9:06     ` Benoît Canet
  2014-09-09 11:56   ` Kevin Wolf
  1 sibling, 1 reply; 21+ messages in thread
From: Fam Zheng @ 2014-08-25  6:04 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, jcody, qemu-devel, stefanha

On Fri, 08/22 18:11, Benoît Canet wrote:
> Since the block layer code is starting to modify the BDS graph right in the
> middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> to properly block and unblock whole BDS subtrees; recursion is a neat way to
> achieve this task.
> 
> This patch also takes care of modifying the op blockers users.

Is this going to replace backing_blocker?

I think it is too general an approach to control the operation properly,
because the op blocker may not work in the same way for all types of BDS
connections.  In other words, the choosing of op blockers are likely
conditional on graph edge types, that's why backing_blocker was added here. For
example, A VMDK extent connection will probably need a different set of
blockers than bs->file connection.

So could you explain in which cases is the recursive blocking/unblocking
useful?

Fam

> 
> Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> ---
>  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
>  block/blkverify.c         | 21 +++++++++++++++
>  block/commit.c            |  3 +++
>  block/mirror.c            | 17 ++++++++----
>  block/quorum.c            | 25 +++++++++++++++++
>  block/stream.c            |  3 +++
>  block/vmdk.c              | 34 +++++++++++++++++++++++
>  include/block/block.h     |  2 +-
>  include/block/block_int.h |  6 +++++
>  9 files changed, 171 insertions(+), 9 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 6fa0201..d964b6c 100644
> --- a/block.c
> +++ b/block.c
> @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
>      return false;
>  }
>  
> -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> +/* do the real work of blocking a BDS */
> +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> +                             Error *reason)
>  {
>      BdrvOpBlocker *blocker;
>      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
>      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
>  }
>  
> -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> +/* do the real work of unblocking a BDS */
> +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> +                               Error *reason)
>  {
>      BdrvOpBlocker *blocker, *next;
>      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
>      }
>  }
>  
> +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> +                                  Error *reason)
> +{
> +    BdrvOpBlocker *blocker, *next;
> +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
> +        if (blocker->reason == reason) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +/* block recursively a BDS */
> +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> +{
> +    if (!bs) {
> +        return;
> +    }
> +
> +    /* prevent recursion loop */
> +    if (bdrv_op_is_blocked_by(bs, op, reason)) {
> +        return;
> +    }
> +
> +    /* block first for recursion loop protection to work */
> +    bdrv_do_op_block(bs, op, reason);
> +
> +    bdrv_op_block(bs->file, op, reason);
> +    bdrv_op_block(bs->backing_hd, op, reason);
> +
> +    if (bs->drv && bs->drv->bdrv_op_recursive_block) {
> +        bs->drv->bdrv_op_recursive_block(bs, op, reason);
> +    }
> +}
> +
> +/* unblock recursively a BDS */
> +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> +{
> +    if (!bs) {
> +        return;
> +    }
> +
> +    /* prevent recursion loop */
> +    if (!bdrv_op_is_blocked_by(bs, op, reason)) {
> +        return;
> +    }
> +
> +    /* unblock first for recursion loop protection to work */
> +    bdrv_do_op_unblock(bs, op, reason);
> +
> +    bdrv_op_unblock(bs->file, op, reason);
> +    bdrv_op_unblock(bs->backing_hd, op, reason);
> +
> +    if (bs->drv && bs->drv->bdrv_op_recursive_unblock) {
> +        bs->drv->bdrv_op_recursive_unblock(bs, op, reason);
> +    }
> +}
> +
>  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
>  {
>      int i;
> @@ -5848,7 +5911,7 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
>          return NULL;
>      }
>  
> -    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_MIRROR_REPLACE, errp)) {
>          return NULL;
>      }
>  
> diff --git a/block/blkverify.c b/block/blkverify.c
> index 621b785..75ec3df 100644
> --- a/block/blkverify.c
> +++ b/block/blkverify.c
> @@ -320,6 +320,24 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
>      bdrv_attach_aio_context(s->test_file, new_context);
>  }
>  
> +static void blkverify_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> +                                         Error *reason)
> +{
> +    BDRVBlkverifyState *s = bs->opaque;
> +
> +    bdrv_op_block(bs->file, op, reason);
> +    bdrv_op_block(s->test_file, op, reason);
> +}
> +
> +static void blkverify_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> +                                           Error *reason)
> +{
> +    BDRVBlkverifyState *s = bs->opaque;
> +
> +    bdrv_op_unblock(bs->file, op, reason);
> +    bdrv_op_unblock(s->test_file, op, reason);
> +}
> +
>  static BlockDriver bdrv_blkverify = {
>      .format_name                      = "blkverify",
>      .protocol_name                    = "blkverify",
> @@ -337,6 +355,9 @@ static BlockDriver bdrv_blkverify = {
>      .bdrv_attach_aio_context          = blkverify_attach_aio_context,
>      .bdrv_detach_aio_context          = blkverify_detach_aio_context,
>  
> +    .bdrv_op_recursive_block          = blkverify_op_recursive_block,
> +    .bdrv_op_recursive_unblock        = blkverify_op_recursive_unblock,
> +
>      .is_filter                        = true,
>      .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
>  };
> diff --git a/block/commit.c b/block/commit.c
> index 91517d3..8a122b7 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -142,6 +142,9 @@ wait:
>  
>      if (!block_job_is_cancelled(&s->common) && sector_num == end) {
>          /* success */
> +        /* unblock only BDS to be dropped */
> +        bdrv_op_unblock_all(top, s->common.blocker);
> +        bdrv_op_block_all(base, s->common.blocker);
>          ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
>      }
>  
> diff --git a/block/mirror.c b/block/mirror.c
> index 5e7a166..28ed47d 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -324,6 +324,7 @@ static void coroutine_fn mirror_run(void *opaque)
>      uint64_t last_pause_ns;
>      BlockDriverInfo bdi;
>      char backing_filename[1024];
> +    BlockDriverState *to_replace;
>      int ret = 0;
>      int n;
>  
> @@ -512,14 +513,16 @@ immediate_exit:
>      g_free(s->in_flight_bitmap);
>      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
>      bdrv_iostatus_disable(s->target);
> +    to_replace = s->common.bs;
> +    if (s->to_replace) {
> +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> +        to_replace = s->to_replace;
> +    }
>      if (s->should_complete && ret == 0) {
> -        BlockDriverState *to_replace = s->common.bs;
> -        if (s->to_replace) {
> -            to_replace = s->to_replace;
> -        }
>          if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
>              bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
>          }
> +        bdrv_op_unblock_all(to_replace, bs->job->blocker);
>          bdrv_swap(s->target, to_replace);
>          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
>              /* drop the bs loop chain formed by the swap: break the loop then
> @@ -530,7 +533,6 @@ immediate_exit:
>          }
>      }
>      if (s->to_replace) {
> -        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
>          error_free(s->replace_blocker);
>          bdrv_unref(s->to_replace);
>      }
> @@ -648,6 +650,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
>          return;
>      }
>  
> +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> +     * blocked operations so the replaces parameter can work
> +     */
> +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
> +
>      s->replaces = g_strdup(replaces);
>      s->on_source_error = on_source_error;
>      s->on_target_error = on_target_error;
> diff --git a/block/quorum.c b/block/quorum.c
> index d5ee9c0..c260171 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -945,6 +945,28 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
>      }
>  }
>  
> +static void quorum_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> +                                      Error *reason)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->num_children; i++) {
> +        bdrv_op_block(s->bs[i], op, reason);
> +    }
> +}
> +
> +static void quorum_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> +                                        Error *reason)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->num_children; i++) {
> +        bdrv_op_unblock(s->bs[i], op, reason);
> +    }
> +}
> +
>  static BlockDriver bdrv_quorum = {
>      .format_name                        = "quorum",
>      .protocol_name                      = "quorum",
> @@ -965,6 +987,9 @@ static BlockDriver bdrv_quorum = {
>      .bdrv_detach_aio_context            = quorum_detach_aio_context,
>      .bdrv_attach_aio_context            = quorum_attach_aio_context,
>  
> +    .bdrv_op_recursive_block            = quorum_op_recursive_block,
> +    .bdrv_op_recursive_unblock          = quorum_op_recursive_unblock,
> +
>      .is_filter                          = true,
>      .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
>  };
> diff --git a/block/stream.c b/block/stream.c
> index cdea3e8..2c917b7 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -192,6 +192,9 @@ wait:
>              }
>          }
>          ret = bdrv_change_backing_file(bs, base_id, base_fmt);
> +        /* unblock only BDS to be dropped */
> +        bdrv_op_unblock_all(bs->backing_hd, s->common.blocker);
> +        bdrv_op_block_all(base, s->common.blocker);
>          close_unused_images(bs, base, base_id);
>      }
>  
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 01412a8..bc5b17f 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -2220,6 +2220,38 @@ static QemuOptsList vmdk_create_opts = {
>      }
>  };
>  
> +static void vmdk_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> +                                      Error *reason)
> +{
> +    BDRVVmdkState *s = bs->opaque;
> +    int i;
> +
> +    bdrv_op_block(bs->file, op, reason);
> +    bdrv_op_block(bs->backing_hd, op, reason);
> +
> +    for (i = 0; i < s->num_extents; i++) {
> +        if (s->extents[i].file) {
> +            bdrv_op_block(s->extents[i].file, op, reason);
> +        }
> +    }
> +}
> +
> +static void vmdk_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> +                                        Error *reason)
> +{
> +    BDRVVmdkState *s = bs->opaque;
> +    int i;
> +
> +    bdrv_op_unblock(bs->file, op, reason);
> +    bdrv_op_unblock(bs->backing_hd, op, reason);
> +
> +    for (i = 0; i < s->num_extents; i++) {
> +        if (s->extents[i].file) {
> +            bdrv_op_unblock(s->extents[i].file, op, reason);
> +        }
> +    }
> +}
> +
>  static BlockDriver bdrv_vmdk = {
>      .format_name                  = "vmdk",
>      .instance_size                = sizeof(BDRVVmdkState),
> @@ -2242,6 +2274,8 @@ static BlockDriver bdrv_vmdk = {
>      .bdrv_get_info                = vmdk_get_info,
>      .bdrv_detach_aio_context      = vmdk_detach_aio_context,
>      .bdrv_attach_aio_context      = vmdk_attach_aio_context,
> +    .bdrv_op_recursive_block      = vmdk_op_recursive_block,
> +    .bdrv_op_recursive_unblock    = vmdk_op_recursive_unblock,
>  
>      .supports_backing             = true,
>      .create_opts                  = &vmdk_create_opts,
> diff --git a/include/block/block.h b/include/block/block.h
> index e94b701..4729a0a 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -175,7 +175,7 @@ typedef enum BlockOpType {
>      BLOCK_OP_TYPE_MIRROR,
>      BLOCK_OP_TYPE_RESIZE,
>      BLOCK_OP_TYPE_STREAM,
> -    BLOCK_OP_TYPE_REPLACE,
> +    BLOCK_OP_TYPE_MIRROR_REPLACE,
>      BLOCK_OP_TYPE_MAX,
>  } BlockOpType;
>  
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 7b541a0..a802abc 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -266,6 +266,12 @@ struct BlockDriver {
>      void (*bdrv_io_unplug)(BlockDriverState *bs);
>      void (*bdrv_flush_io_queue)(BlockDriverState *bs);
>  
> +    /* helps blockers to propagate recursively */
> +    void (*bdrv_op_recursive_block)(BlockDriverState *bs, BlockOpType op,
> +                                    Error *reason);
> +    void (*bdrv_op_recursive_unblock)(BlockDriverState *bs, BlockOpType op,
> +                                      Error *reason);
> +
>      QLIST_ENTRY(BlockDriver) list;
>  };
>  
> -- 
> 2.1.0
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-25  6:04   ` Fam Zheng
@ 2014-08-25  9:06     ` Benoît Canet
  2014-08-25  9:37       ` Fam Zheng
  0 siblings, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-08-25  9:06 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, jcody, stefanha, qemu-devel, Benoît Canet

On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> On Fri, 08/22 18:11, Benoît Canet wrote:
> > Since the block layer code is starting to modify the BDS graph right in the
> > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > achieve this task.
> > 
> > This patch also takes care of modifying the op blockers users.
> 
> Is this going to replace backing_blocker?
> 
> I think it is too general an approach to control the operation properly,
> because the op blocker may not work in the same way for all types of BDS
> connections.  In other words, the choosing of op blockers are likely
> conditional on graph edge types, that's why backing_blocker was added here. For
> example, A VMDK extent connection will probably need a different set of
> blockers than bs->file connection.
> 
> So could you explain in which cases is the recursive blocking/unblocking
> useful?

It's designed for the new crop of block operations operating on BDS located in
the middle of the backing chain: Jeff's patches, intermediate live streaming or
intermediate mirroring.
Recursively blocking BDS allows to do these operations safely.

Best regards

Benoît


> 
> Fam
> 
> > 
> > Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> > ---
> >  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
> >  block/blkverify.c         | 21 +++++++++++++++
> >  block/commit.c            |  3 +++
> >  block/mirror.c            | 17 ++++++++----
> >  block/quorum.c            | 25 +++++++++++++++++
> >  block/stream.c            |  3 +++
> >  block/vmdk.c              | 34 +++++++++++++++++++++++
> >  include/block/block.h     |  2 +-
> >  include/block/block_int.h |  6 +++++
> >  9 files changed, 171 insertions(+), 9 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 6fa0201..d964b6c 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
> >      return false;
> >  }
> >  
> > -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +/* do the real work of blocking a BDS */
> > +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> > +                             Error *reason)
> >  {
> >      BdrvOpBlocker *blocker;
> >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> >      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
> >  }
> >  
> > -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +/* do the real work of unblocking a BDS */
> > +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> > +                               Error *reason)
> >  {
> >      BdrvOpBlocker *blocker, *next;
> >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> >      }
> >  }
> >  
> > +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> > +                                  Error *reason)
> > +{
> > +    BdrvOpBlocker *blocker, *next;
> > +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
> > +        if (blocker->reason == reason) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> > +/* block recursively a BDS */
> > +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +{
> > +    if (!bs) {
> > +        return;
> > +    }
> > +
> > +    /* prevent recursion loop */
> > +    if (bdrv_op_is_blocked_by(bs, op, reason)) {
> > +        return;
> > +    }
> > +
> > +    /* block first for recursion loop protection to work */
> > +    bdrv_do_op_block(bs, op, reason);
> > +
> > +    bdrv_op_block(bs->file, op, reason);
> > +    bdrv_op_block(bs->backing_hd, op, reason);
> > +
> > +    if (bs->drv && bs->drv->bdrv_op_recursive_block) {
> > +        bs->drv->bdrv_op_recursive_block(bs, op, reason);
> > +    }
> > +}
> > +
> > +/* unblock recursively a BDS */
> > +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +{
> > +    if (!bs) {
> > +        return;
> > +    }
> > +
> > +    /* prevent recursion loop */
> > +    if (!bdrv_op_is_blocked_by(bs, op, reason)) {
> > +        return;
> > +    }
> > +
> > +    /* unblock first for recursion loop protection to work */
> > +    bdrv_do_op_unblock(bs, op, reason);
> > +
> > +    bdrv_op_unblock(bs->file, op, reason);
> > +    bdrv_op_unblock(bs->backing_hd, op, reason);
> > +
> > +    if (bs->drv && bs->drv->bdrv_op_recursive_unblock) {
> > +        bs->drv->bdrv_op_recursive_unblock(bs, op, reason);
> > +    }
> > +}
> > +
> >  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
> >  {
> >      int i;
> > @@ -5848,7 +5911,7 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> >          return NULL;
> >      }
> >  
> > -    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_MIRROR_REPLACE, errp)) {
> >          return NULL;
> >      }
> >  
> > diff --git a/block/blkverify.c b/block/blkverify.c
> > index 621b785..75ec3df 100644
> > --- a/block/blkverify.c
> > +++ b/block/blkverify.c
> > @@ -320,6 +320,24 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
> >      bdrv_attach_aio_context(s->test_file, new_context);
> >  }
> >  
> > +static void blkverify_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> > +                                         Error *reason)
> > +{
> > +    BDRVBlkverifyState *s = bs->opaque;
> > +
> > +    bdrv_op_block(bs->file, op, reason);
> > +    bdrv_op_block(s->test_file, op, reason);
> > +}
> > +
> > +static void blkverify_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> > +                                           Error *reason)
> > +{
> > +    BDRVBlkverifyState *s = bs->opaque;
> > +
> > +    bdrv_op_unblock(bs->file, op, reason);
> > +    bdrv_op_unblock(s->test_file, op, reason);
> > +}
> > +
> >  static BlockDriver bdrv_blkverify = {
> >      .format_name                      = "blkverify",
> >      .protocol_name                    = "blkverify",
> > @@ -337,6 +355,9 @@ static BlockDriver bdrv_blkverify = {
> >      .bdrv_attach_aio_context          = blkverify_attach_aio_context,
> >      .bdrv_detach_aio_context          = blkverify_detach_aio_context,
> >  
> > +    .bdrv_op_recursive_block          = blkverify_op_recursive_block,
> > +    .bdrv_op_recursive_unblock        = blkverify_op_recursive_unblock,
> > +
> >      .is_filter                        = true,
> >      .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
> >  };
> > diff --git a/block/commit.c b/block/commit.c
> > index 91517d3..8a122b7 100644
> > --- a/block/commit.c
> > +++ b/block/commit.c
> > @@ -142,6 +142,9 @@ wait:
> >  
> >      if (!block_job_is_cancelled(&s->common) && sector_num == end) {
> >          /* success */
> > +        /* unblock only BDS to be dropped */
> > +        bdrv_op_unblock_all(top, s->common.blocker);
> > +        bdrv_op_block_all(base, s->common.blocker);
> >          ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
> >      }
> >  
> > diff --git a/block/mirror.c b/block/mirror.c
> > index 5e7a166..28ed47d 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -324,6 +324,7 @@ static void coroutine_fn mirror_run(void *opaque)
> >      uint64_t last_pause_ns;
> >      BlockDriverInfo bdi;
> >      char backing_filename[1024];
> > +    BlockDriverState *to_replace;
> >      int ret = 0;
> >      int n;
> >  
> > @@ -512,14 +513,16 @@ immediate_exit:
> >      g_free(s->in_flight_bitmap);
> >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> >      bdrv_iostatus_disable(s->target);
> > +    to_replace = s->common.bs;
> > +    if (s->to_replace) {
> > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > +        to_replace = s->to_replace;
> > +    }
> >      if (s->should_complete && ret == 0) {
> > -        BlockDriverState *to_replace = s->common.bs;
> > -        if (s->to_replace) {
> > -            to_replace = s->to_replace;
> > -        }
> >          if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> >              bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> >          }
> > +        bdrv_op_unblock_all(to_replace, bs->job->blocker);
> >          bdrv_swap(s->target, to_replace);
> >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> >              /* drop the bs loop chain formed by the swap: break the loop then
> > @@ -530,7 +533,6 @@ immediate_exit:
> >          }
> >      }
> >      if (s->to_replace) {
> > -        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> >          error_free(s->replace_blocker);
> >          bdrv_unref(s->to_replace);
> >      }
> > @@ -648,6 +650,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> >          return;
> >      }
> >  
> > +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> > +     * blocked operations so the replaces parameter can work
> > +     */
> > +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
> > +
> >      s->replaces = g_strdup(replaces);
> >      s->on_source_error = on_source_error;
> >      s->on_target_error = on_target_error;
> > diff --git a/block/quorum.c b/block/quorum.c
> > index d5ee9c0..c260171 100644
> > --- a/block/quorum.c
> > +++ b/block/quorum.c
> > @@ -945,6 +945,28 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
> >      }
> >  }
> >  
> > +static void quorum_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> > +                                      Error *reason)
> > +{
> > +    BDRVQuorumState *s = bs->opaque;
> > +    int i;
> > +
> > +    for (i = 0; i < s->num_children; i++) {
> > +        bdrv_op_block(s->bs[i], op, reason);
> > +    }
> > +}
> > +
> > +static void quorum_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> > +                                        Error *reason)
> > +{
> > +    BDRVQuorumState *s = bs->opaque;
> > +    int i;
> > +
> > +    for (i = 0; i < s->num_children; i++) {
> > +        bdrv_op_unblock(s->bs[i], op, reason);
> > +    }
> > +}
> > +
> >  static BlockDriver bdrv_quorum = {
> >      .format_name                        = "quorum",
> >      .protocol_name                      = "quorum",
> > @@ -965,6 +987,9 @@ static BlockDriver bdrv_quorum = {
> >      .bdrv_detach_aio_context            = quorum_detach_aio_context,
> >      .bdrv_attach_aio_context            = quorum_attach_aio_context,
> >  
> > +    .bdrv_op_recursive_block            = quorum_op_recursive_block,
> > +    .bdrv_op_recursive_unblock          = quorum_op_recursive_unblock,
> > +
> >      .is_filter                          = true,
> >      .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
> >  };
> > diff --git a/block/stream.c b/block/stream.c
> > index cdea3e8..2c917b7 100644
> > --- a/block/stream.c
> > +++ b/block/stream.c
> > @@ -192,6 +192,9 @@ wait:
> >              }
> >          }
> >          ret = bdrv_change_backing_file(bs, base_id, base_fmt);
> > +        /* unblock only BDS to be dropped */
> > +        bdrv_op_unblock_all(bs->backing_hd, s->common.blocker);
> > +        bdrv_op_block_all(base, s->common.blocker);
> >          close_unused_images(bs, base, base_id);
> >      }
> >  
> > diff --git a/block/vmdk.c b/block/vmdk.c
> > index 01412a8..bc5b17f 100644
> > --- a/block/vmdk.c
> > +++ b/block/vmdk.c
> > @@ -2220,6 +2220,38 @@ static QemuOptsList vmdk_create_opts = {
> >      }
> >  };
> >  
> > +static void vmdk_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> > +                                      Error *reason)
> > +{
> > +    BDRVVmdkState *s = bs->opaque;
> > +    int i;
> > +
> > +    bdrv_op_block(bs->file, op, reason);
> > +    bdrv_op_block(bs->backing_hd, op, reason);
> > +
> > +    for (i = 0; i < s->num_extents; i++) {
> > +        if (s->extents[i].file) {
> > +            bdrv_op_block(s->extents[i].file, op, reason);
> > +        }
> > +    }
> > +}
> > +
> > +static void vmdk_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> > +                                        Error *reason)
> > +{
> > +    BDRVVmdkState *s = bs->opaque;
> > +    int i;
> > +
> > +    bdrv_op_unblock(bs->file, op, reason);
> > +    bdrv_op_unblock(bs->backing_hd, op, reason);
> > +
> > +    for (i = 0; i < s->num_extents; i++) {
> > +        if (s->extents[i].file) {
> > +            bdrv_op_unblock(s->extents[i].file, op, reason);
> > +        }
> > +    }
> > +}
> > +
> >  static BlockDriver bdrv_vmdk = {
> >      .format_name                  = "vmdk",
> >      .instance_size                = sizeof(BDRVVmdkState),
> > @@ -2242,6 +2274,8 @@ static BlockDriver bdrv_vmdk = {
> >      .bdrv_get_info                = vmdk_get_info,
> >      .bdrv_detach_aio_context      = vmdk_detach_aio_context,
> >      .bdrv_attach_aio_context      = vmdk_attach_aio_context,
> > +    .bdrv_op_recursive_block      = vmdk_op_recursive_block,
> > +    .bdrv_op_recursive_unblock    = vmdk_op_recursive_unblock,
> >  
> >      .supports_backing             = true,
> >      .create_opts                  = &vmdk_create_opts,
> > diff --git a/include/block/block.h b/include/block/block.h
> > index e94b701..4729a0a 100644
> > --- a/include/block/block.h
> > +++ b/include/block/block.h
> > @@ -175,7 +175,7 @@ typedef enum BlockOpType {
> >      BLOCK_OP_TYPE_MIRROR,
> >      BLOCK_OP_TYPE_RESIZE,
> >      BLOCK_OP_TYPE_STREAM,
> > -    BLOCK_OP_TYPE_REPLACE,
> > +    BLOCK_OP_TYPE_MIRROR_REPLACE,
> >      BLOCK_OP_TYPE_MAX,
> >  } BlockOpType;
> >  
> > diff --git a/include/block/block_int.h b/include/block/block_int.h
> > index 7b541a0..a802abc 100644
> > --- a/include/block/block_int.h
> > +++ b/include/block/block_int.h
> > @@ -266,6 +266,12 @@ struct BlockDriver {
> >      void (*bdrv_io_unplug)(BlockDriverState *bs);
> >      void (*bdrv_flush_io_queue)(BlockDriverState *bs);
> >  
> > +    /* helps blockers to propagate recursively */
> > +    void (*bdrv_op_recursive_block)(BlockDriverState *bs, BlockOpType op,
> > +                                    Error *reason);
> > +    void (*bdrv_op_recursive_unblock)(BlockDriverState *bs, BlockOpType op,
> > +                                      Error *reason);
> > +
> >      QLIST_ENTRY(BlockDriver) list;
> >  };
> >  
> > -- 
> > 2.1.0
> > 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-25  9:06     ` Benoît Canet
@ 2014-08-25  9:37       ` Fam Zheng
  2014-08-25 12:12         ` Benoît Canet
  0 siblings, 1 reply; 21+ messages in thread
From: Fam Zheng @ 2014-08-25  9:37 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, jcody, qemu-devel, stefanha

On Mon, 08/25 09:06, Benoît Canet wrote:
> On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > Since the block layer code is starting to modify the BDS graph right in the
> > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > achieve this task.
> > > 
> > > This patch also takes care of modifying the op blockers users.
> > 
> > Is this going to replace backing_blocker?
> > 
> > I think it is too general an approach to control the operation properly,
> > because the op blocker may not work in the same way for all types of BDS
> > connections.  In other words, the choosing of op blockers are likely
> > conditional on graph edge types, that's why backing_blocker was added here. For
> > example, A VMDK extent connection will probably need a different set of
> > blockers than bs->file connection.
> > 
> > So could you explain in which cases is the recursive blocking/unblocking
> > useful?
> 
> It's designed for the new crop of block operations operating on BDS located in
> the middle of the backing chain: Jeff's patches, intermediate live streaming or
> intermediate mirroring.
> Recursively blocking BDS allows to do these operations safely.

Sorry I may be slow on this, but it's still not clear to me.

That doesn't immediately show how backing_blocker doesn't work. These
operations are in the category of operations that update graph topology,
meaning that they drop, add or swap some nodes in the middle of the chain. It
is not safe because there are used by the other nodes, but they are supposed to
be protected by backing_blocker. Could you be more specific?

I can think of something more than backing_hd: there are also link types other
than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
They should be protected as well.

But it seems to me that these are not recursive association, so we don't need
to apply the blocker recursively. Shouldn't it be enough to only block bs->file
when assigning this pointer, and unblock it when unassigning?

I'm not trying to push back this series. I am asking just because that my
understanding to the question still needs some forging.

Fam

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-25  9:37       ` Fam Zheng
@ 2014-08-25 12:12         ` Benoît Canet
  2014-08-26  4:42           ` Fam Zheng
  0 siblings, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-08-25 12:12 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, jcody, stefanha, Benoît Canet, qemu-devel

On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> On Mon, 08/25 09:06, Benoît Canet wrote:
> > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > achieve this task.
> > > > 
> > > > This patch also takes care of modifying the op blockers users.
> > > 
> > > Is this going to replace backing_blocker?
> > > 
> > > I think it is too general an approach to control the operation properly,
> > > because the op blocker may not work in the same way for all types of BDS
> > > connections.  In other words, the choosing of op blockers are likely
> > > conditional on graph edge types, that's why backing_blocker was added here. For
> > > example, A VMDK extent connection will probably need a different set of
> > > blockers than bs->file connection.
> > > 
> > > So could you explain in which cases is the recursive blocking/unblocking
> > > useful?
> > 
> > It's designed for the new crop of block operations operating on BDS located in
> > the middle of the backing chain: Jeff's patches, intermediate live streaming or
> > intermediate mirroring.
> > Recursively blocking BDS allows to do these operations safely.
> 
> Sorry I may be slow on this, but it's still not clear to me.
> 
> That doesn't immediately show how backing_blocker doesn't work. These
> operations are in the category of operations that update graph topology,
> meaning that they drop, add or swap some nodes in the middle of the chain. It
> is not safe because there are used by the other nodes, but they are supposed to
> be protected by backing_blocker. Could you be more specific?

I don't know particularly about the backing blocker case.

> 
> I can think of something more than backing_hd: there are also link types other
> than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
> They should be protected as well.

This patch takes cares of recursing everywhere.

I can give you an example for quorum.

If a streaming operation is running on a quorum block backend the recursive
blocking will help to block any operation done directly on any of the children.

It's usefull since we introduced drive-mirror replace which will replace an arbitrary
node of a quorum at the end of the mirroring operation.

> 
> But it seems to me that these are not recursive association, so we don't need
> to apply the blocker recursively. Shouldn't it be enough to only block bs->file
> when assigning this pointer, and unblock it when unassigning?

Maybe their is other reason to implements recursive blocker: I have not decided on
my own to implements them. It's a decision that took place at the Stuttgart meeting.

Best regards

Benoît

> 
> I'm not trying to push back this series. I am asking just because that my
> understanding to the question still needs some forging.
> 
> Fam

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-25 12:12         ` Benoît Canet
@ 2014-08-26  4:42           ` Fam Zheng
  2014-08-26  6:45             ` Benoît Canet
  0 siblings, 1 reply; 21+ messages in thread
From: Fam Zheng @ 2014-08-26  4:42 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, jcody, qemu-devel, stefanha

On Mon, 08/25 12:12, Benoît Canet wrote:
> On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> > On Mon, 08/25 09:06, Benoît Canet wrote:
> > > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > > achieve this task.
> > > > > 
> > > > > This patch also takes care of modifying the op blockers users.
> > > > 
> > > > Is this going to replace backing_blocker?
> > > > 
> > > > I think it is too general an approach to control the operation properly,
> > > > because the op blocker may not work in the same way for all types of BDS
> > > > connections.  In other words, the choosing of op blockers are likely
> > > > conditional on graph edge types, that's why backing_blocker was added here. For
> > > > example, A VMDK extent connection will probably need a different set of
> > > > blockers than bs->file connection.
> > > > 
> > > > So could you explain in which cases is the recursive blocking/unblocking
> > > > useful?
> > > 
> > > It's designed for the new crop of block operations operating on BDS located in
> > > the middle of the backing chain: Jeff's patches, intermediate live streaming or
> > > intermediate mirroring.
> > > Recursively blocking BDS allows to do these operations safely.
> > 
> > Sorry I may be slow on this, but it's still not clear to me.
> > 
> > That doesn't immediately show how backing_blocker doesn't work. These
> > operations are in the category of operations that update graph topology,
> > meaning that they drop, add or swap some nodes in the middle of the chain. It
> > is not safe because there are used by the other nodes, but they are supposed to
> > be protected by backing_blocker. Could you be more specific?
> 
> I don't know particularly about the backing blocker case.
> 
> > 
> > I can think of something more than backing_hd: there are also link types other
> > than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
> > They should be protected as well.
> 
> This patch takes cares of recursing everywhere.
> 
> I can give you an example for quorum.
> 
> If a streaming operation is running on a quorum block backend the recursive
> blocking will help to block any operation done directly on any of the children.

At what points should block layer recursively block/unblock the operations in
this quorum case?

Fam

> 
> It's usefull since we introduced drive-mirror replace which will replace an arbitrary
> node of a quorum at the end of the mirroring operation.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-26  4:42           ` Fam Zheng
@ 2014-08-26  6:45             ` Benoît Canet
  2014-09-04 20:42               ` Stefan Hajnoczi
  0 siblings, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-08-26  6:45 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, jcody, stefanha, Benoît Canet, qemu-devel

On Tue, Aug 26, 2014 at 12:42:04PM +0800, Fam Zheng wrote:
> On Mon, 08/25 12:12, Benoît Canet wrote:
> > On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> > > On Mon, 08/25 09:06, Benoît Canet wrote:
> > > > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > > > achieve this task.
> > > > > > 
> > > > > > This patch also takes care of modifying the op blockers users.
> > > > > 
> > > > > Is this going to replace backing_blocker?
> > > > > 
> > > > > I think it is too general an approach to control the operation properly,
> > > > > because the op blocker may not work in the same way for all types of BDS
> > > > > connections.  In other words, the choosing of op blockers are likely
> > > > > conditional on graph edge types, that's why backing_blocker was added here. For
> > > > > example, A VMDK extent connection will probably need a different set of
> > > > > blockers than bs->file connection.
> > > > > 
> > > > > So could you explain in which cases is the recursive blocking/unblocking
> > > > > useful?
> > > > 
> > > > It's designed for the new crop of block operations operating on BDS located in
> > > > the middle of the backing chain: Jeff's patches, intermediate live streaming or
> > > > intermediate mirroring.
> > > > Recursively blocking BDS allows to do these operations safely.
> > > 
> > > Sorry I may be slow on this, but it's still not clear to me.
> > > 
> > > That doesn't immediately show how backing_blocker doesn't work. These
> > > operations are in the category of operations that update graph topology,
> > > meaning that they drop, add or swap some nodes in the middle of the chain. It
> > > is not safe because there are used by the other nodes, but they are supposed to
> > > be protected by backing_blocker. Could you be more specific?
> > 
> > I don't know particularly about the backing blocker case.
> > 
> > > 
> > > I can think of something more than backing_hd: there are also link types other
> > > than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
> > > They should be protected as well.
> > 
> > This patch takes cares of recursing everywhere.
> > 
> > I can give you an example for quorum.
> > 
> > If a streaming operation is running on a quorum block backend the recursive
> > blocking will help to block any operation done directly on any of the children.
> 
> At what points should block layer recursively block/unblock the operations in
> this quorum case?

When the streaming starts it should block all the top bs children.
So after when an operation tries to operate on a child of the top bs it will be
forbidden.

The beauty of it is that recursive blockers can easily replace regular blockers.

> 
> Fam
> 
> > 
> > It's usefull since we introduced drive-mirror replace which will replace an arbitrary
> > node of a quorum at the end of the mirroring operation.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-26  6:45             ` Benoît Canet
@ 2014-09-04 20:42               ` Stefan Hajnoczi
  2014-09-10  8:54                 ` Fam Zheng
  0 siblings, 1 reply; 21+ messages in thread
From: Stefan Hajnoczi @ 2014-09-04 20:42 UTC (permalink / raw)
  To: Benoît Canet; +Cc: kwolf, jcody, Fam Zheng, qemu-devel, stefanha

[-- Attachment #1: Type: text/plain, Size: 4185 bytes --]

On Tue, Aug 26, 2014 at 06:45:54AM +0000, Benoît Canet wrote:
> On Tue, Aug 26, 2014 at 12:42:04PM +0800, Fam Zheng wrote:
> > On Mon, 08/25 12:12, Benoît Canet wrote:
> > > On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> > > > On Mon, 08/25 09:06, Benoît Canet wrote:
> > > > > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > > > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > > > > achieve this task.
> > > > > > > 
> > > > > > > This patch also takes care of modifying the op blockers users.
> > > > > > 
> > > > > > Is this going to replace backing_blocker?
> > > > > > 
> > > > > > I think it is too general an approach to control the operation properly,
> > > > > > because the op blocker may not work in the same way for all types of BDS
> > > > > > connections.  In other words, the choosing of op blockers are likely
> > > > > > conditional on graph edge types, that's why backing_blocker was added here. For
> > > > > > example, A VMDK extent connection will probably need a different set of
> > > > > > blockers than bs->file connection.
> > > > > > 
> > > > > > So could you explain in which cases is the recursive blocking/unblocking
> > > > > > useful?
> > > > > 
> > > > > It's designed for the new crop of block operations operating on BDS located in
> > > > > the middle of the backing chain: Jeff's patches, intermediate live streaming or
> > > > > intermediate mirroring.
> > > > > Recursively blocking BDS allows to do these operations safely.
> > > > 
> > > > Sorry I may be slow on this, but it's still not clear to me.
> > > > 
> > > > That doesn't immediately show how backing_blocker doesn't work. These
> > > > operations are in the category of operations that update graph topology,
> > > > meaning that they drop, add or swap some nodes in the middle of the chain. It
> > > > is not safe because there are used by the other nodes, but they are supposed to
> > > > be protected by backing_blocker. Could you be more specific?
> > > 
> > > I don't know particularly about the backing blocker case.
> > > 
> > > > 
> > > > I can think of something more than backing_hd: there are also link types other
> > > > than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
> > > > They should be protected as well.
> > > 
> > > This patch takes cares of recursing everywhere.
> > > 
> > > I can give you an example for quorum.
> > > 
> > > If a streaming operation is running on a quorum block backend the recursive
> > > blocking will help to block any operation done directly on any of the children.
> > 
> > At what points should block layer recursively block/unblock the operations in
> > this quorum case?
> 
> When the streaming starts it should block all the top bs children.
> So after when an operation tries to operate on a child of the top bs it will be
> forbidden.
> 
> The beauty of it is that recursive blockers can easily replace regular blockers.

Let's think of a situation that recursive blockers protect but
backing_blocker does not:

a <- b <- c <- d

c is the backing file and is therefore protected by the op blocker.

The block-commit command works with node-names, however, so we can
manipulate any nodes in the graph, not just the topmost one.  Try this:

block-commit d
block-commit b

I haven't checked yet but I suspect it will launch two block-commit jobs
on the same partial chain (that's a bad thing because it can lead to
corruption).

With recursive blockers, not just c but also a and b are protected.

To me, this demonstrates that recursive op blockers are safer than
non-recursive backing_blocker and will prevent real-world cases that
could lead to corruption.

BTW I'm fairly happy with the patch itself, but like Fam I'm still
thinking through different cases to convince myself the design is sound.

Stefan

[-- Attachment #2: Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-08-22 16:11 ` [Qemu-devel] [PATCH] block: Make op blockers recursive Benoît Canet
  2014-08-25  6:04   ` Fam Zheng
@ 2014-09-09 11:56   ` Kevin Wolf
  2014-09-09 14:28     ` Benoît Canet
  1 sibling, 1 reply; 21+ messages in thread
From: Kevin Wolf @ 2014-09-09 11:56 UTC (permalink / raw)
  To: Benoît Canet; +Cc: jcody, famz, qemu-devel, stefanha

Am 22.08.2014 um 18:11 hat Benoît Canet geschrieben:
> Since the block layer code is starting to modify the BDS graph right in the
> middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> to properly block and unblock whole BDS subtrees; recursion is a neat way to
> achieve this task.
> 
> This patch also takes care of modifying the op blockers users.
> 
> Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> ---
>  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
>  block/blkverify.c         | 21 +++++++++++++++
>  block/commit.c            |  3 +++
>  block/mirror.c            | 17 ++++++++----
>  block/quorum.c            | 25 +++++++++++++++++
>  block/stream.c            |  3 +++
>  block/vmdk.c              | 34 +++++++++++++++++++++++
>  include/block/block.h     |  2 +-
>  include/block/block_int.h |  6 +++++
>  9 files changed, 171 insertions(+), 9 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 6fa0201..d964b6c 100644
> --- a/block.c
> +++ b/block.c
> @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
>      return false;
>  }
>  
> -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> +/* do the real work of blocking a BDS */
> +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> +                             Error *reason)
>  {
>      BdrvOpBlocker *blocker;
>      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
>      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
>  }
>  
> -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> +/* do the real work of unblocking a BDS */
> +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> +                               Error *reason)
>  {
>      BdrvOpBlocker *blocker, *next;
>      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
>      }
>  }
>  
> +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> +                                  Error *reason)
> +{
> +    BdrvOpBlocker *blocker, *next;
> +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {

This doesn't actually need the SAFE version.

> +        if (blocker->reason == reason) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +/* block recursively a BDS */
> +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> +{
> +    if (!bs) {
> +        return;
> +    }
> +
> +    /* prevent recursion loop */
> +    if (bdrv_op_is_blocked_by(bs, op, reason)) {
> +        return;
> +    }
> +
> +    /* block first for recursion loop protection to work */
> +    bdrv_do_op_block(bs, op, reason);
> +
> +    bdrv_op_block(bs->file, op, reason);
> +    bdrv_op_block(bs->backing_hd, op, reason);
> +
> +    if (bs->drv && bs->drv->bdrv_op_recursive_block) {
> +        bs->drv->bdrv_op_recursive_block(bs, op, reason);
> +    }

Here you block bs->file/bs->backing_hd automatically, no matter whether
the block driver implements the callback or not. I'm not sure if we can
do it like this for all times, but for now that should be okay.

> +}
> +
> +/* unblock recursively a BDS */
> +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> +{
> +    if (!bs) {
> +        return;
> +    }
> +
> +    /* prevent recursion loop */
> +    if (!bdrv_op_is_blocked_by(bs, op, reason)) {
> +        return;
> +    }
> +
> +    /* unblock first for recursion loop protection to work */
> +    bdrv_do_op_unblock(bs, op, reason);
> +
> +    bdrv_op_unblock(bs->file, op, reason);
> +    bdrv_op_unblock(bs->backing_hd, op, reason);
> +
> +    if (bs->drv && bs->drv->bdrv_op_recursive_unblock) {
> +        bs->drv->bdrv_op_recursive_unblock(bs, op, reason);
> +    }
> +}
> +
>  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
>  {
>      int i;
> @@ -5848,7 +5911,7 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
>          return NULL;
>      }
>  
> -    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_MIRROR_REPLACE, errp)) {

That rename could have been a separate patch.

>          return NULL;
>      }
>  
> diff --git a/block/blkverify.c b/block/blkverify.c
> index 621b785..75ec3df 100644
> --- a/block/blkverify.c
> +++ b/block/blkverify.c
> @@ -320,6 +320,24 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
>      bdrv_attach_aio_context(s->test_file, new_context);
>  }
>  
> +static void blkverify_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> +                                         Error *reason)
> +{
> +    BDRVBlkverifyState *s = bs->opaque;
> +
> +    bdrv_op_block(bs->file, op, reason);
> +    bdrv_op_block(s->test_file, op, reason);
> +}
> +
> +static void blkverify_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> +                                           Error *reason)
> +{
> +    BDRVBlkverifyState *s = bs->opaque;
> +
> +    bdrv_op_unblock(bs->file, op, reason);
> +    bdrv_op_unblock(s->test_file, op, reason);
> +}

Here you block bs->file again, even though the block.c function has
already done it. Nothing bad happens, because bdrv_op_block() simply
stops when the blocker is already set, but the code would be nicer if we
did block bs->file only in one place. (I'm leaning towards doing it in
the drivers, but that's your decision.)

>  static BlockDriver bdrv_blkverify = {
>      .format_name                      = "blkverify",
>      .protocol_name                    = "blkverify",
> @@ -337,6 +355,9 @@ static BlockDriver bdrv_blkverify = {
>      .bdrv_attach_aio_context          = blkverify_attach_aio_context,
>      .bdrv_detach_aio_context          = blkverify_detach_aio_context,
>  
> +    .bdrv_op_recursive_block          = blkverify_op_recursive_block,
> +    .bdrv_op_recursive_unblock        = blkverify_op_recursive_unblock,
> +
>      .is_filter                        = true,
>      .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
>  };
> diff --git a/block/commit.c b/block/commit.c
> index 91517d3..8a122b7 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -142,6 +142,9 @@ wait:
>  
>      if (!block_job_is_cancelled(&s->common) && sector_num == end) {
>          /* success */
> +        /* unblock only BDS to be dropped */
> +        bdrv_op_unblock_all(top, s->common.blocker);
> +        bdrv_op_block_all(base, s->common.blocker);

This suggests that bdrv_op_(un)block_all() doesn't provide the right
interface, but we rather want an optional base argument in it, so that
you can (un)block a specific part of the backing file chain.

>          ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
>      }
>  
> diff --git a/block/mirror.c b/block/mirror.c
> index 5e7a166..28ed47d 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -324,6 +324,7 @@ static void coroutine_fn mirror_run(void *opaque)
>      uint64_t last_pause_ns;
>      BlockDriverInfo bdi;
>      char backing_filename[1024];
> +    BlockDriverState *to_replace;
>      int ret = 0;
>      int n;
>  
> @@ -512,14 +513,16 @@ immediate_exit:
>      g_free(s->in_flight_bitmap);
>      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
>      bdrv_iostatus_disable(s->target);
> +    to_replace = s->common.bs;
> +    if (s->to_replace) {
> +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> +        to_replace = s->to_replace;
> +    }
>      if (s->should_complete && ret == 0) {
> -        BlockDriverState *to_replace = s->common.bs;
> -        if (s->to_replace) {
> -            to_replace = s->to_replace;
> -        }
>          if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
>              bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
>          }
> +        bdrv_op_unblock_all(to_replace, bs->job->blocker);

You need to unblock all of the BDSes that are going to be removed.
Aren't you unblocking more than that here?

It's not a real problem because block_job_completed() would unblock the
rest anyway and its bdrv_op_unblock_all() call is simply ignored now,
but it would be nicer to just unblock here what's actually needed.

>          bdrv_swap(s->target, to_replace);
>          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
>              /* drop the bs loop chain formed by the swap: break the loop then
> @@ -530,7 +533,6 @@ immediate_exit:
>          }
>      }
>      if (s->to_replace) {
> -        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
>          error_free(s->replace_blocker);
>          bdrv_unref(s->to_replace);
>      }
> @@ -648,6 +650,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
>          return;
>      }
>  
> +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> +     * blocked operations so the replaces parameter can work
> +     */
> +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);

What purpose does a blocker serve when it's disabled before it is
checked? I would only ever expect a bdrv_op_unblock() after some
operation on the BDS has finished, but not when starting an operation
that needs it.

>      s->replaces = g_strdup(replaces);
>      s->on_source_error = on_source_error;
>      s->on_target_error = on_target_error;
> diff --git a/block/stream.c b/block/stream.c
> index cdea3e8..2c917b7 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -192,6 +192,9 @@ wait:
>              }
>          }
>          ret = bdrv_change_backing_file(bs, base_id, base_fmt);
> +        /* unblock only BDS to be dropped */
> +        bdrv_op_unblock_all(bs->backing_hd, s->common.blocker);
> +        bdrv_op_block_all(base, s->common.blocker);

Same thing as for commit.

>          close_unused_images(bs, base, base_id);
>      }
>  
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 01412a8..bc5b17f 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -2220,6 +2220,38 @@ static QemuOptsList vmdk_create_opts = {
>      }
>  };
>  
> +static void vmdk_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> +                                      Error *reason)
> +{
> +    BDRVVmdkState *s = bs->opaque;
> +    int i;
> +
> +    bdrv_op_block(bs->file, op, reason);
> +    bdrv_op_block(bs->backing_hd, op, reason);

Already blocked in block.c, remove one of the two

> +    for (i = 0; i < s->num_extents; i++) {
> +        if (s->extents[i].file) {
> +            bdrv_op_block(s->extents[i].file, op, reason);
> +        }
> +    }
> +}

Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-09 11:56   ` Kevin Wolf
@ 2014-09-09 14:28     ` Benoît Canet
  2014-09-12  3:48       ` Fam Zheng
  0 siblings, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-09-09 14:28 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: famz, jcody, stefanha, qemu-devel, Benoît Canet

On Tue, Sep 09, 2014 at 01:56:46PM +0200, Kevin Wolf wrote:
> Am 22.08.2014 um 18:11 hat Benoît Canet geschrieben:
> > Since the block layer code is starting to modify the BDS graph right in the
> > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > achieve this task.
> > 
> > This patch also takes care of modifying the op blockers users.
> > 
> > Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> > ---
> >  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
> >  block/blkverify.c         | 21 +++++++++++++++
> >  block/commit.c            |  3 +++
> >  block/mirror.c            | 17 ++++++++----
> >  block/quorum.c            | 25 +++++++++++++++++
> >  block/stream.c            |  3 +++
> >  block/vmdk.c              | 34 +++++++++++++++++++++++
> >  include/block/block.h     |  2 +-
> >  include/block/block_int.h |  6 +++++
> >  9 files changed, 171 insertions(+), 9 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 6fa0201..d964b6c 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
> >      return false;
> >  }
> >  
> > -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +/* do the real work of blocking a BDS */
> > +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> > +                             Error *reason)
> >  {
> >      BdrvOpBlocker *blocker;
> >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> >      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
> >  }
> >  
> > -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +/* do the real work of unblocking a BDS */
> > +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> > +                               Error *reason)
> >  {
> >      BdrvOpBlocker *blocker, *next;
> >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> >      }
> >  }
> >  
> > +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> > +                                  Error *reason)
> > +{
> > +    BdrvOpBlocker *blocker, *next;
> > +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
> 
> This doesn't actually need the SAFE version.
> 
> > +        if (blocker->reason == reason) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> > +/* block recursively a BDS */
> > +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +{
> > +    if (!bs) {
> > +        return;
> > +    }
> > +
> > +    /* prevent recursion loop */
> > +    if (bdrv_op_is_blocked_by(bs, op, reason)) {
> > +        return;
> > +    }
> > +
> > +    /* block first for recursion loop protection to work */
> > +    bdrv_do_op_block(bs, op, reason);
> > +
> > +    bdrv_op_block(bs->file, op, reason);
> > +    bdrv_op_block(bs->backing_hd, op, reason);
> > +
> > +    if (bs->drv && bs->drv->bdrv_op_recursive_block) {
> > +        bs->drv->bdrv_op_recursive_block(bs, op, reason);
> > +    }
> 
> Here you block bs->file/bs->backing_hd automatically, no matter whether
> the block driver implements the callback or not. I'm not sure if we can
> do it like this for all times, but for now that should be okay.
> 
> > +}
> > +
> > +/* unblock recursively a BDS */
> > +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > +{
> > +    if (!bs) {
> > +        return;
> > +    }
> > +
> > +    /* prevent recursion loop */
> > +    if (!bdrv_op_is_blocked_by(bs, op, reason)) {
> > +        return;
> > +    }
> > +
> > +    /* unblock first for recursion loop protection to work */
> > +    bdrv_do_op_unblock(bs, op, reason);
> > +
> > +    bdrv_op_unblock(bs->file, op, reason);
> > +    bdrv_op_unblock(bs->backing_hd, op, reason);
> > +
> > +    if (bs->drv && bs->drv->bdrv_op_recursive_unblock) {
> > +        bs->drv->bdrv_op_recursive_unblock(bs, op, reason);
> > +    }
> > +}
> > +
> >  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
> >  {
> >      int i;
> > @@ -5848,7 +5911,7 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> >          return NULL;
> >      }
> >  
> > -    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_MIRROR_REPLACE, errp)) {
> 
> That rename could have been a separate patch.
> 
> >          return NULL;
> >      }
> >  
> > diff --git a/block/blkverify.c b/block/blkverify.c
> > index 621b785..75ec3df 100644
> > --- a/block/blkverify.c
> > +++ b/block/blkverify.c
> > @@ -320,6 +320,24 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
> >      bdrv_attach_aio_context(s->test_file, new_context);
> >  }
> >  
> > +static void blkverify_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> > +                                         Error *reason)
> > +{
> > +    BDRVBlkverifyState *s = bs->opaque;
> > +
> > +    bdrv_op_block(bs->file, op, reason);
> > +    bdrv_op_block(s->test_file, op, reason);
> > +}
> > +
> > +static void blkverify_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> > +                                           Error *reason)
> > +{
> > +    BDRVBlkverifyState *s = bs->opaque;
> > +
> > +    bdrv_op_unblock(bs->file, op, reason);
> > +    bdrv_op_unblock(s->test_file, op, reason);
> > +}
> 
> Here you block bs->file again, even though the block.c function has
> already done it. Nothing bad happens, because bdrv_op_block() simply
> stops when the blocker is already set, but the code would be nicer if we
> did block bs->file only in one place. (I'm leaning towards doing it in
> the drivers, but that's your decision.)
> 
> >  static BlockDriver bdrv_blkverify = {
> >      .format_name                      = "blkverify",
> >      .protocol_name                    = "blkverify",
> > @@ -337,6 +355,9 @@ static BlockDriver bdrv_blkverify = {
> >      .bdrv_attach_aio_context          = blkverify_attach_aio_context,
> >      .bdrv_detach_aio_context          = blkverify_detach_aio_context,
> >  
> > +    .bdrv_op_recursive_block          = blkverify_op_recursive_block,
> > +    .bdrv_op_recursive_unblock        = blkverify_op_recursive_unblock,
> > +
> >      .is_filter                        = true,
> >      .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
> >  };
> > diff --git a/block/commit.c b/block/commit.c
> > index 91517d3..8a122b7 100644
> > --- a/block/commit.c
> > +++ b/block/commit.c
> > @@ -142,6 +142,9 @@ wait:
> >  
> >      if (!block_job_is_cancelled(&s->common) && sector_num == end) {
> >          /* success */
> > +        /* unblock only BDS to be dropped */
> > +        bdrv_op_unblock_all(top, s->common.blocker);
> > +        bdrv_op_block_all(base, s->common.blocker);
> 
> This suggests that bdrv_op_(un)block_all() doesn't provide the right
> interface, but we rather want an optional base argument in it, so that
> you can (un)block a specific part of the backing file chain.

Good idea.

> 
> >          ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
> >      }
> >  
> > diff --git a/block/mirror.c b/block/mirror.c
> > index 5e7a166..28ed47d 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -324,6 +324,7 @@ static void coroutine_fn mirror_run(void *opaque)
> >      uint64_t last_pause_ns;
> >      BlockDriverInfo bdi;
> >      char backing_filename[1024];
> > +    BlockDriverState *to_replace;
> >      int ret = 0;
> >      int n;
> >  
> > @@ -512,14 +513,16 @@ immediate_exit:
> >      g_free(s->in_flight_bitmap);
> >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> >      bdrv_iostatus_disable(s->target);
> > +    to_replace = s->common.bs;
> > +    if (s->to_replace) {
> > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > +        to_replace = s->to_replace;
> > +    }
> >      if (s->should_complete && ret == 0) {
> > -        BlockDriverState *to_replace = s->common.bs;
> > -        if (s->to_replace) {
> > -            to_replace = s->to_replace;
> > -        }
> >          if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> >              bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> >          }
> > +        bdrv_op_unblock_all(to_replace, bs->job->blocker);
> 
> You need to unblock all of the BDSes that are going to be removed.
> Aren't you unblocking more than that here?
> 
> It's not a real problem because block_job_completed() would unblock the
> rest anyway and its bdrv_op_unblock_all() call is simply ignored now,
> but it would be nicer to just unblock here what's actually needed.
> 
> >          bdrv_swap(s->target, to_replace);
> >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> >              /* drop the bs loop chain formed by the swap: break the loop then
> > @@ -530,7 +533,6 @@ immediate_exit:
> >          }
> >      }
> >      if (s->to_replace) {
> > -        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> >          error_free(s->replace_blocker);
> >          bdrv_unref(s->to_replace);
> >      }
> > @@ -648,6 +650,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> >          return;
> >      }
> >  
> > +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> > +     * blocked operations so the replaces parameter can work
> > +     */
> > +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
> 
> What purpose does a blocker serve when it's disabled before it is
> checked? I would only ever expect a bdrv_op_unblock() after some
> operation on the BDS has finished, but not when starting an operation
> that needs it.

BLOCK_OP_TYPE_MIRROR_REPLACE is checked and blocked by block-job-complete
during the time the mirror finish when an arbitrary node of the graph must be
replaced.

The start of the mirroring block everything including
BLOCK_OP_TYPE_MIRROR_REPLACE so this hunk relax the blocking so block-job-complete
can have a chance of being able to block it.

The comment of this hunk seems to be deficient and the code not self explanatory.

On the other way maybe block-job-complete is done wrong from the start but
relaxing BLOCK_OP_TYPE_MIRROR_REPLACE when the mirror start is the only way
to make block-job-complete work as intended.

Thanks

Benoît
> 
> >      s->replaces = g_strdup(replaces);
> >      s->on_source_error = on_source_error;
> >      s->on_target_error = on_target_error;
> > diff --git a/block/stream.c b/block/stream.c
> > index cdea3e8..2c917b7 100644
> > --- a/block/stream.c
> > +++ b/block/stream.c
> > @@ -192,6 +192,9 @@ wait:
> >              }
> >          }
> >          ret = bdrv_change_backing_file(bs, base_id, base_fmt);
> > +        /* unblock only BDS to be dropped */
> > +        bdrv_op_unblock_all(bs->backing_hd, s->common.blocker);
> > +        bdrv_op_block_all(base, s->common.blocker);
> 
> Same thing as for commit.
> 
> >          close_unused_images(bs, base, base_id);
> >      }
> >  
> > diff --git a/block/vmdk.c b/block/vmdk.c
> > index 01412a8..bc5b17f 100644
> > --- a/block/vmdk.c
> > +++ b/block/vmdk.c
> > @@ -2220,6 +2220,38 @@ static QemuOptsList vmdk_create_opts = {
> >      }
> >  };
> >  
> > +static void vmdk_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> > +                                      Error *reason)
> > +{
> > +    BDRVVmdkState *s = bs->opaque;
> > +    int i;
> > +
> > +    bdrv_op_block(bs->file, op, reason);
> > +    bdrv_op_block(bs->backing_hd, op, reason);
> 
> Already blocked in block.c, remove one of the two
> 
> > +    for (i = 0; i < s->num_extents; i++) {
> > +        if (s->extents[i].file) {
> > +            bdrv_op_block(s->extents[i].file, op, reason);
> > +        }
> > +    }
> > +}
> 
> Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-04 20:42               ` Stefan Hajnoczi
@ 2014-09-10  8:54                 ` Fam Zheng
  2014-09-10 14:18                   ` Benoît Canet
  2014-09-10 15:14                   ` Eric Blake
  0 siblings, 2 replies; 21+ messages in thread
From: Fam Zheng @ 2014-09-10  8:54 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: kwolf, jcody, stefanha, Benoît Canet, qemu-devel

On Thu, 09/04 21:42, Stefan Hajnoczi wrote:
> On Tue, Aug 26, 2014 at 06:45:54AM +0000, Benoît Canet wrote:
> > On Tue, Aug 26, 2014 at 12:42:04PM +0800, Fam Zheng wrote:
> > > On Mon, 08/25 12:12, Benoît Canet wrote:
> > > > On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> > > > > On Mon, 08/25 09:06, Benoît Canet wrote:
> > > > > > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > > > > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > > > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > > > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > > > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > > > > > achieve this task.
> > > > > > > > 
> > > > > > > > This patch also takes care of modifying the op blockers users.
> > > > > > > 
> > > > > > > Is this going to replace backing_blocker?
> > > > > > > 
> > > > > > > I think it is too general an approach to control the operation properly,
> > > > > > > because the op blocker may not work in the same way for all types of BDS
> > > > > > > connections.  In other words, the choosing of op blockers are likely
> > > > > > > conditional on graph edge types, that's why backing_blocker was added here. For
> > > > > > > example, A VMDK extent connection will probably need a different set of
> > > > > > > blockers than bs->file connection.
> > > > > > > 
> > > > > > > So could you explain in which cases is the recursive blocking/unblocking
> > > > > > > useful?
> > > > > > 
> > > > > > It's designed for the new crop of block operations operating on BDS located in
> > > > > > the middle of the backing chain: Jeff's patches, intermediate live streaming or
> > > > > > intermediate mirroring.
> > > > > > Recursively blocking BDS allows to do these operations safely.
> > > > > 
> > > > > Sorry I may be slow on this, but it's still not clear to me.
> > > > > 
> > > > > That doesn't immediately show how backing_blocker doesn't work. These
> > > > > operations are in the category of operations that update graph topology,
> > > > > meaning that they drop, add or swap some nodes in the middle of the chain. It
> > > > > is not safe because there are used by the other nodes, but they are supposed to
> > > > > be protected by backing_blocker. Could you be more specific?
> > > > 
> > > > I don't know particularly about the backing blocker case.
> > > > 
> > > > > 
> > > > > I can think of something more than backing_hd: there are also link types other
> > > > > than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
> > > > > They should be protected as well.
> > > > 
> > > > This patch takes cares of recursing everywhere.
> > > > 
> > > > I can give you an example for quorum.
> > > > 
> > > > If a streaming operation is running on a quorum block backend the recursive
> > > > blocking will help to block any operation done directly on any of the children.
> > > 
> > > At what points should block layer recursively block/unblock the operations in
> > > this quorum case?
> > 
> > When the streaming starts it should block all the top bs children.
> > So after when an operation tries to operate on a child of the top bs it will be
> > forbidden.
> > 
> > The beauty of it is that recursive blockers can easily replace regular blockers.
> 
> Let's think of a situation that recursive blockers protect but
> backing_blocker does not:
> 
> a <- b <- c <- d
> 
> c is the backing file and is therefore protected by the op blocker.
> 
> The block-commit command works with node-names, however, so we can
> manipulate any nodes in the graph, not just the topmost one.  Try this:
> 
> block-commit d
> block-commit b
> 
> I haven't checked yet but I suspect it will launch two block-commit jobs
> on the same partial chain (that's a bad thing because it can lead to
> corruption).

1) Does block-commit work with node-names already? In other words, is
   block-commit b possible now? I only see drive-mirror works with it, but not
   drive-backup, block-mirror or block-commit.

2) Regardless of the answer to 1), I think we could use a similar approach as
   drive-backup here: split BLOCK_OP_TYPE_COMMIT to
   BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
   BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.

Unblocking BLOCK_OP_TYPE_COMMIT on backing_hd is wrong as long as we expose "d"
to block-commit, with node-name.

As the next step, let's think about what it takes to safely allow commit d to b
with:

a <- b <- c <- d <- e <- f <- g

I think the answer is: if the commit job checks and sets blockers on range [d, b],
it is safe. Because from e, f and g's points of view, the whole backing chain
is always complete and working, just like how device model thinks of the top node
when it's being active committed nowadays.

Based on that, we can even start commit on non-intersecting partial chains at
the same time:

block-commit g up to e
block-commit d up to b

, if we want to.

That is only possible with a more specific blocking logic, instead of recursive
blocker.

Fam

> 
> With recursive blockers, not just c but also a and b are protected.
> 
> To me, this demonstrates that recursive op blockers are safer than
> non-recursive backing_blocker and will prevent real-world cases that
> could lead to corruption.
> 
> BTW I'm fairly happy with the patch itself, but like Fam I'm still
> thinking through different cases to convince myself the design is sound.
> 
> Stefan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-10  8:54                 ` Fam Zheng
@ 2014-09-10 14:18                   ` Benoît Canet
  2014-09-10 15:14                   ` Eric Blake
  1 sibling, 0 replies; 21+ messages in thread
From: Benoît Canet @ 2014-09-10 14:18 UTC (permalink / raw)
  To: Fam Zheng
  Cc: kwolf, Stefan Hajnoczi, jcody, qemu-devel, stefanha, Benoît Canet

On Wed, Sep 10, 2014 at 04:54:19PM +0800, Fam Zheng wrote:
> On Thu, 09/04 21:42, Stefan Hajnoczi wrote:
> > On Tue, Aug 26, 2014 at 06:45:54AM +0000, Benoît Canet wrote:
> > > On Tue, Aug 26, 2014 at 12:42:04PM +0800, Fam Zheng wrote:
> > > > On Mon, 08/25 12:12, Benoît Canet wrote:
> > > > > On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> > > > > > On Mon, 08/25 09:06, Benoît Canet wrote:
> > > > > > > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > > > > > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > > > > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > > > > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > > > > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > > > > > > achieve this task.
> > > > > > > > > 
> > > > > > > > > This patch also takes care of modifying the op blockers users.
> > > > > > > > 
> > > > > > > > Is this going to replace backing_blocker?
> > > > > > > > 
> > > > > > > > I think it is too general an approach to control the operation properly,
> > > > > > > > because the op blocker may not work in the same way for all types of BDS
> > > > > > > > connections.  In other words, the choosing of op blockers are likely
> > > > > > > > conditional on graph edge types, that's why backing_blocker was added here. For
> > > > > > > > example, A VMDK extent connection will probably need a different set of
> > > > > > > > blockers than bs->file connection.
> > > > > > > > 
> > > > > > > > So could you explain in which cases is the recursive blocking/unblocking
> > > > > > > > useful?
> > > > > > > 
> > > > > > > It's designed for the new crop of block operations operating on BDS located in
> > > > > > > the middle of the backing chain: Jeff's patches, intermediate live streaming or
> > > > > > > intermediate mirroring.
> > > > > > > Recursively blocking BDS allows to do these operations safely.
> > > > > > 
> > > > > > Sorry I may be slow on this, but it's still not clear to me.
> > > > > > 
> > > > > > That doesn't immediately show how backing_blocker doesn't work. These
> > > > > > operations are in the category of operations that update graph topology,
> > > > > > meaning that they drop, add or swap some nodes in the middle of the chain. It
> > > > > > is not safe because there are used by the other nodes, but they are supposed to
> > > > > > be protected by backing_blocker. Could you be more specific?
> > > > > 
> > > > > I don't know particularly about the backing blocker case.
> > > > > 
> > > > > > 
> > > > > > I can think of something more than backing_hd: there are also link types other
> > > > > > than backing_hd, for example ->file, (vmdk)->extents or (quorum)->qcrs, etc.
> > > > > > They should be protected as well.
> > > > > 
> > > > > This patch takes cares of recursing everywhere.
> > > > > 
> > > > > I can give you an example for quorum.
> > > > > 
> > > > > If a streaming operation is running on a quorum block backend the recursive
> > > > > blocking will help to block any operation done directly on any of the children.
> > > > 
> > > > At what points should block layer recursively block/unblock the operations in
> > > > this quorum case?
> > > 
> > > When the streaming starts it should block all the top bs children.
> > > So after when an operation tries to operate on a child of the top bs it will be
> > > forbidden.
> > > 
> > > The beauty of it is that recursive blockers can easily replace regular blockers.
> > 
> > Let's think of a situation that recursive blockers protect but
> > backing_blocker does not:
> > 
> > a <- b <- c <- d
> > 
> > c is the backing file and is therefore protected by the op blocker.
> > 
> > The block-commit command works with node-names, however, so we can
> > manipulate any nodes in the graph, not just the topmost one.  Try this:
> > 
> > block-commit d
> > block-commit b
> > 
> > I haven't checked yet but I suspect it will launch two block-commit jobs
> > on the same partial chain (that's a bad thing because it can lead to
> > corruption).
> 
> 1) Does block-commit work with node-names already? In other words, is
>    block-commit b possible now? I only see drive-mirror works with it, but not
>    drive-backup, block-mirror or block-commit.
> 
> 2) Regardless of the answer to 1), I think we could use a similar approach as
>    drive-backup here: split BLOCK_OP_TYPE_COMMIT to
>    BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
>    BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.

I'll try to implement this while being at it.

> 
> Unblocking BLOCK_OP_TYPE_COMMIT on backing_hd is wrong as long as we expose "d"
> to block-commit, with node-name.
> 
> As the next step, let's think about what it takes to safely allow commit d to b
> with:
> 
> a <- b <- c <- d <- e <- f <- g
> 
> I think the answer is: if the commit job checks and sets blockers on range [d, b],
> it is safe. Because from e, f and g's points of view, the whole backing chain
> is always complete and working, just like how device model thinks of the top node
> when it's being active committed nowadays.
> 
> Based on that, we can even start commit on non-intersecting partial chains at
> the same time:
> 
> block-commit g up to e
> block-commit d up to b
> 
> , if we want to.
> 
> That is only possible with a more specific blocking logic, instead of recursive
> blocker.

Here your advice is the same as Kevin's one: make op blocker work on ranges.
I'll change them to do so.

Thanks for the input.

Benoît

> 
> Fam
> 
> > 
> > With recursive blockers, not just c but also a and b are protected.
> > 
> > To me, this demonstrates that recursive op blockers are safer than
> > non-recursive backing_blocker and will prevent real-world cases that
> > could lead to corruption.
> > 
> > BTW I'm fairly happy with the patch itself, but like Fam I'm still
> > thinking through different cases to convince myself the design is sound.
> > 
> > Stefan
> 
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-10  8:54                 ` Fam Zheng
  2014-09-10 14:18                   ` Benoît Canet
@ 2014-09-10 15:14                   ` Eric Blake
  2014-09-10 15:49                     ` Benoît Canet
  2014-09-11  0:50                     ` Fam Zheng
  1 sibling, 2 replies; 21+ messages in thread
From: Eric Blake @ 2014-09-10 15:14 UTC (permalink / raw)
  To: Fam Zheng, Stefan Hajnoczi
  Cc: kwolf, Jeff Cody, Benoît Canet, stefanha, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1592 bytes --]

On 09/10/2014 02:54 AM, Fam Zheng wrote:

>> Let's think of a situation that recursive blockers protect but
>> backing_blocker does not:
>>
>> a <- b <- c <- d
>>
>> c is the backing file and is therefore protected by the op blocker.
>>
>> The block-commit command works with node-names, however, so we can
>> manipulate any nodes in the graph, not just the topmost one.  Try this:
>>
>> block-commit d
>> block-commit b
>>
>> I haven't checked yet but I suspect it will launch two block-commit jobs
>> on the same partial chain (that's a bad thing because it can lead to
>> corruption).
> 
> 1) Does block-commit work with node-names already? In other words, is
>    block-commit b possible now? I only see drive-mirror works with it, but not
>    drive-backup, block-mirror or block-commit.

IIRC, Jeff Cody proposed patches for qemu 2.1 that would have done this,
but we dropped them for that release in order to get the recursive
blockers sorted out first.

 >
> 2) Regardless of the answer to 1), I think we could use a similar approach as
>    drive-backup here: split BLOCK_OP_TYPE_COMMIT to
>    BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
>    BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.

In that earlier thread, Jeff had some ideas that it is not so much the
operation name that should be the blocker, but the lower-level action(s)
implied by each operation (read metadata, write metadata, read image,
write image)

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 539 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-10 15:14                   ` Eric Blake
@ 2014-09-10 15:49                     ` Benoît Canet
  2014-09-11 11:22                       ` Kevin Wolf
  2014-09-11  0:50                     ` Fam Zheng
  1 sibling, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-09-10 15:49 UTC (permalink / raw)
  To: Eric Blake
  Cc: kwolf, Fam Zheng, Stefan Hajnoczi, jcody, qemu-devel, stefanha,
	Benoît Canet

On Wed, Sep 10, 2014 at 09:14:35AM -0600, Eric Blake wrote:
> On 09/10/2014 02:54 AM, Fam Zheng wrote:
> 
> >> Let's think of a situation that recursive blockers protect but
> >> backing_blocker does not:
> >>
> >> a <- b <- c <- d
> >>
> >> c is the backing file and is therefore protected by the op blocker.
> >>
> >> The block-commit command works with node-names, however, so we can
> >> manipulate any nodes in the graph, not just the topmost one.  Try this:
> >>
> >> block-commit d
> >> block-commit b
> >>
> >> I haven't checked yet but I suspect it will launch two block-commit jobs
> >> on the same partial chain (that's a bad thing because it can lead to
> >> corruption).
> > 
> > 1) Does block-commit work with node-names already? In other words, is
> >    block-commit b possible now? I only see drive-mirror works with it, but not
> >    drive-backup, block-mirror or block-commit.
> 
> IIRC, Jeff Cody proposed patches for qemu 2.1 that would have done this,
> but we dropped them for that release in order to get the recursive
> blockers sorted out first.
> 
>  >
> > 2) Regardless of the answer to 1), I think we could use a similar approach as
> >    drive-backup here: split BLOCK_OP_TYPE_COMMIT to
> >    BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
> >    BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.
> 
> In that earlier thread, Jeff had some ideas that it is not so much the
> operation name that should be the blocker, but the lower-level action(s)
> implied by each operation (read metadata, write metadata, read image,
> write image)

Does it mean I should pause this current series and task switch to another
infrastucture task ?
I could switch to the block I/O accouting work.

What does the other developpers and maintainers think about it ?

> 
> -- 
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-10 15:14                   ` Eric Blake
  2014-09-10 15:49                     ` Benoît Canet
@ 2014-09-11  0:50                     ` Fam Zheng
  1 sibling, 0 replies; 21+ messages in thread
From: Fam Zheng @ 2014-09-11  0:50 UTC (permalink / raw)
  To: Eric Blake
  Cc: kwolf, Stefan Hajnoczi, jcody, qemu-devel, stefanha, Benoît Canet

On Wed, 09/10 09:14, Eric Blake wrote:
> On 09/10/2014 02:54 AM, Fam Zheng wrote:
> 
> >> Let's think of a situation that recursive blockers protect but
> >> backing_blocker does not:
> >>
> >> a <- b <- c <- d
> >>
> >> c is the backing file and is therefore protected by the op blocker.
> >>
> >> The block-commit command works with node-names, however, so we can
> >> manipulate any nodes in the graph, not just the topmost one.  Try this:
> >>
> >> block-commit d
> >> block-commit b
> >>
> >> I haven't checked yet but I suspect it will launch two block-commit jobs
> >> on the same partial chain (that's a bad thing because it can lead to
> >> corruption).
> > 
> > 1) Does block-commit work with node-names already? In other words, is
> >    block-commit b possible now? I only see drive-mirror works with it, but not
> >    drive-backup, block-mirror or block-commit.
> 
> IIRC, Jeff Cody proposed patches for qemu 2.1 that would have done this,
> but we dropped them for that release in order to get the recursive
> blockers sorted out first.
> 
>  >
> > 2) Regardless of the answer to 1), I think we could use a similar approach as
> >    drive-backup here: split BLOCK_OP_TYPE_COMMIT to
> >    BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
> >    BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.
> 
> In that earlier thread, Jeff had some ideas that it is not so much the
> operation name that should be the blocker, but the lower-level action(s)
> implied by each operation (read metadata, write metadata, read image,
> write image)
> 

Yes, that is a good idea.

I had an open question back in that thread, which was whether we care about any
other operation types at all - I think the only disruptive operations here are
those that call bdrv_swap(). Any others?

Fam

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-10 15:49                     ` Benoît Canet
@ 2014-09-11 11:22                       ` Kevin Wolf
  0 siblings, 0 replies; 21+ messages in thread
From: Kevin Wolf @ 2014-09-11 11:22 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Fam Zheng, Stefan Hajnoczi, jcody, qemu-devel, stefanha

Am 10.09.2014 um 17:49 hat Benoît Canet geschrieben:
> On Wed, Sep 10, 2014 at 09:14:35AM -0600, Eric Blake wrote:
> > On 09/10/2014 02:54 AM, Fam Zheng wrote:
> > 
> > >> Let's think of a situation that recursive blockers protect but
> > >> backing_blocker does not:
> > >>
> > >> a <- b <- c <- d
> > >>
> > >> c is the backing file and is therefore protected by the op blocker.
> > >>
> > >> The block-commit command works with node-names, however, so we can
> > >> manipulate any nodes in the graph, not just the topmost one.  Try this:
> > >>
> > >> block-commit d
> > >> block-commit b
> > >>
> > >> I haven't checked yet but I suspect it will launch two block-commit jobs
> > >> on the same partial chain (that's a bad thing because it can lead to
> > >> corruption).
> > > 
> > > 1) Does block-commit work with node-names already? In other words, is
> > >    block-commit b possible now? I only see drive-mirror works with it, but not
> > >    drive-backup, block-mirror or block-commit.
> > 
> > IIRC, Jeff Cody proposed patches for qemu 2.1 that would have done this,
> > but we dropped them for that release in order to get the recursive
> > blockers sorted out first.
> > 
> >  >
> > > 2) Regardless of the answer to 1), I think we could use a similar approach as
> > >    drive-backup here: split BLOCK_OP_TYPE_COMMIT to
> > >    BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
> > >    BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.
> > 
> > In that earlier thread, Jeff had some ideas that it is not so much the
> > operation name that should be the blocker, but the lower-level action(s)
> > implied by each operation (read metadata, write metadata, read image,
> > write image)
> 
> Does it mean I should pause this current series and task switch to another
> infrastucture task ?
> I could switch to the block I/O accouting work.
> 
> What does the other developpers and maintainers think about it ?

No, I think we should get this series, with comments addressed, merged.
It's not a solution for all eternity because it tends to be more
restrictive than necessary, but that's okay for now and it makes things
safer.

We'll have to discuss more about blockers at KVM Forum, but that's the
second part: Lifting unnecessary restrictions. This is tricky and won't
be done for 2.2, so in the meantime we want this patch (and I expect we
can reuse some parts of it even with Jeff's approach, so it won't be
wasted work).

We just shouldn't try to sink much time here because we know that it's
only preliminary. Let's just get something that works good enough for
now, it doesn't have to be perfect. Splitting into SOURCE/TARGET and
similar things to make it less restrictive are probably not worth it.

Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-09 14:28     ` Benoît Canet
@ 2014-09-12  3:48       ` Fam Zheng
  2014-09-15 15:17         ` Benoît Canet
  0 siblings, 1 reply; 21+ messages in thread
From: Fam Zheng @ 2014-09-12  3:48 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, jcody, qemu-devel, stefanha

On Tue, 09/09 14:28, Benoît Canet wrote:
> On Tue, Sep 09, 2014 at 01:56:46PM +0200, Kevin Wolf wrote:
> > Am 22.08.2014 um 18:11 hat Benoît Canet geschrieben:
> > > Since the block layer code is starting to modify the BDS graph right in the
> > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > achieve this task.
> > > 
> > > This patch also takes care of modifying the op blockers users.
> > > 
> > > Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> > > ---
> > >  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
> > >  block/blkverify.c         | 21 +++++++++++++++
> > >  block/commit.c            |  3 +++
> > >  block/mirror.c            | 17 ++++++++----
> > >  block/quorum.c            | 25 +++++++++++++++++
> > >  block/stream.c            |  3 +++
> > >  block/vmdk.c              | 34 +++++++++++++++++++++++
> > >  include/block/block.h     |  2 +-
> > >  include/block/block_int.h |  6 +++++
> > >  9 files changed, 171 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/block.c b/block.c
> > > index 6fa0201..d964b6c 100644
> > > --- a/block.c
> > > +++ b/block.c
> > > @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
> > >      return false;
> > >  }
> > >  
> > > -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > +/* do the real work of blocking a BDS */
> > > +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> > > +                             Error *reason)
> > >  {
> > >      BdrvOpBlocker *blocker;
> > >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > >      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
> > >  }
> > >  
> > > -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > +/* do the real work of unblocking a BDS */
> > > +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> > > +                               Error *reason)
> > >  {
> > >      BdrvOpBlocker *blocker, *next;
> > >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > >      }
> > >  }
> > >  
> > > +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> > > +                                  Error *reason)
> > > +{
> > > +    BdrvOpBlocker *blocker, *next;
> > > +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
> > 
> > This doesn't actually need the SAFE version.
> > 
> > > +        if (blocker->reason == reason) {
> > > +            return true;
> > > +        }
> > > +    }
> > > +    return false;
> > > +}
> > > +
> > > +/* block recursively a BDS */
> > > +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > +{
> > > +    if (!bs) {
> > > +        return;
> > > +    }
> > > +
> > > +    /* prevent recursion loop */
> > > +    if (bdrv_op_is_blocked_by(bs, op, reason)) {
> > > +        return;
> > > +    }
> > > +
> > > +    /* block first for recursion loop protection to work */
> > > +    bdrv_do_op_block(bs, op, reason);
> > > +
> > > +    bdrv_op_block(bs->file, op, reason);
> > > +    bdrv_op_block(bs->backing_hd, op, reason);
> > > +
> > > +    if (bs->drv && bs->drv->bdrv_op_recursive_block) {
> > > +        bs->drv->bdrv_op_recursive_block(bs, op, reason);
> > > +    }
> > 
> > Here you block bs->file/bs->backing_hd automatically, no matter whether
> > the block driver implements the callback or not. I'm not sure if we can
> > do it like this for all times, but for now that should be okay.
> > 
> > > +}
> > > +
> > > +/* unblock recursively a BDS */
> > > +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > +{
> > > +    if (!bs) {
> > > +        return;
> > > +    }
> > > +
> > > +    /* prevent recursion loop */
> > > +    if (!bdrv_op_is_blocked_by(bs, op, reason)) {
> > > +        return;
> > > +    }
> > > +
> > > +    /* unblock first for recursion loop protection to work */
> > > +    bdrv_do_op_unblock(bs, op, reason);
> > > +
> > > +    bdrv_op_unblock(bs->file, op, reason);
> > > +    bdrv_op_unblock(bs->backing_hd, op, reason);
> > > +
> > > +    if (bs->drv && bs->drv->bdrv_op_recursive_unblock) {
> > > +        bs->drv->bdrv_op_recursive_unblock(bs, op, reason);
> > > +    }
> > > +}
> > > +
> > >  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
> > >  {
> > >      int i;
> > > @@ -5848,7 +5911,7 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
> > >          return NULL;
> > >      }
> > >  
> > > -    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
> > > +    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_MIRROR_REPLACE, errp)) {
> > 
> > That rename could have been a separate patch.
> > 
> > >          return NULL;
> > >      }
> > >  
> > > diff --git a/block/blkverify.c b/block/blkverify.c
> > > index 621b785..75ec3df 100644
> > > --- a/block/blkverify.c
> > > +++ b/block/blkverify.c
> > > @@ -320,6 +320,24 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
> > >      bdrv_attach_aio_context(s->test_file, new_context);
> > >  }
> > >  
> > > +static void blkverify_op_recursive_block(BlockDriverState *bs, BlockOpType op,
> > > +                                         Error *reason)
> > > +{
> > > +    BDRVBlkverifyState *s = bs->opaque;
> > > +
> > > +    bdrv_op_block(bs->file, op, reason);
> > > +    bdrv_op_block(s->test_file, op, reason);
> > > +}
> > > +
> > > +static void blkverify_op_recursive_unblock(BlockDriverState *bs, BlockOpType op,
> > > +                                           Error *reason)
> > > +{
> > > +    BDRVBlkverifyState *s = bs->opaque;
> > > +
> > > +    bdrv_op_unblock(bs->file, op, reason);
> > > +    bdrv_op_unblock(s->test_file, op, reason);
> > > +}
> > 
> > Here you block bs->file again, even though the block.c function has
> > already done it. Nothing bad happens, because bdrv_op_block() simply
> > stops when the blocker is already set, but the code would be nicer if we
> > did block bs->file only in one place. (I'm leaning towards doing it in
> > the drivers, but that's your decision.)
> > 
> > >  static BlockDriver bdrv_blkverify = {
> > >      .format_name                      = "blkverify",
> > >      .protocol_name                    = "blkverify",
> > > @@ -337,6 +355,9 @@ static BlockDriver bdrv_blkverify = {
> > >      .bdrv_attach_aio_context          = blkverify_attach_aio_context,
> > >      .bdrv_detach_aio_context          = blkverify_detach_aio_context,
> > >  
> > > +    .bdrv_op_recursive_block          = blkverify_op_recursive_block,
> > > +    .bdrv_op_recursive_unblock        = blkverify_op_recursive_unblock,
> > > +
> > >      .is_filter                        = true,
> > >      .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
> > >  };
> > > diff --git a/block/commit.c b/block/commit.c
> > > index 91517d3..8a122b7 100644
> > > --- a/block/commit.c
> > > +++ b/block/commit.c
> > > @@ -142,6 +142,9 @@ wait:
> > >  
> > >      if (!block_job_is_cancelled(&s->common) && sector_num == end) {
> > >          /* success */
> > > +        /* unblock only BDS to be dropped */
> > > +        bdrv_op_unblock_all(top, s->common.blocker);
> > > +        bdrv_op_block_all(base, s->common.blocker);
> > 
> > This suggests that bdrv_op_(un)block_all() doesn't provide the right
> > interface, but we rather want an optional base argument in it, so that
> > you can (un)block a specific part of the backing file chain.
> 
> Good idea.
> 
> > 
> > >          ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
> > >      }
> > >  
> > > diff --git a/block/mirror.c b/block/mirror.c
> > > index 5e7a166..28ed47d 100644
> > > --- a/block/mirror.c
> > > +++ b/block/mirror.c
> > > @@ -324,6 +324,7 @@ static void coroutine_fn mirror_run(void *opaque)
> > >      uint64_t last_pause_ns;
> > >      BlockDriverInfo bdi;
> > >      char backing_filename[1024];
> > > +    BlockDriverState *to_replace;
> > >      int ret = 0;
> > >      int n;
> > >  
> > > @@ -512,14 +513,16 @@ immediate_exit:
> > >      g_free(s->in_flight_bitmap);
> > >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> > >      bdrv_iostatus_disable(s->target);
> > > +    to_replace = s->common.bs;
> > > +    if (s->to_replace) {
> > > +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > > +        to_replace = s->to_replace;
> > > +    }
> > >      if (s->should_complete && ret == 0) {
> > > -        BlockDriverState *to_replace = s->common.bs;
> > > -        if (s->to_replace) {
> > > -            to_replace = s->to_replace;
> > > -        }
> > >          if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> > >              bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> > >          }
> > > +        bdrv_op_unblock_all(to_replace, bs->job->blocker);
> > 
> > You need to unblock all of the BDSes that are going to be removed.
> > Aren't you unblocking more than that here?
> > 
> > It's not a real problem because block_job_completed() would unblock the
> > rest anyway and its bdrv_op_unblock_all() call is simply ignored now,
> > but it would be nicer to just unblock here what's actually needed.
> > 
> > >          bdrv_swap(s->target, to_replace);
> > >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> > >              /* drop the bs loop chain formed by the swap: break the loop then
> > > @@ -530,7 +533,6 @@ immediate_exit:
> > >          }
> > >      }
> > >      if (s->to_replace) {
> > > -        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> > >          error_free(s->replace_blocker);
> > >          bdrv_unref(s->to_replace);
> > >      }
> > > @@ -648,6 +650,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
> > >          return;
> > >      }
> > >  
> > > +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> > > +     * blocked operations so the replaces parameter can work
> > > +     */
> > > +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
> > 
> > What purpose does a blocker serve when it's disabled before it is
> > checked? I would only ever expect a bdrv_op_unblock() after some
> > operation on the BDS has finished, but not when starting an operation
> > that needs it.

I agree. It makes no sense if the blocker is also the checker.

> 
> BLOCK_OP_TYPE_MIRROR_REPLACE is checked and blocked by block-job-complete
> during the time the mirror finish when an arbitrary node of the graph must be
> replaced.

It seems to me mirror unblocks this operation from the job->blocker when job
starts, and never block it (with the job->blocker) again. It's leaked.

Fam

> 
> The start of the mirroring block everything including
> BLOCK_OP_TYPE_MIRROR_REPLACE so this hunk relax the blocking so block-job-complete
> can have a chance of being able to block it.
> 
> The comment of this hunk seems to be deficient and the code not self explanatory.
> 
> On the other way maybe block-job-complete is done wrong from the start but
> relaxing BLOCK_OP_TYPE_MIRROR_REPLACE when the mirror start is the only way
> to make block-job-complete work as intended.
> 
> Thanks
> 
> Benoît

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-12  3:48       ` Fam Zheng
@ 2014-09-15 15:17         ` Benoît Canet
  2014-09-18  2:57           ` Fam Zheng
  0 siblings, 1 reply; 21+ messages in thread
From: Benoît Canet @ 2014-09-15 15:17 UTC (permalink / raw)
  To: Fam Zheng; +Cc: Kevin Wolf, jcody, stefanha, qemu-devel, Benoît Canet

On Fri, Sep 12, 2014 at 11:48:33AM +0800, Fam Zheng wrote:
> On Tue, 09/09 14:28, Benoît Canet wrote:
> > On Tue, Sep 09, 2014 at 01:56:46PM +0200, Kevin Wolf wrote:
> > > Am 22.08.2014 um 18:11 hat Benoît Canet geschrieben:
> > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > achieve this task.
> > > > 
> > > > This patch also takes care of modifying the op blockers users.
> > > > 
> > > > Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> > > > ---
> > > >  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
> > > >  block/blkverify.c         | 21 +++++++++++++++
> > > >  block/commit.c            |  3 +++
> > > >  block/mirror.c            | 17 ++++++++----
> > > >  block/quorum.c            | 25 +++++++++++++++++
> > > >  block/stream.c            |  3 +++
> > > >  block/vmdk.c              | 34 +++++++++++++++++++++++
> > > >  include/block/block.h     |  2 +-
> > > >  include/block/block_int.h |  6 +++++
> > > >  9 files changed, 171 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/block.c b/block.c
> > > > index 6fa0201..d964b6c 100644
> > > > --- a/block.c
> > > > +++ b/block.c
> > > > @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
> > > >      return false;
> > > >  }
> > > >  
> > > > -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > +/* do the real work of blocking a BDS */
> > > > +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> > > > +                             Error *reason)
> > > >  {
> > > >      BdrvOpBlocker *blocker;
> > > >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > > @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > >      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
> > > >  }
> > > >  
> > > > -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > +/* do the real work of unblocking a BDS */
> > > > +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> > > > +                               Error *reason)
> > > >  {
> > > >      BdrvOpBlocker *blocker, *next;
> > > >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > > @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > >      }
> > > >  }
> > > >  
> > > > +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> > > > +                                  Error *reason)
> > > > +{
> > > > +    BdrvOpBlocker *blocker, *next;
> > > > +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > > +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
> > > 
> > > This doesn't actually need the SAFE version.
> > > 
> > > > +        if (blocker->reason == reason) {
> > > > +            return true;
> > > > +        }
> > > > +    }
> > > > +    return false;
> > > > +}
> > > > +
> > > > +/* block recursively a BDS */
> > > > +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > +{
> > > > +    if (!bs) {
> > > > +        return;
> > > >  
> > > > +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> > > > +     * blocked operations so the replaces parameter can work
> > > > +     */
> > > > +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
> > > 
> > > What purpose does a blocker serve when it's disabled before it is
> > > checked? I would only ever expect a bdrv_op_unblock() after some
> > > operation on the BDS has finished, but not when starting an operation
> > > that needs it.
> 
> I agree. It makes no sense if the blocker is also the checker.
> 
> > 
> > BLOCK_OP_TYPE_MIRROR_REPLACE is checked and blocked by block-job-complete
> > during the time the mirror finish when an arbitrary node of the graph must be
> > replaced.
> 
> It seems to me mirror unblocks this operation from the job->blocker when job
> starts, and never block it (with the job->blocker) again. It's leaked.
> 

block-job-complete will block it in mirror_complete.

BLOCK_OP_TYPE_MIRROR_REPLACE is blocked by driver-mirror code triggered by
block-job complete to block the "replaces" BDS during the end of the mirroring.

If you find silly that block-job-complete prevent itself from running twice on
the same BDS by checking the blocker then blocking it then the existing code is
wrong.

Else the code in this current patch is correct.

Could you have a glance at "static void mirror_complete(BlockJob *job, Error **errp)"
and tell me what you think about the situation. You should also look at
check_to_replace_node.

Best regards

Benoît

> Fam

> 
> > 
> > The start of the mirroring block everything including
> > BLOCK_OP_TYPE_MIRROR_REPLACE so this hunk relax the blocking so block-job-complete
> > can have a chance of being able to block it.
> > 
> > The comment of this hunk seems to be deficient and the code not self explanatory.
> > 
> > On the other way maybe block-job-complete is done wrong from the start but
> > relaxing BLOCK_OP_TYPE_MIRROR_REPLACE when the mirror start is the only way
> > to make block-job-complete work as intended.
> > 
> > Thanks
> > 
> > Benoît

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-15 15:17         ` Benoît Canet
@ 2014-09-18  2:57           ` Fam Zheng
  2014-09-22 12:33             ` Benoît Canet
  0 siblings, 1 reply; 21+ messages in thread
From: Fam Zheng @ 2014-09-18  2:57 UTC (permalink / raw)
  To: Benoît Canet; +Cc: Kevin Wolf, jcody, qemu-devel, stefanha

On Mon, 09/15 15:17, Benoît Canet wrote:
> On Fri, Sep 12, 2014 at 11:48:33AM +0800, Fam Zheng wrote:
> > On Tue, 09/09 14:28, Benoît Canet wrote:
> > > On Tue, Sep 09, 2014 at 01:56:46PM +0200, Kevin Wolf wrote:
> > > > Am 22.08.2014 um 18:11 hat Benoît Canet geschrieben:
> > > > > Since the block layer code is starting to modify the BDS graph right in the
> > > > > middle of BDS chains (block-mirror's replace parameter for example) QEMU needs
> > > > > to properly block and unblock whole BDS subtrees; recursion is a neat way to
> > > > > achieve this task.
> > > > > 
> > > > > This patch also takes care of modifying the op blockers users.
> > > > > 
> > > > > Signed-off-by: Benoit Canet <benoit.canet@nodalink.com>
> > > > > ---
> > > > >  block.c                   | 69 ++++++++++++++++++++++++++++++++++++++++++++---
> > > > >  block/blkverify.c         | 21 +++++++++++++++
> > > > >  block/commit.c            |  3 +++
> > > > >  block/mirror.c            | 17 ++++++++----
> > > > >  block/quorum.c            | 25 +++++++++++++++++
> > > > >  block/stream.c            |  3 +++
> > > > >  block/vmdk.c              | 34 +++++++++++++++++++++++
> > > > >  include/block/block.h     |  2 +-
> > > > >  include/block/block_int.h |  6 +++++
> > > > >  9 files changed, 171 insertions(+), 9 deletions(-)
> > > > > 
> > > > > diff --git a/block.c b/block.c
> > > > > index 6fa0201..d964b6c 100644
> > > > > --- a/block.c
> > > > > +++ b/block.c
> > > > > @@ -5446,7 +5446,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
> > > > >      return false;
> > > > >  }
> > > > >  
> > > > > -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > > +/* do the real work of blocking a BDS */
> > > > > +static void bdrv_do_op_block(BlockDriverState *bs, BlockOpType op,
> > > > > +                             Error *reason)
> > > > >  {
> > > > >      BdrvOpBlocker *blocker;
> > > > >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > > > @@ -5456,7 +5458,9 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > >      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
> > > > >  }
> > > > >  
> > > > > -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > > +/* do the real work of unblocking a BDS */
> > > > > +static void bdrv_do_op_unblock(BlockDriverState *bs, BlockOpType op,
> > > > > +                               Error *reason)
> > > > >  {
> > > > >      BdrvOpBlocker *blocker, *next;
> > > > >      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > > > @@ -5468,6 +5472,65 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > >      }
> > > > >  }
> > > > >  
> > > > > +static bool bdrv_op_is_blocked_by(BlockDriverState *bs, BlockOpType op,
> > > > > +                                  Error *reason)
> > > > > +{
> > > > > +    BdrvOpBlocker *blocker, *next;
> > > > > +    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
> > > > > +    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
> > > > 
> > > > This doesn't actually need the SAFE version.
> > > > 
> > > > > +        if (blocker->reason == reason) {
> > > > > +            return true;
> > > > > +        }
> > > > > +    }
> > > > > +    return false;
> > > > > +}
> > > > > +
> > > > > +/* block recursively a BDS */
> > > > > +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
> > > > > +{
> > > > > +    if (!bs) {
> > > > > +        return;
> > > > >  
> > > > > +    /* remove BLOCK_OP_TYPE_MIRROR_REPLACE from the list of
> > > > > +     * blocked operations so the replaces parameter can work
> > > > > +     */
> > > > > +    bdrv_op_unblock(bs, BLOCK_OP_TYPE_MIRROR_REPLACE, bs->job->blocker);
> > > > 
> > > > What purpose does a blocker serve when it's disabled before it is
> > > > checked? I would only ever expect a bdrv_op_unblock() after some
> > > > operation on the BDS has finished, but not when starting an operation
> > > > that needs it.
> > 
> > I agree. It makes no sense if the blocker is also the checker.
> > 
> > > 
> > > BLOCK_OP_TYPE_MIRROR_REPLACE is checked and blocked by block-job-complete
> > > during the time the mirror finish when an arbitrary node of the graph must be
> > > replaced.
> > 
> > It seems to me mirror unblocks this operation from the job->blocker when job
> > starts, and never block it (with the job->blocker) again. It's leaked.
> > 
> 
> block-job-complete will block it in mirror_complete.
> 
> BLOCK_OP_TYPE_MIRROR_REPLACE is blocked by driver-mirror code triggered by
> block-job complete to block the "replaces" BDS during the end of the mirroring.
> 
> If you find silly that block-job-complete prevent itself from running twice on
> the same BDS by checking the blocker then blocking it then the existing code is
> wrong.
> 
> Else the code in this current patch is correct.
> 
> Could you have a glance at "static void mirror_complete(BlockJob *job, Error **errp)"
> and tell me what you think about the situation. You should also look at
> check_to_replace_node.
> 

I'd prefer early error from user's point of view, so maybe checking and
blocking can be done during mirror_start instead, then we don't need the
relaxation. What's the advantage to delay the check to block-job-complete?

Fam

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Make op blockers recursive
  2014-09-18  2:57           ` Fam Zheng
@ 2014-09-22 12:33             ` Benoît Canet
  0 siblings, 0 replies; 21+ messages in thread
From: Benoît Canet @ 2014-09-22 12:33 UTC (permalink / raw)
  To: Fam Zheng; +Cc: Kevin Wolf, jcody, stefanha, qemu-devel, Benoît Canet

On Thu, Sep 18, 2014 at 10:57:48AM +0800, Fam Zheng wrote:
> > > > 
> > > > BLOCK_OP_TYPE_MIRROR_REPLACE is checked and blocked by block-job-complete
> > > > during the time the mirror finish when an arbitrary node of the graph must be
> > > > replaced.
> > > 
> > > It seems to me mirror unblocks this operation from the job->blocker when job
> > > starts, and never block it (with the job->blocker) again. It's leaked.
> > > 
> > 
> > block-job-complete will block it in mirror_complete.
> > 
> > BLOCK_OP_TYPE_MIRROR_REPLACE is blocked by driver-mirror code triggered by
> > block-job complete to block the "replaces" BDS during the end of the mirroring.
> > 
> > If you find silly that block-job-complete prevent itself from running twice on
> > the same BDS by checking the blocker then blocking it then the existing code is
> > wrong.
> > 
> > Else the code in this current patch is correct.
> > 
> > Could you have a glance at "static void mirror_complete(BlockJob *job, Error **errp)"
> > and tell me what you think about the situation. You should also look at
> > check_to_replace_node.
> > 
> 
> I'd prefer early error from user's point of view, so maybe checking and
> blocking can be done during mirror_start instead, then we don't need the
> relaxation. What's the advantage to delay the check to block-job-complete?
> 
> Fam

BLOCK_OP_TYPE_MIRROR_REPLACE is the blocker for the replacement work so I blocked
it where the replacement happen.
Another point is that until block-job-complete happen we are not completely sure
that this replacement operation will happen.

Best regards

Benoît

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2014-09-22 12:33 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-22 16:11 [Qemu-devel] [PATCH] Implement recursive op blockers Benoît Canet
2014-08-22 16:11 ` [Qemu-devel] [PATCH] block: Make op blockers recursive Benoît Canet
2014-08-25  6:04   ` Fam Zheng
2014-08-25  9:06     ` Benoît Canet
2014-08-25  9:37       ` Fam Zheng
2014-08-25 12:12         ` Benoît Canet
2014-08-26  4:42           ` Fam Zheng
2014-08-26  6:45             ` Benoît Canet
2014-09-04 20:42               ` Stefan Hajnoczi
2014-09-10  8:54                 ` Fam Zheng
2014-09-10 14:18                   ` Benoît Canet
2014-09-10 15:14                   ` Eric Blake
2014-09-10 15:49                     ` Benoît Canet
2014-09-11 11:22                       ` Kevin Wolf
2014-09-11  0:50                     ` Fam Zheng
2014-09-09 11:56   ` Kevin Wolf
2014-09-09 14:28     ` Benoît Canet
2014-09-12  3:48       ` Fam Zheng
2014-09-15 15:17         ` Benoît Canet
2014-09-18  2:57           ` Fam Zheng
2014-09-22 12:33             ` Benoît Canet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.