All of lore.kernel.org
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: qemu-block@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>,
	Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>
Subject: [PATCH v8 29/43] mirror: Deal with filters
Date: Tue,  1 Sep 2020 16:34:10 +0200	[thread overview]
Message-ID: <20200901143424.884735-30-mreitz@redhat.com> (raw)
In-Reply-To: <20200901143424.884735-1-mreitz@redhat.com>

This includes some permission limiting (for example, we only need to
take the RESIZE permission for active commits where the base is smaller
than the top).

base_overlay is introduced so we can query bdrv_is_allocated_above() on
it - we cannot do that with base itself, because a filter's block_status
is the same as its child node, so if there are filters on base,
bdrv_is_allocated_above() on base would return information including
base.

Use this opportunity to rename qmp_drive_mirror()'s "source" BDS to
"target_backing_bs", because that is what it really refers to.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qapi/block-core.json |   6 ++-
 block/mirror.c       | 118 +++++++++++++++++++++++++++++++++----------
 blockdev.c           |  32 ++++++++----
 3 files changed, 118 insertions(+), 38 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index f8f42cc836..8d180b584c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1948,7 +1948,8 @@
 #
 # @replaces: with sync=full graph node name to be replaced by the new
 #            image when a whole image copy is done. This can be used to repair
-#            broken Quorum files. (Since 2.1)
+#            broken Quorum files.  By default, @device is replaced, although
+#            implicitly created filters on it are kept. (Since 2.1)
 #
 # @mode: whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
@@ -2259,7 +2260,8 @@
 #
 # @replaces: with sync=full graph node name to be replaced by the new
 #            image when a whole image copy is done. This can be used to repair
-#            broken Quorum files.
+#            broken Quorum files.  By default, @device is replaced, although
+#            implicitly created filters on it are kept.
 #
 # @speed:  the maximum speed, in bytes per second
 #
diff --git a/block/mirror.c b/block/mirror.c
index e8e8844afc..f16b0d62bc 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
     BlockBackend *target;
     BlockDriverState *mirror_top_bs;
     BlockDriverState *base;
+    BlockDriverState *base_overlay;
 
     /* The name of the graph node to replace */
     char *replaces;
@@ -677,8 +678,10 @@ static int mirror_exit_common(Job *job)
                              &error_abort);
     if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
         BlockDriverState *backing = s->is_none_mode ? src : s->base;
-        if (backing_bs(target_bs) != backing) {
-            bdrv_set_backing_hd(target_bs, backing, &local_err);
+        BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
+
+        if (bdrv_cow_bs(unfiltered_target) != backing) {
+            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
             if (local_err) {
                 error_report_err(local_err);
                 local_err = NULL;
@@ -740,7 +743,7 @@ static int mirror_exit_common(Job *job)
      * valid.
      */
     block_job_remove_all_bdrv(bjob);
-    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
     /* We just changed the BDS the job BB refers to (with either or both of the
      * bdrv_replace_node() calls), so switch the BB back so the cleanup does
@@ -786,7 +789,6 @@ static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
 static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
     int64_t offset;
-    BlockDriverState *base = s->base;
     BlockDriverState *bs = s->mirror_top_bs->backing->bs;
     BlockDriverState *target_bs = blk_bs(s->target);
     int ret;
@@ -837,7 +839,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             return 0;
         }
 
-        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
+        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
+                                      &count);
         if (ret < 0) {
             return ret;
         }
@@ -936,7 +939,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     } else {
         s->target_cluster_size = BDRV_SECTOR_SIZE;
     }
-    if (backing_filename[0] && !target_bs->backing &&
+    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
         s->granularity < s->target_cluster_size) {
         s->buf_size = MAX(s->buf_size, s->target_cluster_size);
         s->cow_bitmap = bitmap_new(length);
@@ -1116,8 +1119,9 @@ static void mirror_complete(Job *job, Error **errp)
     if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
         int ret;
 
-        assert(!target->backing);
-        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+        assert(!bdrv_backing_chain_next(target));
+        ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL,
+                                     "backing", errp);
         if (ret < 0) {
             return;
         }
@@ -1555,8 +1559,8 @@ static BlockJob *mirror_start_job(
     MirrorBlockJob *s;
     MirrorBDSOpaque *bs_opaque;
     BlockDriverState *mirror_top_bs;
-    bool target_graph_mod;
     bool target_is_backing;
+    uint64_t target_perms, target_shared_perms;
     Error *local_err = NULL;
     int ret;
 
@@ -1575,7 +1579,7 @@ static BlockJob *mirror_start_job(
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
 
-    if (bs == target) {
+    if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) {
         error_setg(errp, "Can't mirror node into itself");
         return NULL;
     }
@@ -1639,15 +1643,50 @@ static BlockJob *mirror_start_job(
      * In the case of active commit, things look a bit different, though,
      * because the target is an already populated backing file in active use.
      * We can allow anything except resize there.*/
+
+    target_perms = BLK_PERM_WRITE;
+    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
+
     target_is_backing = bdrv_chain_contains(bs, target);
-    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+    if (target_is_backing) {
+        int64_t bs_size, target_size;
+        bs_size = bdrv_getlength(bs);
+        if (bs_size < 0) {
+            error_setg_errno(errp, -bs_size,
+                             "Could not inquire top image size");
+            goto fail;
+        }
+
+        target_size = bdrv_getlength(target);
+        if (target_size < 0) {
+            error_setg_errno(errp, -target_size,
+                             "Could not inquire base image size");
+            goto fail;
+        }
+
+        if (target_size < bs_size) {
+            target_perms |= BLK_PERM_RESIZE;
+        }
+
+        target_shared_perms |= BLK_PERM_CONSISTENT_READ
+                            |  BLK_PERM_WRITE
+                            |  BLK_PERM_GRAPH_MOD;
+    } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
+        /*
+         * We may want to allow this in the future, but it would
+         * require taking some extra care.
+         */
+        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
+                   "source's backing chain");
+        goto fail;
+    }
+
+    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
+        target_perms |= BLK_PERM_GRAPH_MOD;
+    }
+
     s->target = blk_new(s->common.job.aio_context,
-                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
-                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
-                        BLK_PERM_WRITE_UNCHANGED |
-                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
-                                             BLK_PERM_WRITE |
-                                             BLK_PERM_GRAPH_MOD : 0));
+                        target_perms, target_shared_perms);
     ret = blk_insert_bs(s->target, target, errp);
     if (ret < 0) {
         goto fail;
@@ -1672,6 +1711,7 @@ static BlockJob *mirror_start_job(
     s->zero_target = zero_target;
     s->copy_mode = copy_mode;
     s->base = base;
+    s->base_overlay = bdrv_find_overlay(bs, base);
     s->granularity = granularity;
     s->buf_size = ROUND_UP(buf_size, granularity);
     s->unmap = unmap;
@@ -1702,15 +1742,39 @@ static BlockJob *mirror_start_job(
     /* In commit_active_start() all intermediate nodes disappear, so
      * any jobs in them must be blocked */
     if (target_is_backing) {
-        BlockDriverState *iter;
-        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
-            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
-             * ourselves at s->base (if writes are blocked for a node, they are
-             * also blocked for its backing file). The other options would be a
-             * second filter driver above s->base (== target). */
+        BlockDriverState *iter, *filtered_target;
+        uint64_t iter_shared_perms;
+
+        /*
+         * The topmost node with
+         * bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target)
+         */
+        filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target));
+
+        assert(bdrv_skip_filters(filtered_target) ==
+               bdrv_skip_filters(target));
+
+        /*
+         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
+         * ourselves at s->base (if writes are blocked for a node, they are
+         * also blocked for its backing file). The other options would be a
+         * second filter driver above s->base (== target).
+         */
+        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
+
+        for (iter = bdrv_filter_or_cow_bs(bs); iter != target;
+             iter = bdrv_filter_or_cow_bs(iter))
+        {
+            if (iter == filtered_target) {
+                /*
+                 * From here on, all nodes are filters on the base.
+                 * This allows us to share BLK_PERM_CONSISTENT_READ.
+                 */
+                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
+            }
+
             ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
-                                     errp);
+                                     iter_shared_perms, errp);
             if (ret < 0) {
                 goto fail;
             }
@@ -1746,7 +1810,7 @@ fail:
     bs_opaque->stop = true;
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
-    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
     bdrv_unref(mirror_top_bs);
 
@@ -1774,7 +1838,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
         return;
     }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
-    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
+    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
     mirror_start_job(job_id, bs, creation_flags, target, replaces,
                      speed, granularity, buf_size, backing_mode, zero_target,
                      on_source_error, on_target_error, unmap, NULL, NULL,
diff --git a/blockdev.c b/blockdev.c
index 57ee41b73e..73d96ce21c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2899,6 +2899,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    bool has_auto_dismiss, bool auto_dismiss,
                                    Error **errp)
 {
+    BlockDriverState *unfiltered_bs;
     int job_flags = JOB_DEFAULT;
 
     if (!has_speed) {
@@ -2950,10 +2951,19 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         return;
     }
 
-    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
+    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
+    if (!has_replaces) {
+        /* We want to mirror from @bs, but keep implicit filters on top */
+        unfiltered_bs = bdrv_skip_implicit_filters(bs);
+        if (unfiltered_bs != bs) {
+            replaces = unfiltered_bs->node_name;
+            has_replaces = true;
+        }
+    }
+
     if (has_replaces) {
         BlockDriverState *to_replace_bs;
         AioContext *replace_aio_context;
@@ -3000,7 +3010,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
 {
     BlockDriverState *bs;
-    BlockDriverState *source, *target_bs;
+    BlockDriverState *target_backing_bs, *target_bs;
     AioContext *aio_context;
     AioContext *old_context;
     BlockMirrorBackingMode backing_mode;
@@ -3035,12 +3045,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     }
 
     flags = bs->open_flags | BDRV_O_RDWR;
-    source = backing_bs(bs);
-    if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
+    target_backing_bs = bdrv_cow_bs(bdrv_skip_filters(bs));
+    if (!target_backing_bs && arg->sync == MIRROR_SYNC_MODE_TOP) {
         arg->sync = MIRROR_SYNC_MODE_FULL;
     }
     if (arg->sync == MIRROR_SYNC_MODE_NONE) {
-        source = bs;
+        target_backing_bs = bs;
     }
 
     size = bdrv_getlength(bs);
@@ -3066,7 +3076,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     /* Don't open backing image in create() */
     flags |= BDRV_O_NO_BACKING;
 
-    if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source)
+    if ((arg->sync == MIRROR_SYNC_MODE_FULL || !target_backing_bs)
         && arg->mode != NEW_IMAGE_MODE_EXISTING)
     {
         /* create new image w/o backing file */
@@ -3074,15 +3084,19 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         bdrv_img_create(arg->target, format,
                         NULL, NULL, NULL, size, flags, false, &local_err);
     } else {
+        /* Implicit filters should not appear in the filename */
+        BlockDriverState *explicit_backing =
+            bdrv_skip_implicit_filters(target_backing_bs);
+
         switch (arg->mode) {
         case NEW_IMAGE_MODE_EXISTING:
             break;
         case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
             /* create new image with backing file */
-            bdrv_refresh_filename(source);
+            bdrv_refresh_filename(explicit_backing);
             bdrv_img_create(arg->target, format,
-                            source->filename,
-                            source->drv->format_name,
+                            explicit_backing->filename,
+                            explicit_backing->drv->format_name,
                             NULL, size, flags, false, &local_err);
             break;
         default:
-- 
2.26.2



  parent reply	other threads:[~2020-09-01 14:52 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-01 14:33 [PATCH v8 00/43] block: Deal with filters Max Reitz
2020-09-01 14:33 ` [PATCH v8 01/43] block: Add child access functions Max Reitz
2020-09-01 14:33 ` [PATCH v8 02/43] block: Add chain helper functions Max Reitz
2020-09-01 14:33 ` [PATCH v8 03/43] block: bdrv_cow_child() for bdrv_has_zero_init() Max Reitz
2020-09-01 14:33 ` [PATCH v8 04/43] block: bdrv_set_backing_hd() is about bs->backing Max Reitz
2020-09-01 14:33 ` [PATCH v8 05/43] block: Include filters when freezing backing chain Max Reitz
2020-09-01 14:33 ` [PATCH v8 06/43] block: Drop bdrv_is_encrypted() Max Reitz
2020-09-01 14:33 ` [PATCH v8 07/43] block: Add bdrv_supports_compressed_writes() Max Reitz
2020-09-01 14:33 ` [PATCH v8 08/43] throttle: Support compressed writes Max Reitz
2020-09-01 14:33 ` [PATCH v8 09/43] copy-on-read: " Max Reitz
2020-09-01 14:33 ` [PATCH v8 10/43] block: Use bdrv_filter_(bs|child) where obvious Max Reitz
2020-09-01 14:33 ` [PATCH v8 11/43] block: Use CAFs in block status functions Max Reitz
2020-09-01 14:33 ` [PATCH v8 12/43] stream: Deal with filters Max Reitz
2020-09-01 14:33 ` [PATCH v8 13/43] block: Use CAFs when working with backing chains Max Reitz
2020-09-01 14:33 ` [PATCH v8 14/43] block: Use bdrv_cow_child() in bdrv_co_truncate() Max Reitz
2020-09-01 14:33 ` [PATCH v8 15/43] block: Re-evaluate backing file handling in reopen Max Reitz
2020-09-01 14:33 ` [PATCH v8 16/43] block: Flush all children in generic code Max Reitz
2020-09-01 14:33 ` [PATCH v8 17/43] vmdk: Drop vmdk_co_flush() Max Reitz
2020-09-01 14:33 ` [PATCH v8 18/43] block: Iterate over children in refresh_limits Max Reitz
2020-09-01 14:34 ` [PATCH v8 19/43] block: Use CAFs in bdrv_refresh_filename() Max Reitz
2020-09-01 14:34 ` [PATCH v8 20/43] block: Use CAF in bdrv_co_rw_vmstate() Max Reitz
2020-09-01 14:34 ` [PATCH v8 21/43] block/snapshot: Fix fallback Max Reitz
2020-09-01 14:34 ` [PATCH v8 22/43] block: Use CAFs for debug breakpoints Max Reitz
2020-09-01 14:34 ` [PATCH v8 23/43] block: Improve get_allocated_file_size's default Max Reitz
2020-09-01 14:34 ` [PATCH v8 24/43] block/null: Implement bdrv_get_allocated_file_size Max Reitz
2020-09-01 14:34 ` [PATCH v8 25/43] blockdev: Use CAF in external_snapshot_prepare() Max Reitz
2020-09-01 14:34 ` [PATCH v8 26/43] block: Report data child for query-blockstats Max Reitz
2020-09-01 14:34 ` [PATCH v8 27/43] block: Use child access functions for QAPI queries Max Reitz
2020-09-01 14:34 ` [PATCH v8 28/43] block-copy: Use CAF to find sync=top base Max Reitz
2020-09-01 14:34 ` Max Reitz [this message]
2020-09-02  8:53   ` [PATCH v8 29/43] mirror: Deal with filters Kevin Wolf
2020-09-02 10:19     ` Max Reitz
2020-09-01 14:34 ` [PATCH v8 30/43] backup: " Max Reitz
2020-09-01 14:34 ` [PATCH v8 31/43] commit: " Max Reitz
2020-09-01 14:34 ` [PATCH v8 32/43] nbd: Use CAF when looking for dirty bitmap Max Reitz
2020-09-01 14:34 ` [PATCH v8 33/43] qemu-img: Use child access functions Max Reitz
2020-09-01 14:34 ` [PATCH v8 34/43] block: Drop backing_bs() Max Reitz
2020-09-01 14:34 ` [PATCH v8 35/43] blockdev: Fix active commit choice Max Reitz
2020-09-02  9:10   ` Kevin Wolf
2020-09-01 14:34 ` [PATCH v8 36/43] block: Inline bdrv_co_block_status_from_*() Max Reitz
2020-09-01 14:34 ` [PATCH v8 37/43] block: Leave BDS.backing_{file,format} constant Max Reitz
2020-09-01 14:34 ` [PATCH v8 38/43] iotests: Test that qcow2's data-file is flushed Max Reitz
2020-09-01 14:34 ` [PATCH v8 39/43] iotests: Let complete_and_wait() work with commit Max Reitz
2020-09-01 14:34 ` [PATCH v8 40/43] iotests: Add filter commit test cases Max Reitz
2020-09-01 14:34 ` [PATCH v8 41/43] iotests: Add filter mirror " Max Reitz
2020-09-01 14:34 ` [PATCH v8 42/43] iotests: Add test for commit in sub directory Max Reitz
2020-09-01 14:34 ` [PATCH v8 43/43] iotests: Test committing to overridden backing Max Reitz
2020-09-02 10:23 ` [PATCH v8 00/43] block: Deal with filters Kevin Wolf
2020-09-02 12:26   ` Max Reitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200901143424.884735-30-mreitz@redhat.com \
    --to=mreitz@redhat.com \
    --cc=andrey.shinkevich@virtuozzo.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=vsementsov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.