All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/4] replication: Properly attach children
@ 2021-07-07 18:15 Lukas Straub
  2021-07-07 18:15 ` [PATCH v3 1/4] replication: Remove s->active_disk Lukas Straub
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Lukas Straub @ 2021-07-07 18:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Vladimir Sementsov-Ogievskiy, qemu-block,
	Wen Congyang, Xie Changlong, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 925 bytes --]

Hello Everyone,
A while ago Kevin noticed that the replication driver doesn't properly attach
the children it wants to use. Instead, it directly copies the BdrvChilds from
it's backing file, which is wrong. Ths Patchset fixes the problem and removes
the workaround that was put in place back then.

Regards,
Lukas Straub

Changes:

-v3:
    -Split up into multiple patches
    -Remove s->active_disk
    -Clarify child permissions in commit message

-v2: Test for BDRV_CHILD_PRIMARY in replication_child_perm, since
     bs->file might not be set yet. (Vladimir)

Lukas Straub (4):
  replication: Remove s->active_disk
  replication: Reduce usage of s->hidden_disk and s->secondary_disk
  replication: Properly attach children
  replication: Remove workaround

 block/replication.c | 115 +++++++++++++++++++++++++++-----------------
 1 file changed, 72 insertions(+), 43 deletions(-)

--
2.20.1

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v3 1/4] replication: Remove s->active_disk
  2021-07-07 18:15 [PATCH v3 0/4] replication: Properly attach children Lukas Straub
@ 2021-07-07 18:15 ` Lukas Straub
  2021-07-09  7:11   ` Vladimir Sementsov-Ogievskiy
  2021-07-07 18:15 ` [PATCH v3 2/4] replication: Reduce usage of s->hidden_disk and s->secondary_disk Lukas Straub
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Lukas Straub @ 2021-07-07 18:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Vladimir Sementsov-Ogievskiy, qemu-block,
	Wen Congyang, Xie Changlong, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 6252 bytes --]

s->active_disk is bs->file. Remove it and use local variables instead.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 block/replication.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 52163f2d1f..50940fbe33 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -35,7 +35,6 @@ typedef enum {
 typedef struct BDRVReplicationState {
     ReplicationMode mode;
     ReplicationStage stage;
-    BdrvChild *active_disk;
     BlockJob *commit_job;
     BdrvChild *hidden_disk;
     BdrvChild *secondary_disk;
@@ -307,11 +306,15 @@ out:
     return ret;
 }

-static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
+static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
 {
+    BDRVReplicationState *s = bs->opaque;
+    BdrvChild *active_disk;
     Error *local_err = NULL;
     int ret;

+    active_disk = bs->file;
+
     if (!s->backup_job) {
         error_setg(errp, "Backup job was cancelled unexpectedly");
         return;
@@ -323,13 +326,13 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
         return;
     }

-    if (!s->active_disk->bs->drv) {
+    if (!active_disk->bs->drv) {
         error_setg(errp, "Active disk %s is ejected",
-                   s->active_disk->bs->node_name);
+                   active_disk->bs->node_name);
         return;
     }

-    ret = bdrv_make_empty(s->active_disk, errp);
+    ret = bdrv_make_empty(active_disk, errp);
     if (ret < 0) {
         return;
     }
@@ -451,6 +454,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     BlockDriverState *bs = rs->opaque;
     BDRVReplicationState *s;
     BlockDriverState *top_bs;
+    BdrvChild *active_disk;
     int64_t active_length, hidden_length, disk_length;
     AioContext *aio_context;
     Error *local_err = NULL;
@@ -488,15 +492,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     case REPLICATION_MODE_PRIMARY:
         break;
     case REPLICATION_MODE_SECONDARY:
-        s->active_disk = bs->file;
-        if (!s->active_disk || !s->active_disk->bs ||
-                                    !s->active_disk->bs->backing) {
+        active_disk = bs->file;
+        if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
             error_setg(errp, "Active disk doesn't have backing file");
             aio_context_release(aio_context);
             return;
         }

-        s->hidden_disk = s->active_disk->bs->backing;
+        s->hidden_disk = active_disk->bs->backing;
         if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
             error_setg(errp, "Hidden disk doesn't have backing file");
             aio_context_release(aio_context);
@@ -511,7 +514,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         }

         /* verify the length */
-        active_length = bdrv_getlength(s->active_disk->bs);
+        active_length = bdrv_getlength(active_disk->bs);
         hidden_length = bdrv_getlength(s->hidden_disk->bs);
         disk_length = bdrv_getlength(s->secondary_disk->bs);
         if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
@@ -523,9 +526,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         }

         /* Must be true, or the bdrv_getlength() calls would have failed */
-        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
+        assert(active_disk->bs->drv && s->hidden_disk->bs->drv);

-        if (!s->active_disk->bs->drv->bdrv_make_empty ||
+        if (!active_disk->bs->drv->bdrv_make_empty ||
             !s->hidden_disk->bs->drv->bdrv_make_empty) {
             error_setg(errp,
                        "Active disk or hidden disk doesn't support make_empty");
@@ -579,7 +582,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     s->stage = BLOCK_REPLICATION_RUNNING;

     if (s->mode == REPLICATION_MODE_SECONDARY) {
-        secondary_do_checkpoint(s, errp);
+        secondary_do_checkpoint(bs, errp);
     }

     s->error = 0;
@@ -608,7 +611,7 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
     }

     if (s->mode == REPLICATION_MODE_SECONDARY) {
-        secondary_do_checkpoint(s, errp);
+        secondary_do_checkpoint(bs, errp);
     }
     aio_context_release(aio_context);
 }
@@ -645,7 +648,6 @@ static void replication_done(void *opaque, int ret)
     if (ret == 0) {
         s->stage = BLOCK_REPLICATION_DONE;

-        s->active_disk = NULL;
         s->secondary_disk = NULL;
         s->hidden_disk = NULL;
         s->error = 0;
@@ -659,11 +661,13 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
 {
     BlockDriverState *bs = rs->opaque;
     BDRVReplicationState *s;
+    BdrvChild *active_disk;
     AioContext *aio_context;

     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
     s = bs->opaque;
+    active_disk = bs->file;

     if (s->stage == BLOCK_REPLICATION_DONE ||
         s->stage == BLOCK_REPLICATION_FAILOVER) {
@@ -698,7 +702,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
         }

         if (!failover) {
-            secondary_do_checkpoint(s, errp);
+            secondary_do_checkpoint(bs, errp);
             s->stage = BLOCK_REPLICATION_DONE;
             aio_context_release(aio_context);
             return;
@@ -706,7 +710,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

         s->stage = BLOCK_REPLICATION_FAILOVER;
         s->commit_job = commit_active_start(
-                            NULL, s->active_disk->bs, s->secondary_disk->bs,
+                            NULL, active_disk->bs, s->secondary_disk->bs,
                             JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
                             NULL, replication_done, bs, true, errp);
         break;
--
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 2/4] replication: Reduce usage of s->hidden_disk and s->secondary_disk
  2021-07-07 18:15 [PATCH v3 0/4] replication: Properly attach children Lukas Straub
  2021-07-07 18:15 ` [PATCH v3 1/4] replication: Remove s->active_disk Lukas Straub
@ 2021-07-07 18:15 ` Lukas Straub
  2021-07-09  7:20   ` Vladimir Sementsov-Ogievskiy
  2021-07-07 18:15 ` [PATCH v3 3/4] replication: Properly attach children Lukas Straub
  2021-07-07 18:15 ` [PATCH v3 4/4] replication: Remove workaround Lukas Straub
  3 siblings, 1 reply; 13+ messages in thread
From: Lukas Straub @ 2021-07-07 18:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Vladimir Sementsov-Ogievskiy, qemu-block,
	Wen Congyang, Xie Changlong, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 5804 bytes --]

In preparation for the next patch, initialize s->hidden_disk and
s->secondary_disk later and replace access to them with local variables
in the places where they aren't initialized yet.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 block/replication.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 50940fbe33..74adf30f54 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -368,27 +368,35 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
                                 Error **errp)
 {
     BDRVReplicationState *s = bs->opaque;
+    BdrvChild *hidden_disk, *secondary_disk;
     BlockReopenQueue *reopen_queue = NULL;

+    /*
+     * s->hidden_disk and s->secondary_disk may not be set yet, as they will
+     * only be set after the children are writable.
+     */
+    hidden_disk = bs->file->bs->backing;
+    secondary_disk = hidden_disk->bs->backing;
+
     if (writable) {
-        s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
-        s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
+        s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
+        s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
     }

-    bdrv_subtree_drained_begin(s->hidden_disk->bs);
-    bdrv_subtree_drained_begin(s->secondary_disk->bs);
+    bdrv_subtree_drained_begin(hidden_disk->bs);
+    bdrv_subtree_drained_begin(secondary_disk->bs);

     if (s->orig_hidden_read_only) {
         QDict *opts = qdict_new();
         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
-        reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs,
                                          opts, true);
     }

     if (s->orig_secondary_read_only) {
         QDict *opts = qdict_new();
         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
-        reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs,
                                          opts, true);
     }

@@ -396,8 +404,8 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
         bdrv_reopen_multiple(reopen_queue, errp);
     }

-    bdrv_subtree_drained_end(s->hidden_disk->bs);
-    bdrv_subtree_drained_end(s->secondary_disk->bs);
+    bdrv_subtree_drained_end(hidden_disk->bs);
+    bdrv_subtree_drained_end(secondary_disk->bs);
 }

 static void backup_job_cleanup(BlockDriverState *bs)
@@ -454,7 +462,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     BlockDriverState *bs = rs->opaque;
     BDRVReplicationState *s;
     BlockDriverState *top_bs;
-    BdrvChild *active_disk;
+    BdrvChild *active_disk, *hidden_disk, *secondary_disk;
     int64_t active_length, hidden_length, disk_length;
     AioContext *aio_context;
     Error *local_err = NULL;
@@ -499,15 +507,15 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
             return;
         }

-        s->hidden_disk = active_disk->bs->backing;
-        if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
+        hidden_disk = active_disk->bs->backing;
+        if (!hidden_disk->bs || !hidden_disk->bs->backing) {
             error_setg(errp, "Hidden disk doesn't have backing file");
             aio_context_release(aio_context);
             return;
         }

-        s->secondary_disk = s->hidden_disk->bs->backing;
-        if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) {
+        secondary_disk = hidden_disk->bs->backing;
+        if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
             error_setg(errp, "The secondary disk doesn't have block backend");
             aio_context_release(aio_context);
             return;
@@ -515,8 +523,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,

         /* verify the length */
         active_length = bdrv_getlength(active_disk->bs);
-        hidden_length = bdrv_getlength(s->hidden_disk->bs);
-        disk_length = bdrv_getlength(s->secondary_disk->bs);
+        hidden_length = bdrv_getlength(hidden_disk->bs);
+        disk_length = bdrv_getlength(secondary_disk->bs);
         if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
             active_length != hidden_length || hidden_length != disk_length) {
             error_setg(errp, "Active disk, hidden disk, secondary disk's length"
@@ -526,10 +534,10 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         }

         /* Must be true, or the bdrv_getlength() calls would have failed */
-        assert(active_disk->bs->drv && s->hidden_disk->bs->drv);
+        assert(active_disk->bs->drv && hidden_disk->bs->drv);

         if (!active_disk->bs->drv->bdrv_make_empty ||
-            !s->hidden_disk->bs->drv->bdrv_make_empty) {
+            !hidden_disk->bs->drv->bdrv_make_empty) {
             error_setg(errp,
                        "Active disk or hidden disk doesn't support make_empty");
             aio_context_release(aio_context);
@@ -544,6 +552,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
             return;
         }

+        s->hidden_disk = hidden_disk;
+        s->secondary_disk = secondary_disk;
+
         /* start backup job now */
         error_setg(&s->blocker,
                    "Block device is in use by internal backup job");
--
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 3/4] replication: Properly attach children
  2021-07-07 18:15 [PATCH v3 0/4] replication: Properly attach children Lukas Straub
  2021-07-07 18:15 ` [PATCH v3 1/4] replication: Remove s->active_disk Lukas Straub
  2021-07-07 18:15 ` [PATCH v3 2/4] replication: Reduce usage of s->hidden_disk and s->secondary_disk Lukas Straub
@ 2021-07-07 18:15 ` Lukas Straub
  2021-07-09  7:41   ` Vladimir Sementsov-Ogievskiy
  2021-07-07 18:15 ` [PATCH v3 4/4] replication: Remove workaround Lukas Straub
  3 siblings, 1 reply; 13+ messages in thread
From: Lukas Straub @ 2021-07-07 18:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Vladimir Sementsov-Ogievskiy, qemu-block,
	Wen Congyang, Xie Changlong, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 2702 bytes --]

The replication driver needs access to the children block-nodes of
it's child so it can issue bdrv_make_empty() and bdrv_co_pwritev()
to manage the replication. However, it does this by directly copying
the BdrvChilds, which is wrong.

Fix this by properly attaching the block-nodes with
bdrv_attach_child() and requesting the required permissions.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 block/replication.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 74adf30f54..c0d4a6c264 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -165,7 +165,12 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
                                    uint64_t perm, uint64_t shared,
                                    uint64_t *nperm, uint64_t *nshared)
 {
-    *nperm = BLK_PERM_CONSISTENT_READ;
+    if (role & BDRV_CHILD_PRIMARY) {
+        *nperm = BLK_PERM_CONSISTENT_READ;
+    } else {
+        *nperm = 0;
+    }
+
     if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
         *nperm |= BLK_PERM_WRITE;
     }
@@ -552,8 +557,25 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
             return;
         }

-        s->hidden_disk = hidden_disk;
-        s->secondary_disk = secondary_disk;
+        bdrv_ref(hidden_disk->bs);
+        s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
+                                           &child_of_bds, BDRV_CHILD_DATA,
+                                           &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            aio_context_release(aio_context);
+            return;
+        }
+
+        bdrv_ref(secondary_disk->bs);
+        s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
+                                              "secondary disk", &child_of_bds,
+                                              BDRV_CHILD_DATA, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            aio_context_release(aio_context);
+            return;
+        }

         /* start backup job now */
         error_setg(&s->blocker,
@@ -659,7 +681,9 @@ static void replication_done(void *opaque, int ret)
     if (ret == 0) {
         s->stage = BLOCK_REPLICATION_DONE;

+        bdrv_unref_child(bs, s->secondary_disk);
         s->secondary_disk = NULL;
+        bdrv_unref_child(bs, s->hidden_disk);
         s->hidden_disk = NULL;
         s->error = 0;
     } else {
--
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 4/4] replication: Remove workaround
  2021-07-07 18:15 [PATCH v3 0/4] replication: Properly attach children Lukas Straub
                   ` (2 preceding siblings ...)
  2021-07-07 18:15 ` [PATCH v3 3/4] replication: Properly attach children Lukas Straub
@ 2021-07-07 18:15 ` Lukas Straub
  2021-07-09  7:49   ` Vladimir Sementsov-Ogievskiy
  3 siblings, 1 reply; 13+ messages in thread
From: Lukas Straub @ 2021-07-07 18:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Vladimir Sementsov-Ogievskiy, qemu-block,
	Wen Congyang, Xie Changlong, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 1306 bytes --]

Remove the workaround introduced in commit
6ecbc6c52672db5c13805735ca02784879ce8285
"replication: Avoid blk_make_empty() on read-only child".

It is not needed anymore since s->hidden_disk is guaranteed to be
writable when secondary_do_checkpoint() runs. Because replication_start(),
_do_checkpoint() and _stop() are only called by COLO migration code
and COLO-migration doesn't inactivate disks.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 block/replication.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index c0d4a6c264..68b46d65a8 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -348,17 +348,7 @@ static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
         return;
     }

-    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
-                                BLK_PERM_WRITE, BLK_PERM_ALL);
-    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        blk_unref(blk);
-        return;
-    }
-
-    ret = blk_make_empty(blk, errp);
-    blk_unref(blk);
+    ret = bdrv_make_empty(s->hidden_disk, errp);
     if (ret < 0) {
         return;
     }
--
2.20.1

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/4] replication: Remove s->active_disk
  2021-07-07 18:15 ` [PATCH v3 1/4] replication: Remove s->active_disk Lukas Straub
@ 2021-07-09  7:11   ` Vladimir Sementsov-Ogievskiy
  2021-07-09 12:11     ` Lukas Straub
  0 siblings, 1 reply; 13+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2021-07-09  7:11 UTC (permalink / raw)
  To: Lukas Straub, qemu-devel
  Cc: qemu-block, Wen Congyang, Xie Changlong, Kevin Wolf, Max Reitz

07.07.2021 21:15, Lukas Straub wrote:
> s->active_disk is bs->file. Remove it and use local variables instead.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>   block/replication.c | 38 +++++++++++++++++++++-----------------
>   1 file changed, 21 insertions(+), 17 deletions(-)
> 
> diff --git a/block/replication.c b/block/replication.c
> index 52163f2d1f..50940fbe33 100644
> --- a/block/replication.c
> +++ b/block/replication.c
> @@ -35,7 +35,6 @@ typedef enum {
>   typedef struct BDRVReplicationState {
>       ReplicationMode mode;
>       ReplicationStage stage;
> -    BdrvChild *active_disk;
>       BlockJob *commit_job;
>       BdrvChild *hidden_disk;
>       BdrvChild *secondary_disk;
> @@ -307,11 +306,15 @@ out:
>       return ret;
>   }
> 
> -static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
> +static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
>   {
> +    BDRVReplicationState *s = bs->opaque;
> +    BdrvChild *active_disk;

Why not to combine initialization into definition:

BdrvChild *active_disk = bs->file;

>       Error *local_err = NULL;
>       int ret;
> 
> +    active_disk = bs->file;
> +
>       if (!s->backup_job) {
>           error_setg(errp, "Backup job was cancelled unexpectedly");
>           return;
> @@ -323,13 +326,13 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
>           return;
>       }
> 
> -    if (!s->active_disk->bs->drv) {
> +    if (!active_disk->bs->drv) {
>           error_setg(errp, "Active disk %s is ejected",
> -                   s->active_disk->bs->node_name);
> +                   active_disk->bs->node_name);
>           return;
>       }
> 
> -    ret = bdrv_make_empty(s->active_disk, errp);
> +    ret = bdrv_make_empty(active_disk, errp);
>       if (ret < 0) {
>           return;
>       }
> @@ -451,6 +454,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>       BlockDriverState *bs = rs->opaque;
>       BDRVReplicationState *s;
>       BlockDriverState *top_bs;
> +    BdrvChild *active_disk;
>       int64_t active_length, hidden_length, disk_length;
>       AioContext *aio_context;
>       Error *local_err = NULL;
> @@ -488,15 +492,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>       case REPLICATION_MODE_PRIMARY:
>           break;
>       case REPLICATION_MODE_SECONDARY:
> -        s->active_disk = bs->file;
> -        if (!s->active_disk || !s->active_disk->bs ||
> -                                    !s->active_disk->bs->backing) {
> +        active_disk = bs->file;

Here initializing active_disk only here makes sense: we consider "active disk" only in secondary mode. Right?

> +        if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
>               error_setg(errp, "Active disk doesn't have backing file");
>               aio_context_release(aio_context);
>               return;
>           }
> 
> -        s->hidden_disk = s->active_disk->bs->backing;
> +        s->hidden_disk = active_disk->bs->backing;
>           if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
>               error_setg(errp, "Hidden disk doesn't have backing file");
>               aio_context_release(aio_context);
> @@ -511,7 +514,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>           }
> 
>           /* verify the length */
> -        active_length = bdrv_getlength(s->active_disk->bs);
> +        active_length = bdrv_getlength(active_disk->bs);
>           hidden_length = bdrv_getlength(s->hidden_disk->bs);
>           disk_length = bdrv_getlength(s->secondary_disk->bs);
>           if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
> @@ -523,9 +526,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>           }
> 
>           /* Must be true, or the bdrv_getlength() calls would have failed */
> -        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
> +        assert(active_disk->bs->drv && s->hidden_disk->bs->drv);
> 
> -        if (!s->active_disk->bs->drv->bdrv_make_empty ||
> +        if (!active_disk->bs->drv->bdrv_make_empty ||
>               !s->hidden_disk->bs->drv->bdrv_make_empty) {
>               error_setg(errp,
>                          "Active disk or hidden disk doesn't support make_empty");
> @@ -579,7 +582,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>       s->stage = BLOCK_REPLICATION_RUNNING;
> 
>       if (s->mode == REPLICATION_MODE_SECONDARY) {
> -        secondary_do_checkpoint(s, errp);
> +        secondary_do_checkpoint(bs, errp);
>       }
> 
>       s->error = 0;
> @@ -608,7 +611,7 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
>       }
> 
>       if (s->mode == REPLICATION_MODE_SECONDARY) {
> -        secondary_do_checkpoint(s, errp);
> +        secondary_do_checkpoint(bs, errp);
>       }
>       aio_context_release(aio_context);
>   }
> @@ -645,7 +648,6 @@ static void replication_done(void *opaque, int ret)
>       if (ret == 0) {
>           s->stage = BLOCK_REPLICATION_DONE;
> 
> -        s->active_disk = NULL;
>           s->secondary_disk = NULL;
>           s->hidden_disk = NULL;
>           s->error = 0;
> @@ -659,11 +661,13 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
>   {
>       BlockDriverState *bs = rs->opaque;
>       BDRVReplicationState *s;
> +    BdrvChild *active_disk;
>       AioContext *aio_context;
> 
>       aio_context = bdrv_get_aio_context(bs);
>       aio_context_acquire(aio_context);
>       s = bs->opaque;
> +    active_disk = bs->file;
> 
>       if (s->stage == BLOCK_REPLICATION_DONE ||
>           s->stage == BLOCK_REPLICATION_FAILOVER) {
> @@ -698,7 +702,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
>           }
> 
>           if (!failover) {
> -            secondary_do_checkpoint(s, errp);
> +            secondary_do_checkpoint(bs, errp);
>               s->stage = BLOCK_REPLICATION_DONE;
>               aio_context_release(aio_context);
>               return;
> @@ -706,7 +710,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
> 
>           s->stage = BLOCK_REPLICATION_FAILOVER;

For consistency, it seems right to initialize active_disk only in "case REPLICATION_MODE_SECONDARY:", like above..

But then, it becomes obvious that no sense in creating additional variable to use it once.. So here I'd just use bs->file->bs

>           s->commit_job = commit_active_start(
> -                            NULL, s->active_disk->bs, s->secondary_disk->bs,
> +                            NULL, active_disk->bs, s->secondary_disk->bs,
>                               JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
>                               NULL, replication_done, bs, true, errp);
>           break;
> --
> 2.20.1
> 


Anyway:
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 2/4] replication: Reduce usage of s->hidden_disk and s->secondary_disk
  2021-07-07 18:15 ` [PATCH v3 2/4] replication: Reduce usage of s->hidden_disk and s->secondary_disk Lukas Straub
@ 2021-07-09  7:20   ` Vladimir Sementsov-Ogievskiy
  0 siblings, 0 replies; 13+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2021-07-09  7:20 UTC (permalink / raw)
  To: Lukas Straub, qemu-devel
  Cc: qemu-block, Wen Congyang, Xie Changlong, Kevin Wolf, Max Reitz

07.07.2021 21:15, Lukas Straub wrote:
> In preparation for the next patch, initialize s->hidden_disk and
> s->secondary_disk later and replace access to them with local variables
> in the places where they aren't initialized yet.
> 
> Signed-off-by: Lukas Straub<lukasstraub2@web.de>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 3/4] replication: Properly attach children
  2021-07-07 18:15 ` [PATCH v3 3/4] replication: Properly attach children Lukas Straub
@ 2021-07-09  7:41   ` Vladimir Sementsov-Ogievskiy
  0 siblings, 0 replies; 13+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2021-07-09  7:41 UTC (permalink / raw)
  To: Lukas Straub, qemu-devel
  Cc: qemu-block, Wen Congyang, Xie Changlong, Kevin Wolf, Max Reitz

07.07.2021 21:15, Lukas Straub wrote:
> The replication driver needs access to the children block-nodes of
> it's child so it can issue bdrv_make_empty() and bdrv_co_pwritev()
> to manage the replication. However, it does this by directly copying
> the BdrvChilds, which is wrong.
> 
> Fix this by properly attaching the block-nodes with
> bdrv_attach_child() and requesting the required permissions.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>   block/replication.c | 30 +++++++++++++++++++++++++++---
>   1 file changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/block/replication.c b/block/replication.c
> index 74adf30f54..c0d4a6c264 100644
> --- a/block/replication.c
> +++ b/block/replication.c
> @@ -165,7 +165,12 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
>                                      uint64_t perm, uint64_t shared,
>                                      uint64_t *nperm, uint64_t *nshared)
>   {
> -    *nperm = BLK_PERM_CONSISTENT_READ;
> +    if (role & BDRV_CHILD_PRIMARY) {
> +        *nperm = BLK_PERM_CONSISTENT_READ;
> +    } else {
> +        *nperm = 0;
> +    }
> +
>       if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
>           *nperm |= BLK_PERM_WRITE;
>       }
> @@ -552,8 +557,25 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>               return;
>           }
> 
> -        s->hidden_disk = hidden_disk;
> -        s->secondary_disk = secondary_disk;
> +        bdrv_ref(hidden_disk->bs);
> +        s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
> +                                           &child_of_bds, BDRV_CHILD_DATA,
> +                                           &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            aio_context_release(aio_context);
> +            return;
> +        }
> +
> +        bdrv_ref(secondary_disk->bs);
> +        s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
> +                                              "secondary disk", &child_of_bds,
> +                                              BDRV_CHILD_DATA, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            aio_context_release(aio_context);
> +            return;
> +        }

Now looking at this I can say that design is a bit strange:

If these children are children of replication state, we probably want something like bdrv_root_attach_child(), and not attach children to the active disk but to the state itself (like block-job children).. But than we'll need new class of bdrv children (child_of_replication?) and that obviously not worth the complexity..

So, we want new children to be children of our filter driver. Than, we probably should create them in repliction_open(), together with bs->file..

Still, it should work as is I think:

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>


> 
>           /* start backup job now */
>           error_setg(&s->blocker,
> @@ -659,7 +681,9 @@ static void replication_done(void *opaque, int ret)
>       if (ret == 0) {
>           s->stage = BLOCK_REPLICATION_DONE;
> 
> +        bdrv_unref_child(bs, s->secondary_disk);
>           s->secondary_disk = NULL;
> +        bdrv_unref_child(bs, s->hidden_disk);
>           s->hidden_disk = NULL;
>           s->error = 0;
>       } else {
> --
> 2.20.1
> 


-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 4/4] replication: Remove workaround
  2021-07-07 18:15 ` [PATCH v3 4/4] replication: Remove workaround Lukas Straub
@ 2021-07-09  7:49   ` Vladimir Sementsov-Ogievskiy
  2021-07-11 20:33     ` Lukas Straub
  0 siblings, 1 reply; 13+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2021-07-09  7:49 UTC (permalink / raw)
  To: Lukas Straub, qemu-devel
  Cc: qemu-block, Wen Congyang, Xie Changlong, Kevin Wolf, Max Reitz

07.07.2021 21:15, Lukas Straub wrote:
> Remove the workaround introduced in commit
> 6ecbc6c52672db5c13805735ca02784879ce8285
> "replication: Avoid blk_make_empty() on read-only child".
> 
> It is not needed anymore since s->hidden_disk is guaranteed to be
> writable when secondary_do_checkpoint() runs. Because replication_start(),
> _do_checkpoint() and _stop() are only called by COLO migration code
> and COLO-migration doesn't inactivate disks.

If look at replication_child_perm() you should also be sure that it always works only with RW disks..

Actually, I think that it would be correct just require BLK_PERM_WRITE in replication_child_perm() unconditionally. Let generic layer care about all these RD/WR things. In _child_perm() we can require WRITE and don't care. If something goes wrong and we can't get WRITE permission we should see clean error-out.

Opposite, if we don't require WRITE permission in some case and still do WRITE request, it may crash.

Still, this may be considered as a preexisting problem of replication_child_perm() and fixed separately.

> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>

So, for this one commit (with probably updated commit message accordingly to my comments, or even rebased on fixed replication_child_perm()):

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>


> ---
>   block/replication.c | 12 +-----------
>   1 file changed, 1 insertion(+), 11 deletions(-)
> 
> diff --git a/block/replication.c b/block/replication.c
> index c0d4a6c264..68b46d65a8 100644
> --- a/block/replication.c
> +++ b/block/replication.c
> @@ -348,17 +348,7 @@ static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
>           return;
>       }
> 
> -    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
> -                                BLK_PERM_WRITE, BLK_PERM_ALL);
> -    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> -        blk_unref(blk);
> -        return;
> -    }
> -
> -    ret = blk_make_empty(blk, errp);
> -    blk_unref(blk);
> +    ret = bdrv_make_empty(s->hidden_disk, errp);
>       if (ret < 0) {
>           return;
>       }
> --
> 2.20.1
> 


-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/4] replication: Remove s->active_disk
  2021-07-09  7:11   ` Vladimir Sementsov-Ogievskiy
@ 2021-07-09 12:11     ` Lukas Straub
  0 siblings, 0 replies; 13+ messages in thread
From: Lukas Straub @ 2021-07-09 12:11 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy
  Cc: Kevin Wolf, qemu-block, Wen Congyang, Xie Changlong, qemu-devel,
	Max Reitz

[-- Attachment #1: Type: text/plain, Size: 7782 bytes --]

On Fri, 9 Jul 2021 10:11:15 +0300
Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:

> 07.07.2021 21:15, Lukas Straub wrote:
> > s->active_disk is bs->file. Remove it and use local variables instead.
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> >   block/replication.c | 38 +++++++++++++++++++++-----------------
> >   1 file changed, 21 insertions(+), 17 deletions(-)
> > 
> > diff --git a/block/replication.c b/block/replication.c
> > index 52163f2d1f..50940fbe33 100644
> > --- a/block/replication.c
> > +++ b/block/replication.c
> > @@ -35,7 +35,6 @@ typedef enum {
> >   typedef struct BDRVReplicationState {
> >       ReplicationMode mode;
> >       ReplicationStage stage;
> > -    BdrvChild *active_disk;
> >       BlockJob *commit_job;
> >       BdrvChild *hidden_disk;
> >       BdrvChild *secondary_disk;
> > @@ -307,11 +306,15 @@ out:
> >       return ret;
> >   }
> > 
> > -static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
> > +static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
> >   {
> > +    BDRVReplicationState *s = bs->opaque;
> > +    BdrvChild *active_disk;  
> 
> Why not to combine initialization into definition:
> 
> BdrvChild *active_disk = bs->file;

Ok, will fix.

> >       Error *local_err = NULL;
> >       int ret;
> > 
> > +    active_disk = bs->file;
> > +
> >       if (!s->backup_job) {
> >           error_setg(errp, "Backup job was cancelled unexpectedly");
> >           return;
> > @@ -323,13 +326,13 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
> >           return;
> >       }
> > 
> > -    if (!s->active_disk->bs->drv) {
> > +    if (!active_disk->bs->drv) {
> >           error_setg(errp, "Active disk %s is ejected",
> > -                   s->active_disk->bs->node_name);
> > +                   active_disk->bs->node_name);
> >           return;
> >       }
> > 
> > -    ret = bdrv_make_empty(s->active_disk, errp);
> > +    ret = bdrv_make_empty(active_disk, errp);
> >       if (ret < 0) {
> >           return;
> >       }
> > @@ -451,6 +454,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
> >       BlockDriverState *bs = rs->opaque;
> >       BDRVReplicationState *s;
> >       BlockDriverState *top_bs;
> > +    BdrvChild *active_disk;
> >       int64_t active_length, hidden_length, disk_length;
> >       AioContext *aio_context;
> >       Error *local_err = NULL;
> > @@ -488,15 +492,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
> >       case REPLICATION_MODE_PRIMARY:
> >           break;
> >       case REPLICATION_MODE_SECONDARY:
> > -        s->active_disk = bs->file;
> > -        if (!s->active_disk || !s->active_disk->bs ||
> > -                                    !s->active_disk->bs->backing) {
> > +        active_disk = bs->file;  
> 
> Here initializing active_disk only here makes sense: we consider "active disk" only in secondary mode. Right?

Yes.

> > +        if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
> >               error_setg(errp, "Active disk doesn't have backing file");
> >               aio_context_release(aio_context);
> >               return;
> >           }
> > 
> > -        s->hidden_disk = s->active_disk->bs->backing;
> > +        s->hidden_disk = active_disk->bs->backing;
> >           if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
> >               error_setg(errp, "Hidden disk doesn't have backing file");
> >               aio_context_release(aio_context);
> > @@ -511,7 +514,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
> >           }
> > 
> >           /* verify the length */
> > -        active_length = bdrv_getlength(s->active_disk->bs);
> > +        active_length = bdrv_getlength(active_disk->bs);
> >           hidden_length = bdrv_getlength(s->hidden_disk->bs);
> >           disk_length = bdrv_getlength(s->secondary_disk->bs);
> >           if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
> > @@ -523,9 +526,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
> >           }
> > 
> >           /* Must be true, or the bdrv_getlength() calls would have failed */
> > -        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
> > +        assert(active_disk->bs->drv && s->hidden_disk->bs->drv);
> > 
> > -        if (!s->active_disk->bs->drv->bdrv_make_empty ||
> > +        if (!active_disk->bs->drv->bdrv_make_empty ||
> >               !s->hidden_disk->bs->drv->bdrv_make_empty) {
> >               error_setg(errp,
> >                          "Active disk or hidden disk doesn't support make_empty");
> > @@ -579,7 +582,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
> >       s->stage = BLOCK_REPLICATION_RUNNING;
> > 
> >       if (s->mode == REPLICATION_MODE_SECONDARY) {
> > -        secondary_do_checkpoint(s, errp);
> > +        secondary_do_checkpoint(bs, errp);
> >       }
> > 
> >       s->error = 0;
> > @@ -608,7 +611,7 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
> >       }
> > 
> >       if (s->mode == REPLICATION_MODE_SECONDARY) {
> > -        secondary_do_checkpoint(s, errp);
> > +        secondary_do_checkpoint(bs, errp);
> >       }
> >       aio_context_release(aio_context);
> >   }
> > @@ -645,7 +648,6 @@ static void replication_done(void *opaque, int ret)
> >       if (ret == 0) {
> >           s->stage = BLOCK_REPLICATION_DONE;
> > 
> > -        s->active_disk = NULL;
> >           s->secondary_disk = NULL;
> >           s->hidden_disk = NULL;
> >           s->error = 0;
> > @@ -659,11 +661,13 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
> >   {
> >       BlockDriverState *bs = rs->opaque;
> >       BDRVReplicationState *s;
> > +    BdrvChild *active_disk;
> >       AioContext *aio_context;
> > 
> >       aio_context = bdrv_get_aio_context(bs);
> >       aio_context_acquire(aio_context);
> >       s = bs->opaque;
> > +    active_disk = bs->file;
> > 
> >       if (s->stage == BLOCK_REPLICATION_DONE ||
> >           s->stage == BLOCK_REPLICATION_FAILOVER) {
> > @@ -698,7 +702,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
> >           }
> > 
> >           if (!failover) {
> > -            secondary_do_checkpoint(s, errp);
> > +            secondary_do_checkpoint(bs, errp);
> >               s->stage = BLOCK_REPLICATION_DONE;
> >               aio_context_release(aio_context);
> >               return;
> > @@ -706,7 +710,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
> > 
> >           s->stage = BLOCK_REPLICATION_FAILOVER;  
> 
> For consistency, it seems right to initialize active_disk only in "case REPLICATION_MODE_SECONDARY:", like above..
> 
> But then, it becomes obvious that no sense in creating additional variable to use it once.. So here I'd just use bs->file->bs

Ok, will fix.

> >           s->commit_job = commit_active_start(
> > -                            NULL, s->active_disk->bs, s->secondary_disk->bs,
> > +                            NULL, active_disk->bs, s->secondary_disk->bs,
> >                               JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
> >                               NULL, replication_done, bs, true, errp);
> >           break;
> > --
> > 2.20.1
> >   
> 
> 
> Anyway:
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> 

Thanks,
Lukas Straub

-- 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 4/4] replication: Remove workaround
  2021-07-09  7:49   ` Vladimir Sementsov-Ogievskiy
@ 2021-07-11 20:33     ` Lukas Straub
  2021-07-12 10:06       ` Vladimir Sementsov-Ogievskiy
  0 siblings, 1 reply; 13+ messages in thread
From: Lukas Straub @ 2021-07-11 20:33 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy
  Cc: Kevin Wolf, qemu-block, Wen Congyang, Xie Changlong, qemu-devel,
	Max Reitz

[-- Attachment #1: Type: text/plain, Size: 2870 bytes --]

On Fri, 9 Jul 2021 10:49:23 +0300
Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:

> 07.07.2021 21:15, Lukas Straub wrote:
> > Remove the workaround introduced in commit
> > 6ecbc6c52672db5c13805735ca02784879ce8285
> > "replication: Avoid blk_make_empty() on read-only child".
> > 
> > It is not needed anymore since s->hidden_disk is guaranteed to be
> > writable when secondary_do_checkpoint() runs. Because replication_start(),
> > _do_checkpoint() and _stop() are only called by COLO migration code
> > and COLO-migration doesn't inactivate disks.  
> 
> If look at replication_child_perm() you should also be sure that it always works only with RW disks..
> 
> Actually, I think that it would be correct just require BLK_PERM_WRITE in replication_child_perm() unconditionally. Let generic layer care about all these RD/WR things. In _child_perm() we can require WRITE and don't care. If something goes wrong and we can't get WRITE permission we should see clean error-out.
> 
> Opposite, if we don't require WRITE permission in some case and still do WRITE request, it may crash.
> 
> Still, this may be considered as a preexisting problem of replication_child_perm() and fixed separately.

Hmm, unconditionally requesting write doesn't work, since qemu on the
secondary side is started with "-miration incoming", it goes into
runstate RUN_STATE_INMIGRATE from the beginning and then blockdev_init()
opens every blockdev with BDRV_O_INACTIVE and then it errors out with
-drive driver=replication,...: Block node is read-only.

> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> 
> So, for this one commit (with probably updated commit message accordingly to my comments, or even rebased on fixed replication_child_perm()):
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> 
> 
> > ---
> >   block/replication.c | 12 +-----------
> >   1 file changed, 1 insertion(+), 11 deletions(-)
> > 
> > diff --git a/block/replication.c b/block/replication.c
> > index c0d4a6c264..68b46d65a8 100644
> > --- a/block/replication.c
> > +++ b/block/replication.c
> > @@ -348,17 +348,7 @@ static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
> >           return;
> >       }
> > 
> > -    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
> > -                                BLK_PERM_WRITE, BLK_PERM_ALL);
> > -    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
> > -    if (local_err) {
> > -        error_propagate(errp, local_err);
> > -        blk_unref(blk);
> > -        return;
> > -    }
> > -
> > -    ret = blk_make_empty(blk, errp);
> > -    blk_unref(blk);
> > +    ret = bdrv_make_empty(s->hidden_disk, errp);
> >       if (ret < 0) {
> >           return;
> >       }
> > --
> > 2.20.1
> >   
> 
> 



-- 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 4/4] replication: Remove workaround
  2021-07-11 20:33     ` Lukas Straub
@ 2021-07-12 10:06       ` Vladimir Sementsov-Ogievskiy
  2021-07-12 11:12         ` Lukas Straub
  0 siblings, 1 reply; 13+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2021-07-12 10:06 UTC (permalink / raw)
  To: Lukas Straub
  Cc: qemu-devel, qemu-block, Wen Congyang, Xie Changlong, Kevin Wolf,
	Max Reitz

11.07.2021 23:33, Lukas Straub wrote:
> On Fri, 9 Jul 2021 10:49:23 +0300
> Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:
> 
>> 07.07.2021 21:15, Lukas Straub wrote:
>>> Remove the workaround introduced in commit
>>> 6ecbc6c52672db5c13805735ca02784879ce8285
>>> "replication: Avoid blk_make_empty() on read-only child".
>>>
>>> It is not needed anymore since s->hidden_disk is guaranteed to be
>>> writable when secondary_do_checkpoint() runs. Because replication_start(),
>>> _do_checkpoint() and _stop() are only called by COLO migration code
>>> and COLO-migration doesn't inactivate disks.
>>
>> If look at replication_child_perm() you should also be sure that it always works only with RW disks..
>>
>> Actually, I think that it would be correct just require BLK_PERM_WRITE in replication_child_perm() unconditionally. Let generic layer care about all these RD/WR things. In _child_perm() we can require WRITE and don't care. If something goes wrong and we can't get WRITE permission we should see clean error-out.
>>
>> Opposite, if we don't require WRITE permission in some case and still do WRITE request, it may crash.
>>
>> Still, this may be considered as a preexisting problem of replication_child_perm() and fixed separately.
> 
> Hmm, unconditionally requesting write doesn't work, since qemu on the
> secondary side is started with "-miration incoming", it goes into
> runstate RUN_STATE_INMIGRATE from the beginning and then blockdev_init()
> opens every blockdev with BDRV_O_INACTIVE and then it errors out with
> -drive driver=replication,...: Block node is read-only.

Ah, OK. So we need this check in _child_perm().. Then, maybe, leave check or assertion in secondary_do_checkpoint, that hidden_disk is writable?

> 
>>>
>>> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
>>
>> So, for this one commit (with probably updated commit message accordingly to my comments, or even rebased on fixed replication_child_perm()):
>>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>
>>
>>> ---
>>>    block/replication.c | 12 +-----------
>>>    1 file changed, 1 insertion(+), 11 deletions(-)
>>>
>>> diff --git a/block/replication.c b/block/replication.c
>>> index c0d4a6c264..68b46d65a8 100644
>>> --- a/block/replication.c
>>> +++ b/block/replication.c
>>> @@ -348,17 +348,7 @@ static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
>>>            return;
>>>        }
>>>
>>> -    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
>>> -                                BLK_PERM_WRITE, BLK_PERM_ALL);
>>> -    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
>>> -    if (local_err) {
>>> -        error_propagate(errp, local_err);
>>> -        blk_unref(blk);
>>> -        return;
>>> -    }
>>> -
>>> -    ret = blk_make_empty(blk, errp);
>>> -    blk_unref(blk);
>>> +    ret = bdrv_make_empty(s->hidden_disk, errp);
>>>        if (ret < 0) {
>>>            return;
>>>        }
>>> --
>>> 2.20.1
>>>    
>>
>>
> 
> 
> 


-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 4/4] replication: Remove workaround
  2021-07-12 10:06       ` Vladimir Sementsov-Ogievskiy
@ 2021-07-12 11:12         ` Lukas Straub
  0 siblings, 0 replies; 13+ messages in thread
From: Lukas Straub @ 2021-07-12 11:12 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy
  Cc: Kevin Wolf, qemu-block, Wen Congyang, Xie Changlong, qemu-devel,
	Max Reitz

[-- Attachment #1: Type: text/plain, Size: 3470 bytes --]

On Mon, 12 Jul 2021 13:06:19 +0300
Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:

> 11.07.2021 23:33, Lukas Straub wrote:
> > On Fri, 9 Jul 2021 10:49:23 +0300
> > Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:
> >   
> >> 07.07.2021 21:15, Lukas Straub wrote:  
> >>> Remove the workaround introduced in commit
> >>> 6ecbc6c52672db5c13805735ca02784879ce8285
> >>> "replication: Avoid blk_make_empty() on read-only child".
> >>>
> >>> It is not needed anymore since s->hidden_disk is guaranteed to be
> >>> writable when secondary_do_checkpoint() runs. Because replication_start(),
> >>> _do_checkpoint() and _stop() are only called by COLO migration code
> >>> and COLO-migration doesn't inactivate disks.  
> >>
> >> If look at replication_child_perm() you should also be sure that it always works only with RW disks..
> >>
> >> Actually, I think that it would be correct just require BLK_PERM_WRITE in replication_child_perm() unconditionally. Let generic layer care about all these RD/WR things. In _child_perm() we can require WRITE and don't care. If something goes wrong and we can't get WRITE permission we should see clean error-out.
> >>
> >> Opposite, if we don't require WRITE permission in some case and still do WRITE request, it may crash.
> >>
> >> Still, this may be considered as a preexisting problem of replication_child_perm() and fixed separately.  
> > 
> > Hmm, unconditionally requesting write doesn't work, since qemu on the
> > secondary side is started with "-miration incoming", it goes into
> > runstate RUN_STATE_INMIGRATE from the beginning and then blockdev_init()
> > opens every blockdev with BDRV_O_INACTIVE and then it errors out with
> > -drive driver=replication,...: Block node is read-only.  
> 
> Ah, OK. So we need this check in _child_perm().. Then, maybe, leave check or assertion in secondary_do_checkpoint, that hidden_disk is writable?

Good Idea. I will add assertions to secondary_do_checkpoint and replication_co_writev too.

> >   
> >>>
> >>> Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> >>
> >> So, for this one commit (with probably updated commit message accordingly to my comments, or even rebased on fixed replication_child_perm()):
> >>
> >> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> >>
> >>  
> >>> ---
> >>>    block/replication.c | 12 +-----------
> >>>    1 file changed, 1 insertion(+), 11 deletions(-)
> >>>
> >>> diff --git a/block/replication.c b/block/replication.c
> >>> index c0d4a6c264..68b46d65a8 100644
> >>> --- a/block/replication.c
> >>> +++ b/block/replication.c
> >>> @@ -348,17 +348,7 @@ static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
> >>>            return;
> >>>        }
> >>>
> >>> -    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
> >>> -                                BLK_PERM_WRITE, BLK_PERM_ALL);
> >>> -    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
> >>> -    if (local_err) {
> >>> -        error_propagate(errp, local_err);
> >>> -        blk_unref(blk);
> >>> -        return;
> >>> -    }
> >>> -
> >>> -    ret = blk_make_empty(blk, errp);
> >>> -    blk_unref(blk);
> >>> +    ret = bdrv_make_empty(s->hidden_disk, errp);
> >>>        if (ret < 0) {
> >>>            return;
> >>>        }
> >>> --
> >>> 2.20.1
> >>>      
> >>
> >>  
> > 
> > 
> >   
> 
> 



-- 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-07-12 11:15 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-07 18:15 [PATCH v3 0/4] replication: Properly attach children Lukas Straub
2021-07-07 18:15 ` [PATCH v3 1/4] replication: Remove s->active_disk Lukas Straub
2021-07-09  7:11   ` Vladimir Sementsov-Ogievskiy
2021-07-09 12:11     ` Lukas Straub
2021-07-07 18:15 ` [PATCH v3 2/4] replication: Reduce usage of s->hidden_disk and s->secondary_disk Lukas Straub
2021-07-09  7:20   ` Vladimir Sementsov-Ogievskiy
2021-07-07 18:15 ` [PATCH v3 3/4] replication: Properly attach children Lukas Straub
2021-07-09  7:41   ` Vladimir Sementsov-Ogievskiy
2021-07-07 18:15 ` [PATCH v3 4/4] replication: Remove workaround Lukas Straub
2021-07-09  7:49   ` Vladimir Sementsov-Ogievskiy
2021-07-11 20:33     ` Lukas Straub
2021-07-12 10:06       ` Vladimir Sementsov-Ogievskiy
2021-07-12 11:12         ` Lukas Straub

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.