QEMU-Devel Archive on lore.kernel.org
 help / color / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: qemu-block@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Peter Maydell <peter.maydell@linaro.org>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>
Subject: [PULL 32/36] block: introduce backup-top filter driver
Date: Thu, 10 Oct 2019 13:42:56 +0200
Message-ID: <20191010114300.7746-33-mreitz@redhat.com> (raw)
In-Reply-To: <20191010114300.7746-1-mreitz@redhat.com>

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Backup-top filter caches write operations and does copy-before-write
operations.

The driver will be used in backup instead of write-notifiers.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20191001131409.14202-5-vsementsov@virtuozzo.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/Makefile.objs |   1 +
 block/backup-top.h  |  41 +++++++
 block/backup-top.c  | 281 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 323 insertions(+)
 create mode 100644 block/backup-top.h
 create mode 100644 block/backup-top.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index f06f1fa1ac..e394fe0b6c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -42,6 +42,7 @@ block-obj-y += block-copy.o
 block-obj-y += crypto.o
 
 block-obj-y += aio_task.o
+block-obj-y += backup-top.o
 
 common-obj-y += stream.o
 
diff --git a/block/backup-top.h b/block/backup-top.h
new file mode 100644
index 0000000000..e5cabfa197
--- /dev/null
+++ b/block/backup-top.h
@@ -0,0 +1,41 @@
+/*
+ * backup-top filter driver
+ *
+ * The driver performs Copy-Before-Write (CBW) operation: it is injected above
+ * some node, and before each write it copies _old_ data to the target node.
+ *
+ * Copyright (c) 2018-2019 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BACKUP_TOP_H
+#define BACKUP_TOP_H
+
+#include "block/block_int.h"
+#include "block/block-copy.h"
+
+BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
+                                         BlockDriverState *target,
+                                         const char *filter_node_name,
+                                         uint64_t cluster_size,
+                                         BdrvRequestFlags write_flags,
+                                         BlockCopyState **bcs,
+                                         Error **errp);
+void bdrv_backup_top_drop(BlockDriverState *bs);
+
+#endif /* BACKUP_TOP_H */
diff --git a/block/backup-top.c b/block/backup-top.c
new file mode 100644
index 0000000000..75a315744d
--- /dev/null
+++ b/block/backup-top.c
@@ -0,0 +1,281 @@
+/*
+ * backup-top filter driver
+ *
+ * The driver performs Copy-Before-Write (CBW) operation: it is injected above
+ * some node, and before each write it copies _old_ data to the target node.
+ *
+ * Copyright (c) 2018-2019 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/qdict.h"
+#include "block/block-copy.h"
+
+#include "block/backup-top.h"
+
+typedef struct BDRVBackupTopState {
+    BlockCopyState *bcs;
+    BdrvChild *target;
+    bool active;
+} BDRVBackupTopState;
+
+static coroutine_fn int backup_top_co_preadv(
+        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+        QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes)
+{
+    /*
+     * Here we'd like to use block_copy(), but block-copy need to be moved to
+     * use BdrvChildren to correctly use it in backup-top filter. It's a TODO.
+     */
+
+    abort();
+}
+
+static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs,
+                                               int64_t offset, int bytes)
+{
+    int ret = backup_top_cbw(bs, offset, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_pdiscard(bs->backing, offset, bytes);
+}
+
+static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs,
+        int64_t offset, int bytes, BdrvRequestFlags flags)
+{
+    int ret = backup_top_cbw(bs, offset, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+}
+
+static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs,
+                                              uint64_t offset,
+                                              uint64_t bytes,
+                                              QEMUIOVector *qiov, int flags)
+{
+    if (!(flags & BDRV_REQ_WRITE_UNCHANGED)) {
+        int ret = backup_top_cbw(bs, offset, bytes);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn backup_top_co_flush(BlockDriverState *bs)
+{
+    if (!bs->backing) {
+        return 0;
+    }
+
+    return bdrv_co_flush(bs->backing->bs);
+}
+
+static void backup_top_refresh_filename(BlockDriverState *bs)
+{
+    if (bs->backing == NULL) {
+        /*
+         * we can be here after failed bdrv_attach_child in
+         * bdrv_set_backing_hd
+         */
+        return;
+    }
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            bs->backing->bs->filename);
+}
+
+static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                  const BdrvChildRole *role,
+                                  BlockReopenQueue *reopen_queue,
+                                  uint64_t perm, uint64_t shared,
+                                  uint64_t *nperm, uint64_t *nshared)
+{
+    BDRVBackupTopState *s = bs->opaque;
+
+    if (!s->active) {
+        /*
+         * The filter node may be in process of bdrv_append(), which firstly do
+         * bdrv_set_backing_hd() and then bdrv_replace_node(). This means that
+         * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So,
+         * let's require nothing during bdrv_append() and refresh permissions
+         * after it (see bdrv_backup_top_append()).
+         */
+        *nperm = 0;
+        *nshared = BLK_PERM_ALL;
+        return;
+    }
+
+    if (role == &child_file) {
+        /*
+         * Target child
+         *
+         * Share write to target (child_file), to not interfere
+         * with guest writes to its disk which may be in target backing chain.
+         */
+        *nshared = BLK_PERM_ALL;
+        *nperm = BLK_PERM_WRITE;
+    } else {
+        /* Source child */
+        bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
+                                  nperm, nshared);
+
+        if (perm & BLK_PERM_WRITE) {
+            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+        }
+        *nshared &= ~BLK_PERM_WRITE;
+    }
+}
+
+BlockDriver bdrv_backup_top_filter = {
+    .format_name = "backup-top",
+    .instance_size = sizeof(BDRVBackupTopState),
+
+    .bdrv_co_preadv             = backup_top_co_preadv,
+    .bdrv_co_pwritev            = backup_top_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = backup_top_co_pwrite_zeroes,
+    .bdrv_co_pdiscard           = backup_top_co_pdiscard,
+    .bdrv_co_flush              = backup_top_co_flush,
+
+    .bdrv_co_block_status       = bdrv_co_block_status_from_backing,
+
+    .bdrv_refresh_filename      = backup_top_refresh_filename,
+
+    .bdrv_child_perm            = backup_top_child_perm,
+
+    .is_filter = true,
+};
+
+BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
+                                         BlockDriverState *target,
+                                         const char *filter_node_name,
+                                         uint64_t cluster_size,
+                                         BdrvRequestFlags write_flags,
+                                         BlockCopyState **bcs,
+                                         Error **errp)
+{
+    Error *local_err = NULL;
+    BDRVBackupTopState *state;
+    BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter,
+                                                 filter_node_name,
+                                                 BDRV_O_RDWR, errp);
+
+    if (!top) {
+        return NULL;
+    }
+
+    top->total_sectors = source->total_sectors;
+    top->opaque = state = g_new0(BDRVBackupTopState, 1);
+
+    bdrv_ref(target);
+    state->target = bdrv_attach_child(top, target, "target", &child_file, errp);
+    if (!state->target) {
+        bdrv_unref(target);
+        bdrv_unref(top);
+        return NULL;
+    }
+
+    bdrv_drained_begin(source);
+
+    bdrv_ref(top);
+    bdrv_append(top, source, &local_err);
+    if (local_err) {
+        error_prepend(&local_err, "Cannot append backup-top filter: ");
+        goto append_failed;
+    }
+
+    /*
+     * bdrv_append() finished successfully, now we can require permissions
+     * we want.
+     */
+    state->active = true;
+    bdrv_child_refresh_perms(top, top->backing, &local_err);
+    if (local_err) {
+        error_prepend(&local_err,
+                      "Cannot set permissions for backup-top filter: ");
+        goto failed_after_append;
+    }
+
+    /*
+     * TODO: Create block-copy-state here (which will utilize @cluster_size and
+     * @write_flags parameters which are unused now). For this, block-copy
+     * should be refactored to use BdrvChildren.
+     */
+    state->bcs = NULL;
+    if (!state->bcs) {
+        error_setg(&local_err, "Cannot create block-copy-state");
+        goto failed_after_append;
+    }
+    *bcs = state->bcs;
+
+    bdrv_drained_end(source);
+
+    return top;
+
+failed_after_append:
+    state->active = false;
+    bdrv_backup_top_drop(top);
+
+append_failed:
+    bdrv_drained_end(source);
+    bdrv_unref_child(top, state->target);
+    bdrv_unref(top);
+    error_propagate(errp, local_err);
+
+    return NULL;
+}
+
+void bdrv_backup_top_drop(BlockDriverState *bs)
+{
+    BDRVBackupTopState *s = bs->opaque;
+    AioContext *aio_context = bdrv_get_aio_context(bs);
+
+    block_copy_state_free(s->bcs);
+
+    aio_context_acquire(aio_context);
+
+    bdrv_drained_begin(bs);
+
+    s->active = false;
+    bdrv_child_refresh_perms(bs, bs->backing, &error_abort);
+    bdrv_replace_node(bs, backing_bs(bs), &error_abort);
+    bdrv_set_backing_hd(bs, NULL, &error_abort);
+
+    bdrv_drained_end(bs);
+
+    bdrv_unref(bs);
+
+    aio_context_release(aio_context);
+}
-- 
2.21.0



  parent reply index

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-10 11:42 [PULL 00/36] Block patches Max Reitz
2019-10-10 11:42 ` [PULL 01/36] qemu-iotests: ignore leaks on failure paths in 026 Max Reitz
2019-10-10 11:42 ` [PULL 02/36] block: introduce aio task pool Max Reitz
2019-10-10 11:42 ` [PULL 03/36] block/qcow2: refactor qcow2_co_preadv_part Max Reitz
2019-10-10 11:42 ` [PULL 04/36] block/qcow2: refactor qcow2_co_pwritev_part Max Reitz
2019-10-10 11:42 ` [PULL 05/36] block/qcow2: introduce parallel subrequest handling in read and write Max Reitz
2019-10-10 11:42 ` [PULL 06/36] block/backup: fix max_transfer handling for copy_range Max Reitz
2019-10-10 11:42 ` [PULL 07/36] block/backup: fix backup_cow_with_offload for last cluster Max Reitz
2019-10-10 11:42 ` [PULL 08/36] block/backup: split shareable copying part from backup_do_cow Max Reitz
2019-10-10 11:42 ` [PULL 09/36] block/backup: improve comment about image fleecing Max Reitz
2019-10-10 11:42 ` [PULL 10/36] block/backup: introduce BlockCopyState Max Reitz
2019-10-10 11:42 ` [PULL 11/36] block/backup: fix block-comment style Max Reitz
2019-10-10 11:42 ` [PULL 12/36] block: move block_copy from block/backup.c to separate file Max Reitz
2019-10-10 11:42 ` [PULL 13/36] block: teach bdrv_debug_breakpoint skip filters with backing Max Reitz
2019-10-10 11:42 ` [PULL 14/36] iotests: prepare 124 and 257 bitmap querying for backup-top filter Max Reitz
2019-10-10 11:42 ` [PULL 15/36] iotests: 257: drop unused Drive.device field Max Reitz
2019-10-10 11:42 ` [PULL 16/36] iotests: 257: drop device_add Max Reitz
2019-10-10 11:42 ` [PULL 17/36] qapi: group BlockDeviceStats fields Max Reitz
2019-10-10 11:42 ` [PULL 18/36] qapi: add unmap to BlockDeviceStats Max Reitz
2019-10-10 11:42 ` [PULL 19/36] block: add empty account cookie type Max Reitz
2019-10-10 11:42 ` [PULL 20/36] ide: account UNMAP (TRIM) operations Max Reitz
2019-10-10 11:42 ` [PULL 21/36] scsi: store unmap offset and nb_sectors in request struct Max Reitz
2019-10-10 11:42 ` [PULL 22/36] scsi: move unmap error checking to the complete callback Max Reitz
2019-10-10 11:42 ` [PULL 23/36] scsi: account unmap operations Max Reitz
2019-10-10 11:42 ` [PULL 24/36] file-posix: account discard operations Max Reitz
2019-10-10 11:42 ` [PULL 25/36] qapi: query-blockstat: add driver specific file-posix stats Max Reitz
2019-10-10 11:42 ` [PULL 26/36] iotests: Fix 125 for growth_mode = metadata Max Reitz
2019-10-10 11:42 ` [PULL 27/36] iotests: Disable 125 on broken XFS versions Max Reitz
2019-10-10 11:42 ` [PULL 28/36] iotests: Use stat -c %b in 125 Max Reitz
2019-10-10 11:42 ` [PULL 29/36] block/backup: move in-flight requests handling from backup to block-copy Max Reitz
2019-10-10 11:42 ` [PULL 30/36] block/backup: move write_flags calculation inside backup_job_create Max Reitz
2019-10-10 11:42 ` [PULL 31/36] block/block-copy: split block_copy_set_callbacks function Max Reitz
2019-10-10 11:42 ` Max Reitz [this message]
2019-10-10 11:42 ` [PULL 33/36] block/backup: use backup-top instead of write notifiers Max Reitz
2019-10-17 12:04   ` Peter Maydell
2019-10-17 13:40     ` Vladimir Sementsov-Ogievskiy
2019-10-10 11:42 ` [PULL 34/36] nbd: add empty .bdrv_reopen_prepare Max Reitz
2019-10-10 11:42 ` [PULL 35/36] tests: fix I/O test for hosts defaulting to LUKSv2 Max Reitz
2019-10-10 11:43 ` [PULL 36/36] iotests/162: Fix for newer Linux 5.3+ Max Reitz
2019-10-14 12:34 ` [PULL 00/36] Block patches Peter Maydell

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191010114300.7746-33-mreitz@redhat.com \
    --to=mreitz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

QEMU-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/qemu-devel/0 qemu-devel/git/0.git
	git clone --mirror https://lore.kernel.org/qemu-devel/1 qemu-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 qemu-devel qemu-devel/ https://lore.kernel.org/qemu-devel \
		qemu-devel@nongnu.org
	public-inbox-index qemu-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.nongnu.qemu-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git