All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, vsementsov@virtuozzo.com, ehabkost@redhat.com,
	wencongyang2@huawei.com, xiechanglong.d@gmail.com,
	armbru@redhat.com, qemu-devel@nongnu.org, jsnow@redhat.com,
	crosa@redhat.com, den@openvz.org, mreitz@redhat.com
Subject: [RFC 06/24] block/block-copy: reduce intersecting request lock
Date: Fri, 15 Nov 2019 17:14:26 +0300	[thread overview]
Message-ID: <20191115141444.24155-7-vsementsov@virtuozzo.com> (raw)
In-Reply-To: <20191115141444.24155-1-vsementsov@virtuozzo.com>

Currently, block_copy operation lock the whole requested region. But
there is no reason to lock clusters, which are already copied, it will
disturb other parallel block_copy requests for no reason.

Let's instead do the following:

Lock only sub-region, which we are going to operate on. Then, after
copying all dirty sub-regions, we should wait for intersecting
requests block-copy, if they failed, we should retry these new dirty
clusters.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/block-copy.c | 116 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 95 insertions(+), 21 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index 20068cd699..aca44b13fb 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -39,29 +39,62 @@ static BlockCopyInFlightReq *block_copy_find_inflight_req(BlockCopyState *s,
     return NULL;
 }
 
-static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
-                                                       int64_t offset,
-                                                       int64_t bytes)
+/*
+ * If there are no intersecting requests return false. Otherwise, wait for the
+ * first found intersecting request to finish and return true.
+ */
+static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t start,
+                                             int64_t end)
 {
-    BlockCopyInFlightReq *req;
+    BlockCopyInFlightReq *req = block_copy_find_inflight_req(s, start, end);
 
-    while ((req = block_copy_find_inflight_req(s, offset, bytes))) {
-        qemu_co_queue_wait(&req->wait_queue, NULL);
+    if (!req) {
+        return false;
     }
+
+    qemu_co_queue_wait(&req->wait_queue, NULL);
+
+    return true;
 }
 
+/* Called only on full-dirty region */
 static void block_copy_inflight_req_begin(BlockCopyState *s,
                                           BlockCopyInFlightReq *req,
                                           int64_t offset, int64_t bytes)
 {
+    assert(!block_copy_find_inflight_req(s, offset, bytes));
+
+    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
+
     req->offset = offset;
     req->bytes = bytes;
     qemu_co_queue_init(&req->wait_queue);
     QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
 }
 
-static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
+static void coroutine_fn block_copy_inflight_req_shrink(BlockCopyState *s,
+        BlockCopyInFlightReq *req, int64_t new_bytes)
 {
+    if (new_bytes == req->bytes) {
+        return;
+    }
+
+    assert(new_bytes > 0 && new_bytes < req->bytes);
+
+    bdrv_set_dirty_bitmap(s->copy_bitmap,
+                          req->offset + new_bytes, req->bytes - new_bytes);
+
+    req->bytes = new_bytes;
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+static void coroutine_fn block_copy_inflight_req_end(BlockCopyState *s,
+                                                     BlockCopyInFlightReq *req,
+                                                     int ret)
+{
+    if (ret < 0) {
+        bdrv_set_dirty_bitmap(s->copy_bitmap, req->offset, req->bytes);
+    }
     QLIST_REMOVE(req, list);
     qemu_co_queue_restart_all(&req->wait_queue);
 }
@@ -344,12 +377,19 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
     return ret;
 }
 
-int coroutine_fn block_copy(BlockCopyState *s,
-                            int64_t offset, uint64_t bytes,
-                            bool *error_is_read)
+/*
+ * block_copy_dirty_clusters
+ *
+ * Copy dirty clusters in @start/@bytes range.
+ * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
+ * clusters found and -errno on failure.
+ */
+static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
+                                                  int64_t offset, int64_t bytes,
+                                                  bool *error_is_read)
 {
     int ret = 0;
-    BlockCopyInFlightReq req;
+    bool found_dirty = false;
 
     /*
      * block_copy() user is responsible for keeping source and target in same
@@ -361,10 +401,8 @@ int coroutine_fn block_copy(BlockCopyState *s,
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
     assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
 
-    block_copy_wait_inflight_reqs(s, offset, bytes);
-    block_copy_inflight_req_begin(s, &req, offset, bytes);
-
     while (bytes) {
+        BlockCopyInFlightReq req;
         int64_t next_zero, cur_bytes, status_bytes;
 
         if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
@@ -374,6 +412,8 @@ int coroutine_fn block_copy(BlockCopyState *s,
             continue; /* already copied */
         }
 
+        found_dirty = true;
+
         cur_bytes = MIN(bytes, s->copy_size);
 
         next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
@@ -383,10 +423,12 @@ int coroutine_fn block_copy(BlockCopyState *s,
             assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
             cur_bytes = next_zero - offset;
         }
+        block_copy_inflight_req_begin(s, &req, offset, cur_bytes);
 
         ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
+        block_copy_inflight_req_shrink(s, &req, status_bytes);
         if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
-            bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
+            block_copy_inflight_req_end(s, &req, 0);
             s->progress_reset_callback(s->progress_opaque);
             trace_block_copy_skip_range(s, offset, status_bytes);
             offset += status_bytes;
@@ -398,15 +440,13 @@ int coroutine_fn block_copy(BlockCopyState *s,
 
         trace_block_copy_process(s, offset);
 
-        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
-
         co_get_from_shres(s->mem, cur_bytes);
         ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
                                  error_is_read);
         co_put_to_shres(s->mem, cur_bytes);
+        block_copy_inflight_req_end(s, &req, ret);
         if (ret < 0) {
-            bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
-            break;
+            return ret;
         }
 
         s->progress_bytes_callback(cur_bytes, s->progress_opaque);
@@ -414,7 +454,41 @@ int coroutine_fn block_copy(BlockCopyState *s,
         bytes -= cur_bytes;
     }
 
-    block_copy_inflight_req_end(&req);
+    return found_dirty;
+}
 
-    return ret;
+int coroutine_fn block_copy(BlockCopyState *s, int64_t start, uint64_t bytes,
+                            bool *error_is_read)
+{
+    while (true) {
+        int ret = block_copy_dirty_clusters(s, start, bytes, error_is_read);
+
+        if (ret < 0) {
+            /*
+             * IO operation failed, which means the whole block_copy request
+             * failed.
+             */
+            return ret;
+        }
+        if (ret) {
+            /*
+             * Something was copied, which means that there were yield points
+             * and some new dirty bits may appered (due to failed parallel
+             * block-copy requests).
+             */
+            continue;
+        }
+
+        /*
+         * Here ret == 0, which means that there is no dirty clusters in
+         * requested region.
+         */
+
+        if (!block_copy_wait_one(s, start, bytes)) {
+            /* No dirty bits and nothing to wait: the whole request is done */
+            break;
+        }
+    }
+
+    return 0;
 }
-- 
2.21.0



  parent reply	other threads:[~2019-11-15 14:35 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-15 14:14 [RFC 00/24] backup performance: block_status + async Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 01/24] block/block-copy: specialcase first copy_range request Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 02/24] block/block-copy: use block_status Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 03/24] block/block-copy: factor out block_copy_find_inflight_req Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 04/24] block/block-copy: refactor interfaces to use bytes instead of end Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 05/24] block/block-copy: rename start to offset in interfaces Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` Vladimir Sementsov-Ogievskiy [this message]
2019-11-15 14:14 ` [RFC 07/24] block/block-copy: hide structure definitions Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 08/24] block/block-copy: rename in-flight requests to tasks Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 09/24] block/block-copy: alloc task on each iteration Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 10/24] block/block-copy: add state pointer to BlockCopyTask Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 11/24] block/block-copy: move task size initial calculation to _task_create Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 12/24] block/block-copy: move block_copy_task_create down Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 13/24] block/block-copy: use aio-task-pool API Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 14/24] block/block-copy: More explicit call_state Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 15/24] block/block-copy: implement block_copy_async Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 16/24] block/block-copy: add max_chunk and max_workers paramters Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 17/24] block/block-copy: add ratelimit to block-copy Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 18/24] block/block-copy: add block_copy_cancel Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 19/24] blockjob: add set_speed to BlockJobDriver Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 20/24] job: call job_enter from job_user_pause Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 21/24] backup: move to block-copy Vladimir Sementsov-Ogievskiy
2019-11-15 17:58   ` Eric Blake
2019-11-16  9:28     ` Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 22/24] python: add simplebench.py Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 23/24] python: add qemu/bench_block_job.py Vladimir Sementsov-Ogievskiy
2019-11-15 14:14 ` [RFC 24/24] python: benchmark new backup architecture Vladimir Sementsov-Ogievskiy
2019-11-15 17:30 ` [RFC 00/24] backup performance: block_status + async no-reply
2019-11-15 17:33   ` Vladimir Sementsov-Ogievskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191115141444.24155-7-vsementsov@virtuozzo.com \
    --to=vsementsov@virtuozzo.com \
    --cc=armbru@redhat.com \
    --cc=crosa@redhat.com \
    --cc=den@openvz.org \
    --cc=ehabkost@redhat.com \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=wencongyang2@huawei.com \
    --cc=xiechanglong.d@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.