All of lore.kernel.org
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: qemu-block@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Peter Maydell <peter.maydell@linaro.org>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>
Subject: [PULL 05/36] block/qcow2: introduce parallel subrequest handling in read and write
Date: Thu, 10 Oct 2019 13:42:29 +0200	[thread overview]
Message-ID: <20191010114300.7746-6-mreitz@redhat.com> (raw)
In-Reply-To: <20191010114300.7746-1-mreitz@redhat.com>

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

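Handle the sub-requests of a single guest read or write in parallel,
using an AIO task pool of at most QCOW2_MAX_WORKERS workers. This
improves performance for fragmented qcow2 images.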

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20190916175324.18478-6-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.h      |   3 ++
 block/qcow2.c      | 125 ++++++++++++++++++++++++++++++++++++++++-----
 block/trace-events |   1 +
 3 files changed, 117 insertions(+), 12 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index a488d761ff..f51f478e34 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -65,6 +65,9 @@
 #define QCOW2_MAX_BITMAPS 65535
 #define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
 
+/* Maximum number of parallel sub-requests per guest request */
+#define QCOW2_MAX_WORKERS 8
+
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
diff --git a/block/qcow2.c b/block/qcow2.c
index 164b22e4a2..7961c05783 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -41,6 +41,7 @@
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto.h"
+#include "block/aio_task.h"
 
 /*
   Differences with QCOW:
@@ -2025,6 +2026,60 @@ fail:
     return ret;
 }
 
+typedef struct Qcow2AioTask {
+    AioTask task;
+
+    BlockDriverState *bs;
+    QCow2ClusterType cluster_type; /* only for read */
+    uint64_t file_cluster_offset;
+    uint64_t offset;
+    uint64_t bytes;
+    QEMUIOVector *qiov;
+    uint64_t qiov_offset;
+    QCowL2Meta *l2meta; /* only for write */
+} Qcow2AioTask;
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
+static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
+                                       AioTaskPool *pool,
+                                       AioTaskFunc func,
+                                       QCow2ClusterType cluster_type,
+                                       uint64_t file_cluster_offset,
+                                       uint64_t offset,
+                                       uint64_t bytes,
+                                       QEMUIOVector *qiov,
+                                       size_t qiov_offset,
+                                       QCowL2Meta *l2meta)
+{
+    Qcow2AioTask local_task;
+    Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
+
+    *task = (Qcow2AioTask) {
+        .task.func = func,
+        .bs = bs,
+        .cluster_type = cluster_type,
+        .qiov = qiov,
+        .file_cluster_offset = file_cluster_offset,
+        .offset = offset,
+        .bytes = bytes,
+        .qiov_offset = qiov_offset,
+        .l2meta = l2meta,
+    };
+
+    trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
+                         func == qcow2_co_preadv_task_entry ? "read" : "write",
+                         cluster_type, file_cluster_offset, offset, bytes,
+                         qiov, qiov_offset);
+
+    if (!pool) {
+        return func(&task->task);
+    }
+
+    aio_task_pool_start_task(pool, &task->task);
+
+    return 0;
+}
+
 static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
                                              QCow2ClusterType cluster_type,
                                              uint64_t file_cluster_offset,
@@ -2074,18 +2129,28 @@ static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
     g_assert_not_reached();
 }
 
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+{
+    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+    assert(!t->l2meta);
+
+    return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
+                                t->offset, t->bytes, t->qiov, t->qiov_offset);
+}
+
 static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
                                              uint64_t offset, uint64_t bytes,
                                              QEMUIOVector *qiov,
                                              size_t qiov_offset, int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int ret;
+    int ret = 0;
     unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
+    AioTaskPool *aio = NULL;
 
-    while (bytes != 0) {
-
+    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
         /* prepare next request */
         cur_bytes = MIN(bytes, INT_MAX);
         if (s->crypto) {
@@ -2097,7 +2162,7 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
         ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
         qemu_co_mutex_unlock(&s->lock);
         if (ret < 0) {
-            return ret;
+            goto out;
         }
 
         if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
@@ -2106,11 +2171,14 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
         {
             qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
         } else {
-            ret = qcow2_co_preadv_task(bs, ret,
-                                       cluster_offset, offset, cur_bytes,
-                                       qiov, qiov_offset);
+            if (!aio && cur_bytes != bytes) {
+                aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
+            }
+            ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
+                                 cluster_offset, offset, cur_bytes,
+                                 qiov, qiov_offset, NULL);
             if (ret < 0) {
-                return ret;
+                goto out;
             }
         }
 
@@ -2119,7 +2187,16 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
         qiov_offset += cur_bytes;
     }
 
-    return 0;
+out:
+    if (aio) {
+        aio_task_pool_wait_all(aio);
+        if (ret == 0) {
+            ret = aio_task_pool_status(aio);
+        }
+        g_free(aio);
+    }
+
+    return ret;
 }
 
 /* Check if it's possible to merge a write request with the writing of
@@ -2324,6 +2401,17 @@ out_locked:
     return ret;
 }
 
+static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+{
+    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+    assert(!t->cluster_type);
+
+    return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
+                                 t->offset, t->bytes, t->qiov, t->qiov_offset,
+                                 t->l2meta);
+}
+
 static coroutine_fn int qcow2_co_pwritev_part(
         BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         QEMUIOVector *qiov, size_t qiov_offset, int flags)
@@ -2334,10 +2422,11 @@ static coroutine_fn int qcow2_co_pwritev_part(
     unsigned int cur_bytes; /* number of sectors in current iteration */
     uint64_t cluster_offset;
     QCowL2Meta *l2meta = NULL;
+    AioTaskPool *aio = NULL;
 
     trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
 
-    while (bytes != 0) {
+    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
 
         l2meta = NULL;
 
@@ -2369,8 +2458,12 @@ static coroutine_fn int qcow2_co_pwritev_part(
 
         qemu_co_mutex_unlock(&s->lock);
 
-        ret = qcow2_co_pwritev_task(bs, cluster_offset, offset, cur_bytes,
-                                    qiov, qiov_offset, l2meta);
+        if (!aio && cur_bytes != bytes) {
+            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
+        }
+        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
+                             cluster_offset, offset, cur_bytes,
+                             qiov, qiov_offset, l2meta);
         l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
         if (ret < 0) {
             goto fail_nometa;
@@ -2391,6 +2484,14 @@ out_locked:
     qemu_co_mutex_unlock(&s->lock);
 
 fail_nometa:
+    if (aio) {
+        aio_task_pool_wait_all(aio);
+        if (ret == 0) {
+            ret = aio_task_pool_status(aio);
+        }
+        g_free(aio);
+    }
+
     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
 
     return ret;
diff --git a/block/trace-events b/block/trace-events
index 04209f058d..3aa27e6b1e 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -62,6 +62,7 @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "
 file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64
 
 # qcow2.c
+qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
 qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
 qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
 qcow2_writev_start_part(void *co) "co %p"
-- 
2.21.0



  parent reply	other threads:[~2019-10-10 12:20 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-10 11:42 [PULL 00/36] Block patches Max Reitz
2019-10-10 11:42 ` [PULL 01/36] qemu-iotests: ignore leaks on failure paths in 026 Max Reitz
2019-10-10 11:42 ` [PULL 02/36] block: introduce aio task pool Max Reitz
2019-10-10 11:42 ` [PULL 03/36] block/qcow2: refactor qcow2_co_preadv_part Max Reitz
2019-10-10 11:42 ` [PULL 04/36] block/qcow2: refactor qcow2_co_pwritev_part Max Reitz
2019-10-10 11:42 ` Max Reitz [this message]
2019-10-10 11:42 ` [PULL 06/36] block/backup: fix max_transfer handling for copy_range Max Reitz
2019-10-10 11:42 ` [PULL 07/36] block/backup: fix backup_cow_with_offload for last cluster Max Reitz
2019-10-10 11:42 ` [PULL 08/36] block/backup: split shareable copying part from backup_do_cow Max Reitz
2019-10-10 11:42 ` [PULL 09/36] block/backup: improve comment about image fleecing Max Reitz
2019-10-10 11:42 ` [PULL 10/36] block/backup: introduce BlockCopyState Max Reitz
2019-10-10 11:42 ` [PULL 11/36] block/backup: fix block-comment style Max Reitz
2019-10-10 11:42 ` [PULL 12/36] block: move block_copy from block/backup.c to separate file Max Reitz
2019-10-10 11:42 ` [PULL 13/36] block: teach bdrv_debug_breakpoint skip filters with backing Max Reitz
2019-10-10 11:42 ` [PULL 14/36] iotests: prepare 124 and 257 bitmap querying for backup-top filter Max Reitz
2019-10-10 11:42 ` [PULL 15/36] iotests: 257: drop unused Drive.device field Max Reitz
2019-10-10 11:42 ` [PULL 16/36] iotests: 257: drop device_add Max Reitz
2019-10-10 11:42 ` [PULL 17/36] qapi: group BlockDeviceStats fields Max Reitz
2019-10-10 11:42 ` [PULL 18/36] qapi: add unmap to BlockDeviceStats Max Reitz
2019-10-10 11:42 ` [PULL 19/36] block: add empty account cookie type Max Reitz
2019-10-10 11:42 ` [PULL 20/36] ide: account UNMAP (TRIM) operations Max Reitz
2019-10-10 11:42 ` [PULL 21/36] scsi: store unmap offset and nb_sectors in request struct Max Reitz
2019-10-10 11:42 ` [PULL 22/36] scsi: move unmap error checking to the complete callback Max Reitz
2019-10-10 11:42 ` [PULL 23/36] scsi: account unmap operations Max Reitz
2019-10-10 11:42 ` [PULL 24/36] file-posix: account discard operations Max Reitz
2019-10-10 11:42 ` [PULL 25/36] qapi: query-blockstat: add driver specific file-posix stats Max Reitz
2019-10-10 11:42 ` [PULL 26/36] iotests: Fix 125 for growth_mode = metadata Max Reitz
2019-10-10 11:42 ` [PULL 27/36] iotests: Disable 125 on broken XFS versions Max Reitz
2019-10-10 11:42 ` [PULL 28/36] iotests: Use stat -c %b in 125 Max Reitz
2019-10-10 11:42 ` [PULL 29/36] block/backup: move in-flight requests handling from backup to block-copy Max Reitz
2019-10-10 11:42 ` [PULL 30/36] block/backup: move write_flags calculation inside backup_job_create Max Reitz
2019-10-10 11:42 ` [PULL 31/36] block/block-copy: split block_copy_set_callbacks function Max Reitz
2019-10-10 11:42 ` [PULL 32/36] block: introduce backup-top filter driver Max Reitz
2019-10-10 11:42 ` [PULL 33/36] block/backup: use backup-top instead of write notifiers Max Reitz
2019-10-17 12:04   ` Peter Maydell
2019-10-17 13:40     ` Vladimir Sementsov-Ogievskiy
2019-10-10 11:42 ` [PULL 34/36] nbd: add empty .bdrv_reopen_prepare Max Reitz
2019-10-10 11:42 ` [PULL 35/36] tests: fix I/O test for hosts defaulting to LUKSv2 Max Reitz
2019-10-10 11:43 ` [PULL 36/36] iotests/162: Fix for newer Linux 5.3+ Max Reitz
2019-10-14 12:34 ` [PULL 00/36] Block patches Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191010114300.7746-6-mreitz@redhat.com \
    --to=mreitz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.