QEMU-Devel Archive on lore.kernel.org
 help / color / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: qemu-block@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Peter Maydell <peter.maydell@linaro.org>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>
Subject: [PULL 05/36] block/qcow2: introduce parallel subrequest handling in read and write
Date: Thu, 10 Oct 2019 13:42:29 +0200
Message-ID: <20191010114300.7746-6-mreitz@redhat.com> (raw)
In-Reply-To: <20191010114300.7746-1-mreitz@redhat.com>

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

It improves performance for fragmented qcow2 images.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20190916175324.18478-6-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.h      |   3 ++
 block/qcow2.c      | 125 ++++++++++++++++++++++++++++++++++++++++-----
 block/trace-events |   1 +
 3 files changed, 117 insertions(+), 12 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index a488d761ff..f51f478e34 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -65,6 +65,9 @@
 #define QCOW2_MAX_BITMAPS 65535
 #define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
 
+/* Maximum of parallel sub-request per guest request */
+#define QCOW2_MAX_WORKERS 8
+
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
diff --git a/block/qcow2.c b/block/qcow2.c
index 164b22e4a2..7961c05783 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -41,6 +41,7 @@
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto.h"
+#include "block/aio_task.h"
 
 /*
   Differences with QCOW:
@@ -2025,6 +2026,60 @@ fail:
     return ret;
 }
 
+typedef struct Qcow2AioTask {
+    AioTask task;
+
+    BlockDriverState *bs;
+    QCow2ClusterType cluster_type; /* only for read */
+    uint64_t file_cluster_offset;
+    uint64_t offset;
+    uint64_t bytes;
+    QEMUIOVector *qiov;
+    uint64_t qiov_offset;
+    QCowL2Meta *l2meta; /* only for write */
+} Qcow2AioTask;
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
+static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
+                                       AioTaskPool *pool,
+                                       AioTaskFunc func,
+                                       QCow2ClusterType cluster_type,
+                                       uint64_t file_cluster_offset,
+                                       uint64_t offset,
+                                       uint64_t bytes,
+                                       QEMUIOVector *qiov,
+                                       size_t qiov_offset,
+                                       QCowL2Meta *l2meta)
+{
+    Qcow2AioTask local_task;
+    Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
+
+    *task = (Qcow2AioTask) {
+        .task.func = func,
+        .bs = bs,
+        .cluster_type = cluster_type,
+        .qiov = qiov,
+        .file_cluster_offset = file_cluster_offset,
+        .offset = offset,
+        .bytes = bytes,
+        .qiov_offset = qiov_offset,
+        .l2meta = l2meta,
+    };
+
+    trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
+                         func == qcow2_co_preadv_task_entry ? "read" : "write",
+                         cluster_type, file_cluster_offset, offset, bytes,
+                         qiov, qiov_offset);
+
+    if (!pool) {
+        return func(&task->task);
+    }
+
+    aio_task_pool_start_task(pool, &task->task);
+
+    return 0;
+}
+
 static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
                                              QCow2ClusterType cluster_type,
                                              uint64_t file_cluster_offset,
@@ -2074,18 +2129,28 @@ static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
     g_assert_not_reached();
 }
 
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+{
+    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+    assert(!t->l2meta);
+
+    return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
+                                t->offset, t->bytes, t->qiov, t->qiov_offset);
+}
+
 static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
                                              uint64_t offset, uint64_t bytes,
                                              QEMUIOVector *qiov,
                                              size_t qiov_offset, int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int ret;
+    int ret = 0;
     unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
+    AioTaskPool *aio = NULL;
 
-    while (bytes != 0) {
-
+    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
         /* prepare next request */
         cur_bytes = MIN(bytes, INT_MAX);
         if (s->crypto) {
@@ -2097,7 +2162,7 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
         ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
         qemu_co_mutex_unlock(&s->lock);
         if (ret < 0) {
-            return ret;
+            goto out;
         }
 
         if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
@@ -2106,11 +2171,14 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
         {
             qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
         } else {
-            ret = qcow2_co_preadv_task(bs, ret,
-                                       cluster_offset, offset, cur_bytes,
-                                       qiov, qiov_offset);
+            if (!aio && cur_bytes != bytes) {
+                aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
+            }
+            ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
+                                 cluster_offset, offset, cur_bytes,
+                                 qiov, qiov_offset, NULL);
             if (ret < 0) {
-                return ret;
+                goto out;
             }
         }
 
@@ -2119,7 +2187,16 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
         qiov_offset += cur_bytes;
     }
 
-    return 0;
+out:
+    if (aio) {
+        aio_task_pool_wait_all(aio);
+        if (ret == 0) {
+            ret = aio_task_pool_status(aio);
+        }
+        g_free(aio);
+    }
+
+    return ret;
 }
 
 /* Check if it's possible to merge a write request with the writing of
@@ -2324,6 +2401,17 @@ out_locked:
     return ret;
 }
 
+static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+{
+    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+    assert(!t->cluster_type);
+
+    return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
+                                 t->offset, t->bytes, t->qiov, t->qiov_offset,
+                                 t->l2meta);
+}
+
 static coroutine_fn int qcow2_co_pwritev_part(
         BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         QEMUIOVector *qiov, size_t qiov_offset, int flags)
@@ -2334,10 +2422,11 @@ static coroutine_fn int qcow2_co_pwritev_part(
     unsigned int cur_bytes; /* number of sectors in current iteration */
     uint64_t cluster_offset;
     QCowL2Meta *l2meta = NULL;
+    AioTaskPool *aio = NULL;
 
     trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
 
-    while (bytes != 0) {
+    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
 
         l2meta = NULL;
 
@@ -2369,8 +2458,12 @@ static coroutine_fn int qcow2_co_pwritev_part(
 
         qemu_co_mutex_unlock(&s->lock);
 
-        ret = qcow2_co_pwritev_task(bs, cluster_offset, offset, cur_bytes,
-                                    qiov, qiov_offset, l2meta);
+        if (!aio && cur_bytes != bytes) {
+            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
+        }
+        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
+                             cluster_offset, offset, cur_bytes,
+                             qiov, qiov_offset, l2meta);
         l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
         if (ret < 0) {
             goto fail_nometa;
@@ -2391,6 +2484,14 @@ out_locked:
     qemu_co_mutex_unlock(&s->lock);
 
 fail_nometa:
+    if (aio) {
+        aio_task_pool_wait_all(aio);
+        if (ret == 0) {
+            ret = aio_task_pool_status(aio);
+        }
+        g_free(aio);
+    }
+
     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
 
     return ret;
diff --git a/block/trace-events b/block/trace-events
index 04209f058d..3aa27e6b1e 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -62,6 +62,7 @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "
 file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64
 
 # qcow2.c
+qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
 qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
 qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
 qcow2_writev_start_part(void *co) "co %p"
-- 
2.21.0



  parent reply index

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-10 11:42 [PULL 00/36] Block patches Max Reitz
2019-10-10 11:42 ` [PULL 01/36] qemu-iotests: ignore leaks on failure paths in 026 Max Reitz
2019-10-10 11:42 ` [PULL 02/36] block: introduce aio task pool Max Reitz
2019-10-10 11:42 ` [PULL 03/36] block/qcow2: refactor qcow2_co_preadv_part Max Reitz
2019-10-10 11:42 ` [PULL 04/36] block/qcow2: refactor qcow2_co_pwritev_part Max Reitz
2019-10-10 11:42 ` Max Reitz [this message]
2019-10-10 11:42 ` [PULL 06/36] block/backup: fix max_transfer handling for copy_range Max Reitz
2019-10-10 11:42 ` [PULL 07/36] block/backup: fix backup_cow_with_offload for last cluster Max Reitz
2019-10-10 11:42 ` [PULL 08/36] block/backup: split shareable copying part from backup_do_cow Max Reitz
2019-10-10 11:42 ` [PULL 09/36] block/backup: improve comment about image fleecing Max Reitz
2019-10-10 11:42 ` [PULL 10/36] block/backup: introduce BlockCopyState Max Reitz
2019-10-10 11:42 ` [PULL 11/36] block/backup: fix block-comment style Max Reitz
2019-10-10 11:42 ` [PULL 12/36] block: move block_copy from block/backup.c to separate file Max Reitz
2019-10-10 11:42 ` [PULL 13/36] block: teach bdrv_debug_breakpoint skip filters with backing Max Reitz
2019-10-10 11:42 ` [PULL 14/36] iotests: prepare 124 and 257 bitmap querying for backup-top filter Max Reitz
2019-10-10 11:42 ` [PULL 15/36] iotests: 257: drop unused Drive.device field Max Reitz
2019-10-10 11:42 ` [PULL 16/36] iotests: 257: drop device_add Max Reitz
2019-10-10 11:42 ` [PULL 17/36] qapi: group BlockDeviceStats fields Max Reitz
2019-10-10 11:42 ` [PULL 18/36] qapi: add unmap to BlockDeviceStats Max Reitz
2019-10-10 11:42 ` [PULL 19/36] block: add empty account cookie type Max Reitz
2019-10-10 11:42 ` [PULL 20/36] ide: account UNMAP (TRIM) operations Max Reitz
2019-10-10 11:42 ` [PULL 21/36] scsi: store unmap offset and nb_sectors in request struct Max Reitz
2019-10-10 11:42 ` [PULL 22/36] scsi: move unmap error checking to the complete callback Max Reitz
2019-10-10 11:42 ` [PULL 23/36] scsi: account unmap operations Max Reitz
2019-10-10 11:42 ` [PULL 24/36] file-posix: account discard operations Max Reitz
2019-10-10 11:42 ` [PULL 25/36] qapi: query-blockstat: add driver specific file-posix stats Max Reitz
2019-10-10 11:42 ` [PULL 26/36] iotests: Fix 125 for growth_mode = metadata Max Reitz
2019-10-10 11:42 ` [PULL 27/36] iotests: Disable 125 on broken XFS versions Max Reitz
2019-10-10 11:42 ` [PULL 28/36] iotests: Use stat -c %b in 125 Max Reitz
2019-10-10 11:42 ` [PULL 29/36] block/backup: move in-flight requests handling from backup to block-copy Max Reitz
2019-10-10 11:42 ` [PULL 30/36] block/backup: move write_flags calculation inside backup_job_create Max Reitz
2019-10-10 11:42 ` [PULL 31/36] block/block-copy: split block_copy_set_callbacks function Max Reitz
2019-10-10 11:42 ` [PULL 32/36] block: introduce backup-top filter driver Max Reitz
2019-10-10 11:42 ` [PULL 33/36] block/backup: use backup-top instead of write notifiers Max Reitz
2019-10-17 12:04   ` Peter Maydell
2019-10-17 13:40     ` Vladimir Sementsov-Ogievskiy
2019-10-10 11:42 ` [PULL 34/36] nbd: add empty .bdrv_reopen_prepare Max Reitz
2019-10-10 11:42 ` [PULL 35/36] tests: fix I/O test for hosts defaulting to LUKSv2 Max Reitz
2019-10-10 11:43 ` [PULL 36/36] iotests/162: Fix for newer Linux 5.3+ Max Reitz
2019-10-14 12:34 ` [PULL 00/36] Block patches Peter Maydell

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191010114300.7746-6-mreitz@redhat.com \
    --to=mreitz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

QEMU-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/qemu-devel/0 qemu-devel/git/0.git
	git clone --mirror https://lore.kernel.org/qemu-devel/1 qemu-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 qemu-devel qemu-devel/ https://lore.kernel.org/qemu-devel \
		qemu-devel@nongnu.org
	public-inbox-index qemu-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.nongnu.qemu-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git