From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: qemu-block@nongnu.org
Cc: qemu-devel@nongnu.org, crosa@redhat.com, ehabkost@redhat.com,
mreitz@redhat.com, kwolf@redhat.com, vsementsov@virtuozzo.com,
jsnow@redhat.com
Subject: [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes
Date: Fri, 5 Mar 2021 20:35:07 +0300 [thread overview]
Message-ID: <20210305173507.393137-7-vsementsov@virtuozzo.com> (raw)
In-Reply-To: <20210305173507.393137-1-vsementsov@virtuozzo.com>
Compressed writes are unaligned to 512, which works very slow in
O_DIRECT mode. Let's use the cache.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
block/coroutines.h | 3 +
block/qcow2.h | 4 ++
block/qcow2-refcount.c | 10 +++
block/qcow2.c | 158 ++++++++++++++++++++++++++++++++++++++---
4 files changed, 164 insertions(+), 11 deletions(-)
diff --git a/block/coroutines.h b/block/coroutines.h
index 4cfb4946e6..cb8e05830e 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -66,4 +66,7 @@ int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn qcow2_co_flush_compressed_cache(BlockDriverState *bs);
+int generated_co_wrapper qcow2_flush_compressed_cache(BlockDriverState *bs);
+
#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block/qcow2.h b/block/qcow2.h
index e18d8dfbae..8b8fed0950 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -28,6 +28,7 @@
#include "crypto/block.h"
#include "qemu/coroutine.h"
#include "qemu/units.h"
+#include "qemu/seqcache.h"
#include "block/block_int.h"
//#define DEBUG_ALLOC
@@ -422,6 +423,9 @@ typedef struct BDRVQcow2State {
Qcow2CompressionType compression_type;
GHashTable *inflight_writes_counters;
+
+ SeqCache *compressed_cache;
+ int compressed_flush_ret;
} BDRVQcow2State;
typedef struct Qcow2COWRegion {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 464d133368..218917308e 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -29,6 +29,7 @@
#include "qemu/bswap.h"
#include "qemu/cutils.h"
#include "trace.h"
+#include "block/coroutines.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
uint64_t max);
@@ -1040,6 +1041,10 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
qcow2_cache_discard(s->l2_table_cache, table);
}
+ if (s->compressed_cache) {
+ seqcache_discard_cluster(s->compressed_cache, cluster_offset);
+ }
+
if (s->discard_passthrough[type]) {
update_refcount_discard(bs, cluster_offset, s->cluster_size);
}
@@ -1349,6 +1354,11 @@ int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
BDRVQcow2State *s = bs->opaque;
int ret;
+ ret = qcow2_flush_compressed_cache(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
ret = qcow2_cache_write(bs, s->l2_table_cache);
if (ret < 0) {
return ret;
diff --git a/block/qcow2.c b/block/qcow2.c
index 6ee94421e0..5f3713cd6f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -42,6 +42,7 @@
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
#include "block/aio_task.h"
+#include "block/coroutines.h"
/*
Differences with QCOW:
@@ -1834,6 +1835,10 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
s->inflight_writes_counters =
g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, g_free);
+ if (!has_data_file(bs) && (bs->file->bs->open_flags & BDRV_O_NOCACHE)) {
+ s->compressed_cache = seqcache_new(s->cluster_size);
+ }
+
return ret;
fail:
@@ -2653,6 +2658,91 @@ fail_nometa:
return ret;
}
+/*
+ * Flush one cluster of compressed cache if exists. If @unfinished is true and
+ * there is no finished cluster to flush, flush the unfinished one. Otherwise
+ * flush only finished clusters.
+ *
+ * Return 0 if nothing to flush, 1 if one cluster successfully flushed and <0 on
+ * error.
+ */
+static int coroutine_fn
+qcow2_co_compressed_flush_one(BlockDriverState *bs, bool unfinished)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret;
+ int64_t align = 1;
+ int64_t offset, bytes;
+ uint8_t *buf;
+
+ if (!s->compressed_cache) {
+ return 0;
+ }
+
+ if (!seqcache_get_next_flush(s->compressed_cache, &offset, &bytes, &buf,
+ &unfinished))
+ {
+ return 0;
+ }
+
+ qcow2_inflight_writes_inc(bs, offset, bytes);
+
+ /*
+ * Don't try to align-up unfinished cached cluster, parallel write to it is
+ * possible! For finished host clusters data in the tail of the cluster will
+ * be never used. So, take some good alignment for speed.
+ *
+ * Note also, that seqcache guarantees that allocated size of @buf is enough
+ * to fill the cluster up to the end, so we are safe to align up with
+ * align <= cluster_size.
+ */
+ if (!unfinished) {
+ align = MIN(s->cluster_size,
+ MAX(s->data_file->bs->bl.request_alignment,
+ bdrv_opt_mem_align(bs)));
+ }
+
+ BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
+ ret = bdrv_co_pwrite(s->data_file, offset,
+ QEMU_ALIGN_UP(offset + bytes, align) - offset,
+ buf, 0);
+ if (ret < 0 && s->compressed_flush_ret == 0) {
+ /*
+ * The data that was "written" earlier is lost. We don't want to
+ * care with storing it somehow to retry flushing later. Also, how much
+ * data will we have to store, if not drop it after failed flush?
+ * After this point qcow2_co_flush_compressed_cache() (and
+ * qcow2_flush_compressed_cache()) never return success.
+ */
+ s->compressed_flush_ret = ret;
+ }
+
+ seqcache_discard_cluster(s->compressed_cache, offset);
+
+ qcow2_inflight_writes_dec(bs, offset, bytes);
+
+ return ret < 0 ? ret : 1;
+}
+
+int coroutine_fn qcow2_co_flush_compressed_cache(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+
+ if (s->compressed_cache) {
+ uint64_t nb_clusters = seqcache_nb_clusters(s->compressed_cache);
+
+ /*
+ * If parallel writes are possible we don't want to loop forever. So
+ * flush only clusters available at start of flush operation.
+ */
+ while (nb_clusters--) {
+ qcow2_co_compressed_flush_one(bs, true);
+ }
+ }
+
+ return s->compressed_flush_ret;
+}
+
static int qcow2_inactivate(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
@@ -2667,6 +2757,13 @@ static int qcow2_inactivate(BlockDriverState *bs)
bdrv_get_device_or_node_name(bs));
}
+ ret = qcow2_flush_compressed_cache(bs);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the compressed write cache: %s",
+ strerror(-ret));
+ }
+
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret) {
result = ret;
@@ -2699,6 +2796,12 @@ static void qcow2_close(BlockDriverState *bs)
qcow2_inactivate(bs);
}
+ /*
+ * Cache should be flushed in qcow2_inactivate() and should be empty in
+ * inactive mode. So we are safe to free it.
+ */
+ seqcache_free(s->compressed_cache);
+
cache_clean_timer_del(bs);
qcow2_cache_destroy(s->l2_table_cache);
qcow2_cache_destroy(s->refcount_block_cache);
@@ -4558,18 +4661,42 @@ qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
goto fail;
}
- qcow2_inflight_writes_inc(bs, cluster_offset, out_len);
+ if (s->compressed_cache) {
+ /*
+ * It's important to do seqcache_write() in the same critical section
+ * (by s->lock) as qcow2_alloc_compressed_cluster_offset(), so that the
+ * cache is filled sequentially.
+ */
+ seqcache_write(s->compressed_cache, cluster_offset, out_len, out_buf);
- qemu_co_mutex_unlock(&s->lock);
+ qemu_co_mutex_unlock(&s->lock);
- BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
+ ret = qcow2_co_compressed_flush_one(bs, false);
+ if (ret < 0) {
+ /*
+ * if ret < 0 it probably not this request which failed, but some
+ * previous one that was cached. Moreover, when we write to the
+ * cache we probably may always report success, because
+ * seqcache_write() never fails. Still, it's better to inform
+ * compressed backup now then wait until final flush.
+ */
+ goto fail;
+ }
+ } else {
+ qcow2_inflight_writes_inc(bs, cluster_offset, out_len);
- qcow2_inflight_writes_dec(bs, cluster_offset, out_len);
+ qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- goto fail;
+ BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
+ ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
+
+ qcow2_inflight_writes_dec(bs, cluster_offset, out_len);
+
+ if (ret < 0) {
+ goto fail;
+ }
}
+
success:
ret = 0;
fail:
@@ -4681,10 +4808,19 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
out_buf = qemu_blockalign(bs, s->cluster_size);
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
- if (ret < 0) {
- goto fail;
+ /*
+ * seqcache_read may return less bytes than csize, as csize may exceed
+ * actual compressed data size. So we are OK if seqcache_read returns
+ * something > 0.
+ */
+ if (!s->compressed_cache ||
+ seqcache_read(s->compressed_cache, coffset, csize, buf) <= 0)
+ {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+ ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
+ if (ret < 0) {
+ goto fail;
+ }
}
if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
--
2.29.2
next prev parent reply other threads:[~2021-03-05 18:35 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-05 17:35 [PATCH v3 0/6] qcow2: compressed write cache Vladimir Sementsov-Ogievskiy
2021-03-05 17:35 ` [PATCH v3 1/6] block-jobs: flush target at the end of .run() Vladimir Sementsov-Ogievskiy
2021-03-11 16:57 ` Max Reitz
2021-03-05 17:35 ` [PATCH v3 2/6] iotests: add qcow2-discard-during-rewrite Vladimir Sementsov-Ogievskiy
2021-03-05 17:35 ` [PATCH v3 3/6] block/qcow2: introduce inflight writes counters: fix discard Vladimir Sementsov-Ogievskiy
2021-03-11 19:58 ` Max Reitz
2021-03-12 9:09 ` Vladimir Sementsov-Ogievskiy
2021-03-12 11:17 ` Max Reitz
2021-03-12 12:32 ` Vladimir Sementsov-Ogievskiy
2021-03-12 12:42 ` Vladimir Sementsov-Ogievskiy
2021-03-12 15:01 ` Max Reitz
2021-03-12 12:46 ` Vladimir Sementsov-Ogievskiy
2021-03-12 15:10 ` Max Reitz
2021-03-12 15:24 ` Vladimir Sementsov-Ogievskiy
2021-03-12 15:52 ` Max Reitz
2021-03-12 16:03 ` Vladimir Sementsov-Ogievskiy
2021-03-12 14:58 ` Max Reitz
2021-03-12 15:39 ` Vladimir Sementsov-Ogievskiy
2021-03-05 17:35 ` [PATCH v3 4/6] util: implement seqcache Vladimir Sementsov-Ogievskiy
2021-03-12 13:41 ` Max Reitz
2021-03-12 14:37 ` Vladimir Sementsov-Ogievskiy
2021-03-12 15:13 ` Max Reitz
2021-06-04 14:31 ` Vladimir Sementsov-Ogievskiy
2021-03-05 17:35 ` [PATCH v3 5/6] block-coroutine-wrapper: allow non bdrv_ prefix Vladimir Sementsov-Ogievskiy
2021-03-12 16:53 ` Max Reitz
2021-03-05 17:35 ` Vladimir Sementsov-Ogievskiy [this message]
2021-03-12 18:15 ` [PATCH v3 6/6] block/qcow2: use seqcache for compressed writes Max Reitz
2021-03-12 18:43 ` Vladimir Sementsov-Ogievskiy
2021-03-15 9:58 ` Max Reitz
2021-03-15 14:40 ` Vladimir Sementsov-Ogievskiy
2021-03-16 12:25 ` Max Reitz
2021-03-16 17:48 ` Vladimir Sementsov-Ogievskiy
2021-03-17 8:09 ` Max Reitz
2021-03-12 18:45 ` Vladimir Sementsov-Ogievskiy
2021-03-29 20:18 ` Vladimir Sementsov-Ogievskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210305173507.393137-7-vsementsov@virtuozzo.com \
--to=vsementsov@virtuozzo.com \
--cc=crosa@redhat.com \
--cc=ehabkost@redhat.com \
--cc=jsnow@redhat.com \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).