All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
To: qemu-devel@nongnu.org
Cc: qemu-block@nongnu.org, den@virtuozzo.com, kwolf@redhat.com,
	mreitz@redhat.com, eblake@redhat.com,
	Anton Nefedov <anton.nefedov@virtuozzo.com>
Subject: [Qemu-devel] [PATCH v3 10/13] qcow2: skip writing zero buffers to empty COW areas
Date: Mon, 31 Jul 2017 19:22:02 +0300	[thread overview]
Message-ID: <1501518125-29851-11-git-send-email-anton.nefedov@virtuozzo.com> (raw)
In-Reply-To: <1501518125-29851-1-git-send-email-anton.nefedov@virtuozzo.com>

It can be detected that
  1. The COW areas that pad a write request to cluster boundaries read as zeroes
  2. Respective areas on the underlying BDS already read as zeroes
     after being preallocated previously

If both of these are true, COW may be skipped.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
---
 block/qcow2.h         | 12 ++++++++++
 block/qcow2-cluster.c |  5 +++-
 block/qcow2.c         | 64 ++++++++++++++++++++++++++++++++++++++++++++-------
 block/trace-events    |  1 +
 4 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index 595ed9c..db1c6f5 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -363,6 +363,12 @@ typedef struct QCowL2Meta
     bool keep_old_clusters;
 
     /**
+     * True if the area is allocated at the end of data area
+     * (i.e. >= BDRVQcow2State::data_end)
+     */
+    bool clusters_are_trailing;
+
+    /**
      * Requests that overlap with this allocation and wait to be restarted
      * when the allocating request has completed.
      */
@@ -381,6 +387,12 @@ typedef struct QCowL2Meta
     Qcow2COWRegion cow_end;
 
     /**
+     * Indicates that both COW areas are empty (nb_bytes == 0)
+     * or filled with zeroes and do not require any more copying
+     */
+    bool zero_cow;
+
+    /**
      * The I/O vector with the data from the actual guest write request.
      * If non-NULL, this is meant to be merged together with the data
      * from @cow_start and @cow_end into one single write operation.
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f274daa..61148ea 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -735,7 +735,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     assert(start->offset + start->nb_bytes <= end->offset);
     assert(!m->data_qiov || m->data_qiov->size == data_bytes);
 
-    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
+    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->zero_cow) {
         return 0;
     }
 
@@ -1203,6 +1203,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
+    const uint64_t old_data_end = s->data_end;
     int l2_index;
     uint64_t *l2_table;
     uint64_t entry;
@@ -1324,6 +1325,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
         .alloc_offset   = alloc_cluster_offset,
         .offset         = start_of_cluster(s, guest_offset),
         .nb_clusters    = nb_clusters,
+        .clusters_are_trailing = alloc_cluster_offset >= old_data_end,
 
         .keep_old_clusters  = keep_old_clusters,
 
@@ -1335,6 +1337,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
             .offset     = nb_bytes,
             .nb_bytes   = avail_bytes - nb_bytes,
         },
+        .zero_cow = false,
     };
     qemu_co_queue_init(&(*m)->dependent_requests);
     QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
diff --git a/block/qcow2.c b/block/qcow2.c
index 5c7c2b5..38aee65 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1918,6 +1918,11 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
             continue;
         }
 
+        /* If both COW regions are zeroes already, skip this too */
+        if (m->zero_cow) {
+            continue;
+        }
+
         /* The data (middle) region must be immediately after the
          * start region */
         if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
@@ -1965,26 +1970,65 @@ static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
 /*
  * If the specified area is beyond EOF, allocates it + prealloc_size
  * bytes ahead.
+ *
+ * Returns
+ *   true if the space is allocated and contains zeroes
  */
-static void coroutine_fn handle_prealloc(BlockDriverState *bs,
+static bool coroutine_fn handle_prealloc(BlockDriverState *bs,
                                          const QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t start = m->alloc_offset;
     uint64_t end = start + m->nb_clusters * s->cluster_size;
+    int ret;
     int64_t flen = bdrv_getlength(bs->file->bs);
 
     if (flen < 0) {
-        return;
+        return false;
     }
 
     if (end > flen) {
         /* try to alloc host space in one chunk for better locality */
-        bdrv_co_pwrite_zeroes(bs->file, flen,
-                              QEMU_ALIGN_UP(end + s->prealloc_size - flen,
-                                            s->cluster_size),
-                              BDRV_REQ_ALLOCATE);
+        ret = bdrv_co_pwrite_zeroes(bs->file, flen,
+                                    QEMU_ALIGN_UP(end + s->prealloc_size - flen,
+                                                  s->cluster_size),
+                                    BDRV_REQ_ALLOCATE);
+        if (ret < 0) {
+            return false;
+        }
     }
+
+    /* We're safe to assume that the area is zeroes if the area
+     * was allocated at the end of data (s->data_end).
+     * In this case, the only way for file length to be bigger is that
+     * the area was preallocated by this or another request.
+     */
+    return m->clusters_are_trailing;
+}
+
+static bool check_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+{
+    if (bs->encrypted) {
+        return false;
+    }
+
+    if (m->cow_start.nb_bytes != 0 &&
+        !is_zero_sectors(bs,
+                         (m->offset + m->cow_start.offset) >> BDRV_SECTOR_BITS,
+                         m->cow_start.nb_bytes >> BDRV_SECTOR_BITS))
+    {
+        return false;
+    }
+
+    if (m->cow_end.nb_bytes != 0 &&
+        !is_zero_sectors(bs,
+                         (m->offset + m->cow_end.offset) >> BDRV_SECTOR_BITS,
+                         m->cow_end.nb_bytes >> BDRV_SECTOR_BITS))
+    {
+        return false;
+    }
+
+    return true;
 }
 
 static void handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
@@ -1993,8 +2037,12 @@ static void handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
     QCowL2Meta *m;
 
     for (m = l2meta; m != NULL; m = m->next) {
-        if (s->prealloc_size) {
-            handle_prealloc(bs, m);
+        if (s->prealloc_size && handle_prealloc(bs, m)) {
+            if (check_zero_cow(bs, m)) {
+                trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset,
+                                     m->nb_clusters);
+                m->zero_cow = true;
+            }
         }
     }
 }
diff --git a/block/trace-events b/block/trace-events
index b9746a7..422959f 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -62,6 +62,7 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
 qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
+qcow2_skip_cow(void* co, uint64_t offset, int nb_clusters) "co %p offset %" PRIx64 " nb_clusters %d"
 
 # block/qcow2-cluster.c
 qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
-- 
2.7.4

  parent reply	other threads:[~2017-07-31 16:22 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-31 16:21 [Qemu-devel] [PATCH v3 00/13] qcow2: space preallocation and COW improvements Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 01/13] block: introduce BDRV_REQ_ALLOCATE flag Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 02/13] block: treat BDRV_REQ_ALLOCATE as serialising Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 03/13] file-posix: support BDRV_REQ_ALLOCATE Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 04/13] block: support BDRV_REQ_ALLOCATE in passthrough drivers Anton Nefedov
2017-07-31 19:11   ` Eric Blake
2017-08-01 12:58     ` Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 05/13] qcow2: preallocation at image expand Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 06/13] qcow2: set inactive flag Anton Nefedov
2017-07-31 16:21 ` [Qemu-devel] [PATCH v3 07/13] qcow2: truncate preallocated space Anton Nefedov
2017-07-31 16:22 ` [Qemu-devel] [PATCH v3 08/13] qcow2: check space leak at the end of the image Anton Nefedov
2017-07-31 16:22 ` [Qemu-devel] [PATCH v3 09/13] qcow2: move is_zero_sectors() up Anton Nefedov
2017-07-31 19:13   ` Eric Blake
2017-08-01 12:59     ` Anton Nefedov
2017-07-31 16:22 ` Anton Nefedov [this message]
2017-07-31 16:22 ` [Qemu-devel] [PATCH v3 11/13] qcow2: allocate image space by-cluster Anton Nefedov
2017-07-31 16:22 ` [Qemu-devel] [PATCH v3 12/13] iotest 190: test BDRV_REQ_ALLOCATE Anton Nefedov
2017-07-31 16:22 ` [Qemu-devel] [PATCH v3 13/13] iotest 134: test cluster-misaligned encrypted write Anton Nefedov
2017-07-31 16:39 ` [Qemu-devel] [PATCH v3 00/13] qcow2: space preallocation and COW improvements no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1501518125-29851-11-git-send-email-anton.nefedov@virtuozzo.com \
    --to=anton.nefedov@virtuozzo.com \
    --cc=den@virtuozzo.com \
    --cc=eblake@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.