All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 02/39] qcow2: Implement .bdrv_co_preadv()
Date: Thu, 16 Jun 2016 16:07:51 +0200	[thread overview]
Message-ID: <1466086108-24868-3-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1466086108-24868-1-git-send-email-kwolf@redhat.com>

Reading from qcow2 images is now byte granularity.

Most of the affected code in qcow2 actually gets simpler with this
change. The only exception is encryption, which is fixed on 512 bytes
blocks; in order to keep this working, bs->request_alignment is set for
encrypted images.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 block/qcow2-cluster.c |  27 ++++++-------
 block/qcow2.c         | 108 +++++++++++++++++++++++++++-----------------------
 block/qcow2.h         |   2 +-
 3 files changed, 72 insertions(+), 65 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index a59fb34..277764f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -424,7 +424,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
      * interface.  This avoids double I/O throttling and request tracking,
      * which can lead to deadlock when block layer copy-on-read is enabled.
      */
-    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+    ret = bs->drv->bdrv_co_preadv(bs, (start_sect + n_start) * BDRV_SECTOR_SIZE,
+                                  n * BDRV_SECTOR_SIZE, &qiov, 0);
     if (ret < 0) {
         goto out;
     }
@@ -464,19 +465,21 @@ out:
 /*
  * get_cluster_offset
  *
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
+ * For a given offset of the virtual disk, find the cluster type and offset in
+ * the qcow2 file. The offset is stored in *cluster_offset.
  *
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
+ * On entry, *bytes is the maximum number of contiguous bytes starting at
+ * offset that we are interested in.
  *
- * on exit, *num is the number of contiguous sectors we can read.
+ * On exit, *bytes is the number of bytes starting at offset that have the same
+ * cluster type and (if applicable) are stored contiguously in the image file.
+ * Compressed clusters are always returned one by one.
  *
  * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
  * cases.
  */
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset)
+                             unsigned int *bytes, uint64_t *cluster_offset)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
@@ -485,13 +488,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     unsigned int offset_in_cluster, nb_clusters;
     uint64_t bytes_available, bytes_needed;
     int ret;
-    unsigned int bytes;
-
-    assert(*num <= BDRV_REQUEST_MAX_SECTORS);
-    bytes = *num * BDRV_SECTOR_SIZE;
 
     offset_in_cluster = offset_into_cluster(s, offset);
-    bytes_needed = (uint64_t) bytes + offset_in_cluster;
+    bytes_needed = (uint64_t) *bytes + offset_in_cluster;
 
     l1_bits = s->l2_bits + s->cluster_bits;
 
@@ -595,9 +594,7 @@ out:
         bytes_available = bytes_needed;
     }
 
-    bytes = bytes_available - offset_in_cluster;
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    *num = bytes >> BDRV_SECTOR_BITS;
+    *bytes = bytes_available - offset_in_cluster;
 
     return ret;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 6f5fb81..545dea0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -975,6 +975,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
         }
 
         bs->encrypted = 1;
+
+        /* Encryption works on a sector granularity */
+        bs->request_alignment = BDRV_SECTOR_SIZE;
     }
 
     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1331,16 +1334,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
     BDRVQcow2State *s = bs->opaque;
     uint64_t cluster_offset;
     int index_in_cluster, ret;
+    unsigned int bytes;
     int64_t status = 0;
 
-    *pnum = nb_sectors;
+    bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+    ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+                                   &cluster_offset);
     qemu_co_mutex_unlock(&s->lock);
     if (ret < 0) {
         return ret;
     }
 
+    *pnum = bytes >> BDRV_SECTOR_BITS;
+
     if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
         !s->cipher) {
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1358,28 +1365,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
 
 /* handle reading after the end of the backing file */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                  int64_t sector_num, int nb_sectors)
+                        int64_t offset, int bytes)
 {
+    uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
     int n1;
-    if ((sector_num + nb_sectors) <= bs->total_sectors)
-        return nb_sectors;
-    if (sector_num >= bs->total_sectors)
+
+    if ((offset + bytes) <= bs_size) {
+        return bytes;
+    }
+
+    if (offset >= bs_size) {
         n1 = 0;
-    else
-        n1 = bs->total_sectors - sector_num;
+    } else {
+        n1 = bs_size - offset;
+    }
 
-    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+    qemu_iovec_memset(qiov, n1, 0, bytes - n1);
 
     return n1;
 }
 
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
-                          int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                        uint64_t bytes, QEMUIOVector *qiov,
+                                        int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster, n1;
+    int offset_in_cluster, n1;
     int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
     uint64_t bytes_done = 0;
     QEMUIOVector hd_qiov;
@@ -1389,26 +1402,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
 
     qemu_co_mutex_lock(&s->lock);
 
-    while (remaining_sectors != 0) {
+    while (bytes != 0) {
 
         /* prepare next request */
-        cur_nr_sectors = remaining_sectors;
+        cur_bytes = MIN(bytes, INT_MAX);
         if (s->cipher) {
-            cur_nr_sectors = MIN(cur_nr_sectors,
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+            cur_bytes = MIN(cur_bytes,
+                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
         }
 
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset);
+        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
         if (ret < 0) {
             goto fail;
         }
 
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        offset_in_cluster = offset_into_cluster(s, offset);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
         switch (ret) {
         case QCOW2_CLUSTER_UNALLOCATED:
@@ -1416,18 +1427,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             if (bs->backing) {
                 /* read from the base image */
                 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
-                    sector_num, cur_nr_sectors);
+                                         offset, cur_bytes);
                 if (n1 > 0) {
                     QEMUIOVector local_qiov;
 
                     qemu_iovec_init(&local_qiov, hd_qiov.niov);
-                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
-                                      n1 * BDRV_SECTOR_SIZE);
+                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
 
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                     qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                        n1, &local_qiov);
+                    ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
+                                         &local_qiov, 0);
                     qemu_co_mutex_lock(&s->lock);
 
                     qemu_iovec_destroy(&local_qiov);
@@ -1438,12 +1448,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 }
             } else {
                 /* Note: in this case, no need to wait */
-                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             }
             break;
 
         case QCOW2_CLUSTER_ZERO:
-            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             break;
 
         case QCOW2_CLUSTER_COMPRESSED:
@@ -1454,8 +1464,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             }
 
             qemu_iovec_from_buf(&hd_qiov, 0,
-                s->cluster_cache + index_in_cluster * 512,
-                512 * cur_nr_sectors);
+                                s->cluster_cache + offset_in_cluster,
+                                cur_bytes);
             break;
 
         case QCOW2_CLUSTER_NORMAL:
@@ -1482,34 +1492,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                     }
                 }
 
-                assert(cur_nr_sectors <=
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                 qemu_iovec_reset(&hd_qiov);
-                qemu_iovec_add(&hd_qiov, cluster_data,
-                    512 * cur_nr_sectors);
+                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
             }
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
-                                (cluster_offset >> 9) + index_in_cluster,
-                                cur_nr_sectors, &hd_qiov);
+            ret = bdrv_co_preadv(bs->file->bs,
+                                 cluster_offset + offset_in_cluster,
+                                 cur_bytes, &hd_qiov, 0);
             qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
             if (bs->encrypted) {
                 assert(s->cipher);
+                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                 Error *err = NULL;
-                if (qcow2_encrypt_sectors(s, sector_num,  cluster_data,
-                                          cluster_data, cur_nr_sectors, false,
-                                          &err) < 0) {
+                if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+                                          cluster_data, cluster_data,
+                                          cur_bytes >> BDRV_SECTOR_BITS,
+                                          false, &err) < 0) {
                     error_free(err);
                     ret = -EIO;
                     goto fail;
                 }
-                qemu_iovec_from_buf(qiov, bytes_done,
-                    cluster_data, 512 * cur_nr_sectors);
+                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
             }
             break;
 
@@ -1519,9 +1529,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             goto fail;
         }
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        bytes_done += cur_bytes;
     }
     ret = 0;
 
@@ -2435,7 +2445,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
     if (head || tail) {
         int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
         uint64_t off;
-        int nr;
+        unsigned int nr;
 
         assert(head + count <= s->cluster_size);
 
@@ -2452,7 +2462,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
         /* We can have new write after previous check */
         offset = cl_start << BDRV_SECTOR_BITS;
         count = s->cluster_size;
-        nr = s->cluster_sectors;
+        nr = s->cluster_size;
         ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
         if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
             qemu_co_mutex_unlock(&s->lock);
@@ -3368,7 +3378,7 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_co_get_block_status = qcow2_co_get_block_status,
     .bdrv_set_key       = qcow2_set_key,
 
-    .bdrv_co_readv          = qcow2_co_readv,
+    .bdrv_co_preadv         = qcow2_co_preadv,
     .bdrv_co_writev         = qcow2_co_writev,
     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 7db9795..e2c42d5 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -544,7 +544,7 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                           int nb_sectors, bool enc, Error **errp);
 
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset);
+                             unsigned int *bytes, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
-- 
1.8.3.1

  parent reply	other threads:[~2016-06-16 14:08 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-16 14:07 [Qemu-devel] [PULL 00/39] Block layer patches Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 01/39] qcow2: Work with bytes in qcow2_get_cluster_offset() Kevin Wolf
2016-06-16 14:07 ` Kevin Wolf [this message]
2016-06-16 14:07 ` [Qemu-devel] [PULL 03/39] qcow2: Make copy_sectors() byte based Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 04/39] qcow2: Use bytes instead of sectors for QCowL2Meta Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 05/39] qcow2: Implement .bdrv_co_pwritev() Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 06/39] blockdev: clarify error on attempt to open locked tray Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 07/39] hmp: acquire aio_context in hmp_qemu_io Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 08/39] m25p80: fix test on blk_pread() return value Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 09/39] qemu-img bench: Fix uninitialised writethrough mode Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 10/39] block: Avoid bogus flags during mirroring Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 11/39] block: Assert that flags are in range Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 12/39] block: drop support for using qcow[2] encryption with system emulators Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 13/39] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 14/39] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 15/39] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 16/39] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 17/39] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 18/39] block: Don't enforce 512 byte minimum alignment Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 19/39] linux-aio: Cancel BH if not needed Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 20/39] doc: Fix mailing list address in tests/qemu-iotests/README Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 21/39] block: Introduce bdrv_preadv() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 22/39] block: Make .bdrv_load_vmstate() vectored Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 23/39] block: Allow .bdrv_load/save_vmstate() to return 0/-errno Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 24/39] block: Make bdrv_load/save_vmstate coroutine_fns Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 25/39] qcow2: Let vmstate call qcow2_co_preadv/pwrite directly Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 26/39] block: Remove bs->zero_beyond_eof Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 27/39] block: Fix snapshot=on with aio=native Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 28/39] block: use the block job list in bdrv_drain_all() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 29/39] block: use the block job list in qmp_query_block_jobs() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 30/39] block: Prevent sleeping jobs from resuming if they have been paused Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 31/39] block: Create the commit block job before reopening any image Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 32/39] iotests: 095: Clean up QEMU before showing image info Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 33/39] rbd:change error_setg() to error_setg_errno() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 34/39] block: Allow replacement of a BDS by its overlay Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 35/39] block/mirror: Fix target backing BDS Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 36/39] block/null: Implement bdrv_refresh_filename() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 37/39] iotests: Add test for post-mirror backing chains Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 38/39] iotests: Add test for oVirt-like storage migration Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 39/39] hbitmap: add 'pos < size' asserts Kevin Wolf
2016-06-16 15:06 ` [Qemu-devel] [PULL 00/39] Block layer patches Peter Maydell
2016-06-16 17:04   ` Eric Blake
2016-06-16 17:08     ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1466086108-24868-3-git-send-email-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.