All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, mreitz@redhat.com, stefanha@redhat.com,
	famz@redhat.com, sw@weilnetz.de, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH v2 13/17] vmdk: Implement .bdrv_co_pwritev() interface
Date: Thu, 28 Apr 2016 15:16:42 +0200	[thread overview]
Message-ID: <1461849406-29743-14-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1461849406-29743-1-git-send-email-kwolf@redhat.com>

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 209 ++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 135 insertions(+), 74 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 6c447ad..f243527 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1016,27 +1016,26 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
  */
 static int get_whole_cluster(BlockDriverState *bs,
                              VmdkExtent *extent,
-                             uint64_t cluster_sector_num,
-                             uint64_t sector_num,
-                             uint64_t skip_start_sector,
-                             uint64_t skip_end_sector)
+                             uint64_t cluster_offset,
+                             uint64_t offset,
+                             uint64_t skip_start_bytes,
+                             uint64_t skip_end_bytes)
 {
     int ret = VMDK_OK;
     int64_t cluster_bytes;
     uint8_t *whole_grain;
 
     /* For COW, align request sector_num to cluster start */
-    sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
     cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+    offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
     whole_grain = qemu_blockalign(bs, cluster_bytes);
 
     if (!bs->backing) {
-        memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
-        memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
-               cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+        memset(whole_grain, 0, skip_start_bytes);
+        memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
     }
 
-    assert(skip_end_sector <= extent->cluster_sectors);
+    assert(skip_end_bytes <= cluster_bytes);
     /* we will be here if it's first write on non-exist grain(cluster).
      * try to read from parent image, if exist */
     if (bs->backing && !vmdk_is_cid_valid(bs)) {
@@ -1045,42 +1044,43 @@ static int get_whole_cluster(BlockDriverState *bs,
     }
 
     /* Read backing data before skip range */
-    if (skip_start_sector > 0) {
+    if (skip_start_bytes > 0) {
         if (bs->backing) {
-            ret = bdrv_read(bs->backing->bs, sector_num,
-                            whole_grain, skip_start_sector);
+            ret = bdrv_pread(bs->backing->bs, offset, whole_grain,
+                             skip_start_bytes);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
-                         skip_start_sector);
+        ret = bdrv_pwrite(extent->file->bs, cluster_offset, whole_grain,
+                          skip_start_bytes);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
         }
     }
     /* Read backing data after skip range */
-    if (skip_end_sector < extent->cluster_sectors) {
+    if (skip_end_bytes < cluster_bytes) {
         if (bs->backing) {
-            ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
-                            whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                            extent->cluster_sectors - skip_end_sector);
+            ret = bdrv_pread(bs->backing->bs, offset + skip_end_bytes,
+                             whole_grain + skip_end_bytes,
+                             cluster_bytes - skip_end_bytes);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
-                         whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                         extent->cluster_sectors - skip_end_sector);
+        ret = bdrv_pwrite(extent->file->bs, cluster_offset + skip_end_bytes,
+                          whole_grain + skip_end_bytes,
+                          cluster_bytes - skip_end_bytes);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
         }
     }
 
+    ret = VMDK_OK;
 exit:
     qemu_vfree(whole_grain);
     return ret;
@@ -1142,8 +1142,8 @@ static int get_cluster_offset(BlockDriverState *bs,
                               uint64_t offset,
                               bool allocate,
                               uint64_t *cluster_offset,
-                              uint64_t skip_start_sector,
-                              uint64_t skip_end_sector)
+                              uint64_t skip_start_bytes,
+                              uint64_t skip_end_bytes)
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
@@ -1230,10 +1230,8 @@ static int get_cluster_offset(BlockDriverState *bs,
          * This problem may occur because of insufficient space on host disk
          * or inappropriate VM shutdown.
          */
-        ret = get_whole_cluster(bs, extent,
-                                cluster_sector,
-                                offset >> BDRV_SECTOR_BITS,
-                                skip_start_sector, skip_end_sector);
+        ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
+                                offset, skip_start_bytes, skip_end_bytes);
         if (ret) {
             return ret;
         }
@@ -1330,38 +1328,57 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
 }
 
 static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
-                            int64_t offset_in_cluster, const uint8_t *buf,
-                            int nb_sectors, int64_t sector_num)
+                            int64_t offset_in_cluster, QEMUIOVector *qiov,
+                            uint64_t qiov_offset, uint64_t n_bytes,
+                            uint64_t offset)
 {
     int ret;
     VmdkGrainMarker *data = NULL;
     uLongf buf_len;
-    const uint8_t *write_buf = buf;
-    int write_len = nb_sectors * 512;
+    QEMUIOVector local_qiov;
+    struct iovec iov;
     int64_t write_offset;
     int64_t write_end_sector;
 
     if (extent->compressed) {
+        void *compressed_data;
+
         if (!extent->has_marker) {
             ret = -EINVAL;
             goto out;
         }
         buf_len = (extent->cluster_sectors << 9) * 2;
         data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
-        if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
-                buf_len == 0) {
+
+        compressed_data = g_malloc(n_bytes);
+        qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
+        ret = compress(data->data, &buf_len, compressed_data, n_bytes);
+        g_free(compressed_data);
+
+        if (ret != Z_OK || buf_len == 0) {
             ret = -EINVAL;
             goto out;
         }
-        data->lba = sector_num;
+
+        data->lba = offset >> BDRV_SECTOR_BITS;
         data->size = buf_len;
-        write_buf = (uint8_t *)data;
-        write_len = buf_len + sizeof(VmdkGrainMarker);
+
+        n_bytes = buf_len + sizeof(VmdkGrainMarker);
+        iov = (struct iovec) {
+            .iov_base   = data,
+            .iov_len    = n_bytes,
+        };
+        qemu_iovec_init_external(&local_qiov, &iov, 1);
+    } else {
+        qemu_iovec_init(&local_qiov, qiov->niov);
+        qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
     }
+
     write_offset = cluster_offset + offset_in_cluster,
-    ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
+    ret = bdrv_co_pwritev(extent->file->bs, write_offset, n_bytes,
+                          &local_qiov, 0);
 
-    write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
+    write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
 
     if (extent->compressed) {
         extent->next_cluster_sector = write_end_sector;
@@ -1370,13 +1387,15 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
                                           write_end_sector);
     }
 
-    if (ret != write_len) {
-        ret = ret < 0 ? ret : -EIO;
+    if (ret < 0) {
         goto out;
     }
     ret = 0;
  out:
     g_free(data);
+    if (!extent->compressed) {
+        qemu_iovec_destroy(&local_qiov);
+    }
     return ret;
 }
 
@@ -1525,38 +1544,38 @@ fail:
  *
  * Returns: error code with 0 for success.
  */
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                      const uint8_t *buf, int nb_sectors,
-                      bool zeroed, bool zero_dry_run)
+static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
+                       uint64_t bytes, QEMUIOVector *qiov,
+                       bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
     int ret;
-    int64_t index_in_cluster, n;
+    int64_t offset_in_cluster, n_bytes;
     uint64_t cluster_offset;
+    uint64_t bytes_done = 0;
     VmdkMetaData m_data;
 
-    if (sector_num > bs->total_sectors) {
-        error_report("Wrong offset: sector_num=0x%" PRIx64
+    if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
+        error_report("Wrong offset: offset=0x%" PRIx64
                      " total_sectors=0x%" PRIx64,
-                     sector_num, bs->total_sectors);
+                     offset, bs->total_sectors);
         return -EIO;
     }
 
-    while (nb_sectors > 0) {
-        extent = find_extent(s, sector_num, extent);
+    while (bytes > 0) {
+        extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
         if (!extent) {
             return -EIO;
         }
-        index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
-        n = extent->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-        ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+        offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+        n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+                             - offset_in_cluster);
+
+        ret = get_cluster_offset(bs, extent, &m_data, offset,
                                  !(extent->compressed || zeroed),
-                                 &cluster_offset,
-                                 index_in_cluster, index_in_cluster + n);
+                                 &cluster_offset, offset_in_cluster,
+                                 offset_in_cluster + n_bytes);
         if (extent->compressed) {
             if (ret == VMDK_OK) {
                 /* Refuse write to allocated cluster for streamOptimized */
@@ -1565,7 +1584,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 return -EIO;
             } else {
                 /* allocate */
-                ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+                ret = get_cluster_offset(bs, extent, &m_data, offset,
                                          true, &cluster_offset, 0, 0);
             }
         }
@@ -1575,9 +1594,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         if (zeroed) {
             /* Do zeroed write, buf is ignored */
             if (extent->has_zero_grain &&
-                    index_in_cluster == 0 &&
-                    n >= extent->cluster_sectors) {
-                n = extent->cluster_sectors;
+                    offset_in_cluster == 0 &&
+                    n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
+                n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
                 if (!zero_dry_run) {
                     /* update L2 tables */
                     if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
@@ -1589,9 +1608,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 return -ENOTSUP;
             }
         } else {
-            ret = vmdk_write_extent(extent,
-                            cluster_offset, index_in_cluster * 512,
-                            buf, n, sector_num);
+            ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
+                                    qiov, bytes_done, n_bytes, offset);
             if (ret) {
                 return ret;
             }
@@ -1604,9 +1622,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 }
             }
         }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
+        bytes -= n_bytes;
+        offset += n_bytes;
+        bytes_done += n_bytes;
 
         /* update CID on the first write every time the virtual disk is
          * opened */
@@ -1621,25 +1639,65 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
-static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
-                                      const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                QEMUIOVector *qiov, int flags)
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
     qemu_co_mutex_lock(&s->lock);
-    ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+    ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
 
+typedef struct VmdkWriteCompressedCo {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    const uint8_t *buf;
+    int nb_sectors;
+    int ret;
+} VmdkWriteCompressedCo;
+
+static void vmdk_co_write_compressed(void *opaque)
+{
+    VmdkWriteCompressedCo *co = opaque;
+    QEMUIOVector local_qiov;
+    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+    struct iovec iov = (struct iovec) {
+        .iov_base   = (uint8_t*) co->buf,
+        .iov_len    = bytes,
+    };
+    qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+    co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
+}
+
 static int vmdk_write_compressed(BlockDriverState *bs,
                                  int64_t sector_num,
                                  const uint8_t *buf,
                                  int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
+
     if (s->num_extents == 1 && s->extents[0].compressed) {
-        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+        Coroutine *co;
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        VmdkWriteCompressedCo data = {
+            .bs         = bs,
+            .sector_num = sector_num,
+            .buf        = buf,
+            .nb_sectors = nb_sectors,
+            .ret        = -EINPROGRESS,
+        };
+        co = qemu_coroutine_create(vmdk_co_write_compressed);
+        qemu_coroutine_enter(co, &data);
+        while (data.ret == -EINPROGRESS) {
+            aio_poll(aio_context, true);
+        }
+        return data.ret;
     } else {
         return -ENOTSUP;
     }
@@ -1652,12 +1710,15 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
+    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
+    uint64_t bytes = nb_sectors * BDRV_SECTOR_SIZE;
+
     qemu_co_mutex_lock(&s->lock);
     /* write zeroes could fail if sectors not aligned to cluster, test it with
      * dry_run == true before really updating image */
-    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+    ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
     if (!ret) {
-        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+        ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
     }
     qemu_co_mutex_unlock(&s->lock);
     return ret;
@@ -2341,7 +2402,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_check                   = vmdk_check,
     .bdrv_reopen_prepare          = vmdk_reopen_prepare,
     .bdrv_co_preadv               = vmdk_co_preadv,
-    .bdrv_write                   = vmdk_co_write,
+    .bdrv_co_pwritev              = vmdk_co_pwritev,
     .bdrv_write_compressed        = vmdk_write_compressed,
     .bdrv_co_write_zeroes         = vmdk_co_write_zeroes,
     .bdrv_close                   = vmdk_close,
-- 
1.8.3.1

  parent reply	other threads:[~2016-04-28 13:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-28 13:16 [Qemu-devel] [PATCH v2 00/17] block: Convert .bdrv_read/write to .bdrv_preadv/pwritev Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 01/17] block: Introduce bdrv_driver_preadv() Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 02/17] block: Introduce bdrv_driver_pwritev() Kevin Wolf
2016-04-29  1:14   ` Fam Zheng
2016-04-30 21:54   ` Eric Blake
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 03/17] block: Support AIO drivers in bdrv_driver_preadv/pwritev() Kevin Wolf
2016-04-29  1:22   ` Fam Zheng
2016-04-29  3:25     ` Fam Zheng
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 04/17] block: Rename bdrv_co_do_preadv/writev to bdrv_co_preadv/writev Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 05/17] block: Introduce .bdrv_co_preadv/pwritev BlockDriver function Kevin Wolf
2016-04-28 15:21   ` Eric Blake
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 06/17] bochs: Implement .bdrv_co_preadv() interface Kevin Wolf
2016-04-28 15:50   ` Eric Blake
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 07/17] cloop: " Kevin Wolf
2016-04-29  2:28   ` Eric Blake
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 08/17] dmg: " Kevin Wolf
2016-04-29  2:39   ` Eric Blake
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 09/17] vdi: " Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 10/17] vdi: Implement .bdrv_co_pwritev() interface Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 11/17] vmdk: Add vmdk_find_offset_in_cluster() Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 12/17] vmdk: Implement .bdrv_co_preadv() interface Kevin Wolf
2016-04-28 13:16 ` Kevin Wolf [this message]
2016-04-29  3:08   ` [Qemu-devel] [PATCH v2 13/17] vmdk: Implement .bdrv_co_pwritev() interface Fam Zheng
2016-04-29  7:41     ` Kevin Wolf
2016-04-29  8:49     ` Pavel Butsykin
2016-04-29  9:49       ` Kevin Wolf
2016-04-29 10:31         ` Pavel Butsykin
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 14/17] vpc: Implement .bdrv_co_preadv() interface Kevin Wolf
2016-04-29  3:14   ` Fam Zheng
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 15/17] vpc: Implement .bdrv_co_pwritev() interface Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 16/17] vvfat: Implement .bdrv_co_preadv/pwritev interfaces Kevin Wolf
2016-04-28 13:16 ` [Qemu-devel] [PATCH v2 17/17] block: Remove BlockDriver.bdrv_read/write Kevin Wolf
2016-04-29  3:29 ` [Qemu-devel] [PATCH v2 00/17] block: Convert .bdrv_read/write to .bdrv_preadv/pwritev Fam Zheng
2016-04-29  9:57 ` Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1461849406-29743-14-git-send-email-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=famz@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=sw@weilnetz.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.