All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xiaodong Gong <gordongong0350@gmail.com>
To: kwolf@redhat.com
Cc: qemu-devel@nongnu.org, stefanha@redhat.com,
	Xiaodong Gong <gordongong0350@gmail.com>
Subject: [Qemu-devel] [PATCH v2] Support vhd type VHD_DIFFERENCING
Date: Mon,  8 Sep 2014 22:41:39 +0800	[thread overview]
Message-ID: <1410187299-8463-1-git-send-email-gordon@localhost.localdomain> (raw)

From: Xiaodong Gong <gordongong0350@gmail.com>

Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC,
so qemu can't read snapshot volume of vhd, and can't support
other storage features of vhd file.

This patch add read parent information in function "vpc_open",
read bitmap in "vpc_read", and change bitmap in "vpc_write".

Signed-off-by: Xiaodong Gong <gordongong0350@gmail.com>
---
 block/vpc.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 261 insertions(+), 68 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index c024b4c..3ba0d57 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -33,13 +33,18 @@
 /**************************************************************/
 
 #define HEADER_SIZE 512
+#define DYNAMIC_HEADER_SIZE 1024
+#define PARENT_LOCATOR_NUM 8
+#define PARENT_PREFIX_LEN 7 /* such as file:// */
+#define TBBATMAP_HEAD_SIZE 28
+#define MACX 0x5863614d /* big endian */
 
 //#define CACHE
 
 enum vhd_type {
     VHD_FIXED           = 2,
     VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
+    VHD_DIFF            = 4,
 };
 
 // Seconds since Jan 1, 2000 0:00:00 (UTC)
@@ -138,6 +143,15 @@ typedef struct BDRVVPCState {
     Error *migration_blocker;
 } BDRVVPCState;
 
+typedef struct vhd_tdbatmap_header {
+    char magic[8]; /* always "tdbatmap" */
+
+    uint64_t batmap_offset;
+    uint32_t batmap_size;
+    uint32_t batmap_version;
+    uint32_t checksum;
+} QEMU_PACKED VHDTdBatmapHeader;
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
     uint32_t res = 0;
@@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-	return 100;
+        return 100;
     return 0;
 }
 
@@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int i;
     VHDFooter *footer;
     VHDDynDiskHeader *dyndisk_header;
-    uint8_t buf[HEADER_SIZE];
+    uint8_t buf[DYNAMIC_HEADER_SIZE];
+    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
     uint32_t checksum;
     uint64_t computed_size;
-    int disk_type = VHD_DYNAMIC;
+    uint32_t disk_type;
     int ret;
+    VHDTdBatmapHeader *tdbatmap_header;
+    int parent_locator_offset = 0;
+    int64_t data_offset = 0;
+    int data_length = 0;
+    uint32_t platform;
 
     ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
@@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     footer = (VHDFooter *) s->footer_buf;
+    disk_type = be32_to_cpu(footer->type);
+
     if (strncmp(footer->creator, "conectix", 8)) {
         int64_t offset = bdrv_getlength(bs->file);
         if (offset < 0) {
@@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    if (disk_type == VHD_DYNAMIC) {
+    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
         ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
+                         DYNAMIC_HEADER_SIZE);
         if (ret < 0) {
             goto fail;
         }
@@ -286,6 +308,56 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         s->free_data_block_offset =
             (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
 
+        /* Read tdbatmap header by offset */
+        ret = bdrv_pread(bs->file, s->free_data_block_offset,
+            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
+        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
+            s->free_data_block_offset =
+                be32_to_cpu(tdbatmap_header->batmap_size) * 512
+                + be64_to_cpu(tdbatmap_header->batmap_offset);
+        }
+
+        /* Read backing file location from dyn header table */
+        if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
+            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
+                data_offset =
+                    be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
+                data_length =
+                    be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
+                platform = dyndisk_header->parent_locator[i].platform;
+
+                if (MACX == platform) {
+                    if (data_offset + PARENT_PREFIX_LEN >
+                        s->max_table_entries * s->block_size) {
+                            goto fail;
+                    }
+                        if (data_length - PARENT_PREFIX_LEN > 1024) {
+                            goto fail;
+                    }
+                    ret = bdrv_pread(bs->file, data_offset + PARENT_PREFIX_LEN,
+                        bs->backing_file, data_length - PARENT_PREFIX_LEN);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+
+                    bs->backing_file[data_length - PARENT_PREFIX_LEN] = '\0';
+                }
+
+                if (data_offset > parent_locator_offset) {
+                    parent_locator_offset = data_offset;
+                }
+            }
+        }
+
+        if (parent_locator_offset + 512 > s->free_data_block_offset) {
+            s->free_data_block_offset = parent_locator_offset + 512;
+        }
+
         for (i = 0; i < s->max_table_entries; i++) {
             be32_to_cpus(&s->pagetable[i]);
             if (s->pagetable[i] != 0xFFFFFFFF) {
@@ -363,19 +435,6 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
     bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
     block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
 
-    // We must ensure that we don't write to any sectors which are marked as
-    // unused in the bitmap. We get away with setting all bits in the block
-    // bitmap each time we write to a new block. This might cause Virtual PC to
-    // miss sparse read optimization, but it's not a problem in terms of
-    // correctness.
-    if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
-
-        s->last_bitmap_offset = bitmap_offset;
-        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
-    }
-
 //    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
 //	sector_num, pagetable_index, pageentry_index,
 //	bitmap_offset, block_offset);
@@ -412,6 +471,53 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
 }
 
 /*
+ * Returns the absolute byte offset of the given sector in the differencing
+ * image file.
+ *
+ * If the sector is not allocated, -1 is returned instead. If the sector is
+ * allocated in the backing file, -2 is returned. If the sector is allocated
+ * in current file, the block offset is returned.
+ */
+static inline int64_t get_sector_offset_diff(BlockDriverState *bs,
+    int64_t sector_num)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint64_t block_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint32_t bitmap_index, bitmapentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    if (pagetable_index >= s->max_table_entries) {
+        return -1;
+    } else if (0xffffffff == s->pagetable[pagetable_index]) {
+        return -2;
+    }
+
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_index = pageentry_index / 8;
+    bitmapentry_index = 7 - pageentry_index % 8;
+    if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
+        block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+        return block_offset;
+    } else {
+         return -2;
+    }
+}
+
+/*
  * Writes the footer to the end of the image file. This is needed when the
  * file grows as it overwrites the old footer
  *
@@ -437,7 +543,8 @@ static int rewrite_footer(BlockDriverState* bs)
  *
  * Returns the sectors' offset in the image file on success and < 0 on error
  */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
+    bool isdiff)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t bat_offset;
@@ -457,7 +564,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     s->pagetable[index] = s->free_data_block_offset / 512;
 
     // Initialize the block's bitmap
-    memset(bitmap, 0xff, s->bitmap_size);
+    if (isdiff) {
+        memset(bitmap, 0x0, s->bitmap_size);
+    } else {
+        memset(bitmap, 0xff, s->bitmap_size);
+    }
     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
         s->bitmap_size);
     if (ret < 0) {
@@ -501,36 +612,62 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int ret;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    int ret;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_read(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 0);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
-        }
+    case VHD_DYNAMIC:
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
 
-        if (offset == -1) {
-            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
-        } else {
-            ret = bdrv_pread(bs->file, offset, buf,
-                sectors * BDRV_SECTOR_SIZE);
-            if (ret != sectors * BDRV_SECTOR_SIZE) {
-                return -1;
+            offset = get_sector_offset(bs, sector_num, 0);
+            if (-1 == offset) {
+                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf,
+                    sectors * BDRV_SECTOR_SIZE);
+                if (ret != sectors * BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
             }
+
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
         }
+        break;
+    case VHD_DIFF:
+        while (nb_sectors > 0) {
+            offset = get_sector_offset_diff(bs, sector_num);
+            if (-1 == offset) {
+                memset(buf, 0, BDRV_SECTOR_SIZE);
+            } else if (-2 == offset) {
+                ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
+                    , buf, BDRV_SECTOR_SIZE);
+                if (ret < 0) {
+                    return -1;
+                }
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
+                if (ret != BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors--;
+            sector_num++;
+            buf += BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
     return 0;
 }
@@ -546,43 +683,98 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
+    int64_t sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    uint32_t bitmap_index, bitmapbit_index;
+    int i;
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    for (i = 0; i < sectors; i++) {
+        bitmap_index = pageentry_index / 8;
+        bitmapbit_index = 7 - pageentry_index % 8;
+        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
+        pageentry_index++;
+    }
+    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
 static int vpc_write(BlockDriverState *bs, int64_t sector_num,
     const uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
+    VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    bool isdiff = true;
     int ret;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_write(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 1);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
+    case VHD_DYNAMIC:
+    case VHD_DIFF:
+        if (VHD_DYNAMIC == be32_to_cpu(footer->type)) {
+            isdiff = false;
         }
 
-        if (offset == -1) {
-            offset = alloc_block(bs, sector_num);
-            if (offset < 0)
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
+
+            offset = get_sector_offset(bs, sector_num, 1);
+            if (offset == -1) {
+                offset = alloc_block(bs, sector_num, isdiff);
+                if (offset < 0) {
+                    return -1;
+                }
+            }
+
+            ret = bdrv_pwrite(bs->file, offset, buf,
+                sectors * BDRV_SECTOR_SIZE);
+            if (ret != sectors * BDRV_SECTOR_SIZE) {
                 return -1;
-        }
+            }
 
-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
-        if (ret != sectors * BDRV_SECTOR_SIZE) {
-            return -1;
-        }
+            if (true == isdiff) {
+                ret = write_bitmap(bs, sector_num, sectors);
+                if (ret < 0) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
-
     return 0;
 }
 
@@ -910,6 +1102,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_close             = vpc_close,
     .bdrv_reopen_prepare    = vpc_reopen_prepare,
     .bdrv_create            = vpc_create,
+    .supports_backing       = true,
 
     .bdrv_read              = vpc_co_read,
     .bdrv_write             = vpc_co_write,
-- 
1.8.3.1

             reply	other threads:[~2014-09-08 14:42 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-08 14:41 Xiaodong Gong [this message]
2014-09-08 16:15 ` [Qemu-devel] [PATCH v2] Support vhd type VHD_DIFFERENCING Kevin Wolf
2014-09-09  9:00 ` Philipp Hahn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1410187299-8463-1-git-send-email-gordon@localhost.localdomain \
    --to=gordongong0350@gmail.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.