All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>,
	Christoph Hellwig <hch@lst.de>,
	qemu-devel@nongnu.org, Avi Kivity <avi@redhat.com>
Subject: [Qemu-devel] Re: [PATCH v4 2/5] qed: Add QEMU Enhanced Disk image format
Date: Fri, 12 Nov 2010 16:43:01 +0100	[thread overview]
Message-ID: <4CDD6085.8020007@redhat.com> (raw)
In-Reply-To: <1288263684-18892-3-git-send-email-stefanha@linux.vnet.ibm.com>

Am 28.10.2010 13:01, schrieb Stefan Hajnoczi:
> This patch introduces the qed on-disk layout and implements image
> creation.  Later patches add read/write and other functionality.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  Makefile.objs |    1 +
>  block/qed.c   |  548 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/qed.h   |  145 +++++++++++++++
>  block_int.h   |    1 +
>  4 files changed, 695 insertions(+), 0 deletions(-)
>  create mode 100644 block/qed.c
>  create mode 100644 block/qed.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index f07fb01..7bae72a 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -14,6 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>  
>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
> +block-nested-y += qed.o
>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>  block-nested-$(CONFIG_WIN32) += raw-win32.o
>  block-nested-$(CONFIG_POSIX) += raw-posix.o
> diff --git a/block/qed.c b/block/qed.c
> new file mode 100644
> index 0000000..8469cf0
> --- /dev/null
> +++ b/block/qed.c
> @@ -0,0 +1,548 @@
> +/*
> + * QEMU Enhanced Disk Format
> + *
> + * Copyright IBM, Corp. 2010
> + *
> + * Authors:
> + *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
> + *  Anthony Liguori   <aliguori@us.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING.LIB file in the top-level directory.
> + *
> + */
> +
> +#include "qed.h"
> +
> +static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
> +                          const char *filename)
> +{
> +    const QEDHeader *header = (const QEDHeader *)buf;
> +
> +    if (buf_size < sizeof(*header)) {
> +        return 0;
> +    }
> +    if (le32_to_cpu(header->magic) != QED_MAGIC) {
> +        return 0;
> +    }
> +    return 100;
> +}
> +
> +/**
> + * Check whether an image format is raw
> + *
> + * @fmt:    Backing file format, may be NULL
> + */
> +static bool qed_fmt_is_raw(const char *fmt)
> +{
> +    return fmt && strcmp(fmt, "raw") == 0;
> +}

People shouldn't use them directly, but should we also consider file,
host_device, etc.?

> +
> +static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
> +{
> +    cpu->magic = le32_to_cpu(le->magic);
> +    cpu->cluster_size = le32_to_cpu(le->cluster_size);
> +    cpu->table_size = le32_to_cpu(le->table_size);
> +    cpu->header_size = le32_to_cpu(le->header_size);
> +    cpu->features = le64_to_cpu(le->features);
> +    cpu->compat_features = le64_to_cpu(le->compat_features);
> +    cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
> +    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
> +    cpu->image_size = le64_to_cpu(le->image_size);
> +    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
> +    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
> +}
> +
> +static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
> +{
> +    le->magic = cpu_to_le32(cpu->magic);
> +    le->cluster_size = cpu_to_le32(cpu->cluster_size);
> +    le->table_size = cpu_to_le32(cpu->table_size);
> +    le->header_size = cpu_to_le32(cpu->header_size);
> +    le->features = cpu_to_le64(cpu->features);
> +    le->compat_features = cpu_to_le64(cpu->compat_features);
> +    le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
> +    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
> +    le->image_size = cpu_to_le64(cpu->image_size);
> +    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
> +    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
> +}
> +
> +static int qed_write_header_sync(BDRVQEDState *s)
> +{
> +    QEDHeader le;
> +    int ret;
> +
> +    qed_header_cpu_to_le(&s->header, &le);
> +    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
> +    if (ret != sizeof(le)) {
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
> +{
> +    uint64_t table_entries;
> +    uint64_t l2_size;
> +
> +    table_entries = (table_size * cluster_size) / sizeof(uint64_t);
> +    l2_size = table_entries * cluster_size;
> +
> +    return l2_size * table_entries;
> +}
> +
> +static bool qed_is_cluster_size_valid(uint32_t cluster_size)
> +{
> +    if (cluster_size < QED_MIN_CLUSTER_SIZE ||
> +        cluster_size > QED_MAX_CLUSTER_SIZE) {
> +        return false;
> +    }
> +    if (cluster_size & (cluster_size - 1)) {
> +        return false; /* not power of 2 */
> +    }
> +    return true;
> +}
> +
> +static bool qed_is_table_size_valid(uint32_t table_size)
> +{
> +    if (table_size < QED_MIN_TABLE_SIZE ||
> +        table_size > QED_MAX_TABLE_SIZE) {
> +        return false;
> +    }
> +    if (table_size & (table_size - 1)) {
> +        return false; /* not power of 2 */
> +    }
> +    return true;
> +}
> +
> +static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
> +                                    uint32_t table_size)
> +{
> +    if (image_size % BDRV_SECTOR_SIZE != 0) {
> +        return false; /* not multiple of sector size */
> +    }
> +    if (image_size > qed_max_image_size(cluster_size, table_size)) {
> +        return false; /* image is too large */
> +    }
> +    return true;
> +}
> +
> +/**
> + * Read a string of known length from the image file
> + *
> + * @file:       Image file
> + * @offset:     File offset to start of string, in bytes
> + * @n:          String length in bytes
> + * @buf:        Destination buffer
> + * @buflen:     Destination buffer length in bytes
> + * @ret:        0 on success, -errno on failure
> + *
> + * The string is NUL-terminated.
> + */
> +static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
> +                           char *buf, size_t buflen)
> +{
> +    int ret;
> +    if (n >= buflen) {
> +        return -EINVAL;
> +    }
> +    ret = bdrv_pread(file, offset, buf, n);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    buf[n] = '\0';
> +    return 0;
> +}
> +
> +static int bdrv_qed_open(BlockDriverState *bs, int flags)
> +{
> +    BDRVQEDState *s = bs->opaque;
> +    QEDHeader le_header;
> +    int64_t file_size;
> +    int ret;
> +
> +    s->bs = bs;
> +
> +    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    ret = 0; /* ret should always be 0 or -errno */
> +    qed_header_le_to_cpu(&le_header, &s->header);
> +
> +    if (s->header.magic != QED_MAGIC) {
> +        return -ENOENT;
> +    }

ENOENT seems a bit odd for a wrong magic number, especially if it's used
for the error message. Wouldn't EINVAL or ENOTSUP be a closer match?

> +    if (s->header.features & ~QED_FEATURE_MASK) {
> +        return -ENOTSUP; /* image uses unsupported feature bits */
> +    }
> +    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
> +        return -EINVAL;
> +    }
> +
> +    /* Round down file size to the last cluster */
> +    file_size = bdrv_getlength(bs->file);
> +    if (file_size < 0) {
> +        return file_size;
> +    }
> +    s->file_size = qed_start_of_cluster(s, file_size);
> +
> +    if (!qed_is_table_size_valid(s->header.table_size)) {
> +        return -EINVAL;
> +    }
> +    if (!qed_is_image_size_valid(s->header.image_size,
> +                                 s->header.cluster_size,
> +                                 s->header.table_size)) {
> +        return -EINVAL;
> +    }
> +    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
> +        return -EINVAL;
> +    }
> +
> +    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
> +                      sizeof(uint64_t);
> +    s->l2_shift = ffs(s->header.cluster_size) - 1;
> +    s->l2_mask = s->table_nelems - 1;
> +    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
> +
> +    if ((s->header.features & QED_F_BACKING_FILE)) {
> +        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
> +                              s->header.backing_filename_size, bs->backing_file,
> +                              sizeof(bs->backing_file));
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
> +            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
> +        }
> +    }
> +
> +    /* Reset unknown autoclear feature bits.  This is a backwards
> +     * compatibility mechanism that allows images to be opened by older
> +     * programs, which "knock out" unknown feature bits.  When an image is
> +     * opened by a newer program again it can detect that the autoclear
> +     * feature is no longer valid.
> +     */
> +    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
> +        !bdrv_is_read_only(bs->file)) {
> +        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
> +
> +        ret = qed_write_header_sync(s);
> +        if (ret) {
> +            return ret;
> +        }
> +
> +        /* From here on only known autoclear feature bits are valid */
> +        bdrv_flush(bs->file);
> +    }
> +
> +    return ret;
> +}
> +
> +static void bdrv_qed_close(BlockDriverState *bs)
> +{
> +}
> +
> +static void bdrv_qed_flush(BlockDriverState *bs)
> +{
> +    bdrv_flush(bs->file);
> +}

This conflicts with one of my recent changes. bdrv_flush should return
an int now.

> +
> +static int qed_create(const char *filename, uint32_t cluster_size,
> +                      uint64_t image_size, uint32_t table_size,
> +                      const char *backing_file, const char *backing_fmt)
> +{
> +    QEDHeader header = {
> +        .magic = QED_MAGIC,
> +        .cluster_size = cluster_size,
> +        .table_size = table_size,
> +        .header_size = 1,
> +        .features = 0,
> +        .compat_features = 0,
> +        .l1_table_offset = cluster_size,
> +        .image_size = image_size,
> +    };
> +    QEDHeader le_header;
> +    uint8_t *l1_table = NULL;
> +    size_t l1_size = header.cluster_size * header.table_size;
> +    int ret = 0;
> +    BlockDriverState *bs = NULL;
> +
> +    ret = bdrv_create_file(filename, NULL);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    if (backing_file) {
> +        header.features |= QED_F_BACKING_FILE;
> +        header.backing_filename_offset = sizeof(le_header);
> +        header.backing_filename_size = strlen(backing_file);
> +
> +        if (qed_fmt_is_raw(backing_fmt)) {
> +            header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
> +        }
> +    }
> +
> +    qed_header_cpu_to_le(&header, &le_header);
> +    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
> +    if (ret < 0) {
> +        goto out;
> +    }
> +    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
> +                      header.backing_filename_size);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    l1_table = qemu_mallocz(l1_size);
> +    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    ret = 0; /* success */
> +out:
> +    qemu_free(l1_table);
> +    bdrv_delete(bs);
> +    return ret;
> +}
> +
> +static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
> +{
> +    uint64_t image_size = 0;
> +    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
> +    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
> +    const char *backing_file = NULL;
> +    const char *backing_fmt = NULL;
> +
> +    while (options && options->name) {
> +        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
> +            image_size = options->value.n;
> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
> +            backing_file = options->value.s;
> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
> +            backing_fmt = options->value.s;

I'm not sure here. It doesn't really matter for QED, but I find it
strange that -o backing_fmt=foobar works and gives you a non-raw backing
file. Should we check if the format exists at least?

Kevin

  reply	other threads:[~2010-11-12 15:42 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-28 11:01 [Qemu-devel] [PATCH v4 0/5] qed: Add QEMU Enhanced Disk format Stefan Hajnoczi
2010-10-28 11:01 ` [Qemu-devel] [PATCH v4 1/5] docs: Add QED image format specification Stefan Hajnoczi
2010-11-12 13:58   ` [Qemu-devel] " Kevin Wolf
2010-11-12 14:04     ` Stefan Hajnoczi
2010-10-28 11:01 ` [Qemu-devel] [PATCH v4 2/5] qed: Add QEMU Enhanced Disk image format Stefan Hajnoczi
2010-11-12 15:43   ` Kevin Wolf [this message]
2010-11-12 16:34     ` [Qemu-devel] " Stefan Hajnoczi
2010-11-12 16:52       ` Kevin Wolf
2010-11-12 17:24         ` Markus Armbruster
2010-11-12 17:34           ` Kevin Wolf
2010-10-28 11:01 ` [Qemu-devel] [PATCH v4 3/5] qed: Table, L2 cache, and cluster functions Stefan Hajnoczi
2010-11-12 17:26   ` [Qemu-devel] " Kevin Wolf
2010-11-12 17:40     ` Stefan Hajnoczi
2010-10-28 11:01 ` [Qemu-devel] [PATCH v4 4/5] qed: Read/write support Stefan Hajnoczi
2010-10-28 11:01 ` [Qemu-devel] [PATCH v4 5/5] qed: Consistency check support Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CDD6085.8020007@redhat.com \
    --to=kwolf@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=avi@redhat.com \
    --cc=hch@lst.de \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.