All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] sheepdog: add data preallocation support
@ 2011-05-21 12:35 MORITA Kazutaka
  2011-05-23  9:19 ` Stefan Hajnoczi
  2011-07-01  8:29 ` Kevin Wolf
  0 siblings, 2 replies; 10+ messages in thread
From: MORITA Kazutaka @ 2011-05-21 12:35 UTC (permalink / raw)
  To: kwolf; +Cc: sheepdog, qemu-devel

This introduces a qemu-img create option for sheepdog which allows the
data to be preallocated (note that sheepdog always preallocates
metadata).  This is necessary to use Sheepdog volumes as a backend
storage for iSCSI target.  More information is available at
https://sourceforge.net/apps/trac/sheepdog/wiki/General%20Protocol%20Support

The option is disabled by default and you need to enable it like the
following:

qemu-img create sheepdog:test -o preallocation=data 1G

Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 block/sheepdog.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 72 insertions(+), 1 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 0392ca8..38ca9aa 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1292,6 +1292,57 @@ static int do_sd_create(char *filename, int64_t vdi_size,
     return 0;
 }
 
+static int sd_prealloc(uint32_t vid, int64_t vdi_size)
+{
+    int fd, ret;
+    SheepdogInode *inode;
+    char *buf;
+    unsigned long idx, max_idx;
+
+    fd = connect_to_sdog(NULL, NULL);
+    if (fd < 0) {
+        return -EIO;
+    }
+
+    inode = qemu_malloc(sizeof(*inode));
+    buf = qemu_malloc(SD_DATA_OBJ_SIZE);
+
+    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
+                      0, sizeof(*inode), 0);
+
+    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
+
+    for (idx = 0; idx < max_idx; idx++) {
+        uint64_t oid;
+        oid = vid_to_data_oid(vid, idx);
+
+        if (inode->data_vdi_id[idx]) {
+            ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]),
+                              1, SD_DATA_OBJ_SIZE, 0);
+            if (ret)
+                goto out;
+        } else {
+            memset(buf, 0, SD_DATA_OBJ_SIZE);
+        }
+
+        ret = write_object(fd, buf, oid, 1, SD_DATA_OBJ_SIZE, 0, 1);
+        if (ret)
+            goto out;
+
+        inode->data_vdi_id[idx] = vid;
+        ret = write_object(fd, (char *)inode, vid_to_vdi_oid(vid),
+                           1, sizeof(*inode), 0, 0);
+        if (ret)
+            goto out;
+    }
+out:
+    free(inode);
+    free(buf);
+    closesocket(fd);
+
+    return ret;
+}
+
 static int sd_create(const char *filename, QEMUOptionParameter *options)
 {
     int ret;
@@ -1301,6 +1352,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
     BDRVSheepdogState s;
     char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
     uint32_t snapid;
+    int prealloc = 0;
 
     strstart(filename, "sheepdog:", (const char **)&filename);
 
@@ -1317,6 +1369,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
             vdi_size = options->value.n;
         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
             backing_file = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "data")) {
+                prealloc = 1;
+            } else {
+                error_report("Invalid preallocation mode: '%s'\n",
+                    options->value.s);
+                return -EINVAL;
+            }
         }
         options++;
     }
@@ -1354,7 +1416,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
         bdrv_delete(bs);
     }
 
-    return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
+    ret = do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
+    if (!prealloc || ret)
+        return ret;
+
+    return sd_prealloc(vid, vdi_size);
 }
 
 static void sd_close(BlockDriverState *bs)
@@ -1990,6 +2056,11 @@ static QEMUOptionParameter sd_create_options[] = {
         .type = OPT_STRING,
         .help = "File name of a base image"
     },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, data)"
+    },
     { NULL }
 };
 
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] sheepdog: add data preallocation support
  2011-05-21 12:35 [Qemu-devel] [PATCH] sheepdog: add data preallocation support MORITA Kazutaka
@ 2011-05-23  9:19 ` Stefan Hajnoczi
  2011-05-23 11:13   ` MORITA Kazutaka
  2011-07-01  8:29 ` Kevin Wolf
  1 sibling, 1 reply; 10+ messages in thread
From: Stefan Hajnoczi @ 2011-05-23  9:19 UTC (permalink / raw)
  To: MORITA Kazutaka; +Cc: kwolf, sheepdog, qemu-devel

On Sat, May 21, 2011 at 1:35 PM, MORITA Kazutaka
<morita.kazutaka@lab.ntt.co.jp> wrote:
> +static int sd_prealloc(uint32_t vid, int64_t vdi_size)
> +{
> +    int fd, ret;
> +    SheepdogInode *inode;
> +    char *buf;
> +    unsigned long idx, max_idx;
[...]
> +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
> +
> +    for (idx = 0; idx < max_idx; idx++) {

Do you want to use uint64_t here instead of unsigned long, which may
be too small on 32-bit hosts?

Stefan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] sheepdog: add data preallocation support
  2011-05-23  9:19 ` Stefan Hajnoczi
@ 2011-05-23 11:13   ` MORITA Kazutaka
  2011-05-23 13:23     ` Stefan Hajnoczi
  0 siblings, 1 reply; 10+ messages in thread
From: MORITA Kazutaka @ 2011-05-23 11:13 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: kwolf, sheepdog, qemu-devel, MORITA Kazutaka

At Mon, 23 May 2011 10:19:13 +0100,
Stefan Hajnoczi wrote:
> 
> On Sat, May 21, 2011 at 1:35 PM, MORITA Kazutaka
> <morita.kazutaka@lab.ntt.co.jp> wrote:
> > +static int sd_prealloc(uint32_t vid, int64_t vdi_size)
> > +{
> > +    int fd, ret;
> > +    SheepdogInode *inode;
> > +    char *buf;
> > +    unsigned long idx, max_idx;
> [...]
> > +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
> > +
> > +    for (idx = 0; idx < max_idx; idx++) {
> 
> Do you want to use uint64_t here instead of unsigned long, which may
> be too small on 32-bit hosts?

The index of a Sheepdog data object is within 32-bit range, so using
an unsigned long is safe here.

Thanks,

Kazutaka

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] sheepdog: add data preallocation support
  2011-05-23 11:13   ` MORITA Kazutaka
@ 2011-05-23 13:23     ` Stefan Hajnoczi
  0 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2011-05-23 13:23 UTC (permalink / raw)
  To: MORITA Kazutaka; +Cc: kwolf, sheepdog, qemu-devel

On Mon, May 23, 2011 at 12:13 PM, MORITA Kazutaka
<morita.kazutaka@lab.ntt.co.jp> wrote:
> At Mon, 23 May 2011 10:19:13 +0100,
> Stefan Hajnoczi wrote:
>>
>> On Sat, May 21, 2011 at 1:35 PM, MORITA Kazutaka
>> <morita.kazutaka@lab.ntt.co.jp> wrote:
>> > +static int sd_prealloc(uint32_t vid, int64_t vdi_size)
>> > +{
>> > +    int fd, ret;
>> > +    SheepdogInode *inode;
>> > +    char *buf;
>> > +    unsigned long idx, max_idx;
>> [...]
>> > +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
>> > +
>> > +    for (idx = 0; idx < max_idx; idx++) {
>>
>> Do you want to use uint64_t here instead of unsigned long, which may
>> be too small on 32-bit hosts?
>
> The index of a Sheepdog data object is within 32-bit range, so using
> an unsigned long is safe here.

You are right:

#define MAX_DATA_OBJS (UINT64_C(1) << 20)
#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)

So the max_idx is MAX_DATA_OBJS, which is <32-bit.  It just looked suspicious.

Stefan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] sheepdog: add data preallocation support
  2011-05-21 12:35 [Qemu-devel] [PATCH] sheepdog: add data preallocation support MORITA Kazutaka
  2011-05-23  9:19 ` Stefan Hajnoczi
@ 2011-07-01  8:29 ` Kevin Wolf
  2011-07-05 18:21   ` MORITA Kazutaka
  2011-07-05 18:38   ` [Qemu-devel] [PATCH v2] sheepdog: add full " MORITA Kazutaka
  1 sibling, 2 replies; 10+ messages in thread
From: Kevin Wolf @ 2011-07-01  8:29 UTC (permalink / raw)
  To: MORITA Kazutaka; +Cc: sheepdog, qemu-devel

Am 21.05.2011 14:35, schrieb MORITA Kazutaka:
> This introduces a qemu-img create option for sheepdog which allows the
> data to be preallocated (note that sheepdog always preallocates
> metadata).  This is necessary to use Sheepdog volumes as a backend
> storage for iSCSI target.  More information is available at
> https://sourceforge.net/apps/trac/sheepdog/wiki/General%20Protocol%20Support
> 
> The option is disabled by default and you need to enable it like the
> following:
> 
> qemu-img create sheepdog:test -o preallocation=data 1G
> 
> Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>

Hm, looks like I forgot about this patch and nobody pinged me...

>  block/sheepdog.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 72 insertions(+), 1 deletions(-)
> 
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 0392ca8..38ca9aa 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -1292,6 +1292,57 @@ static int do_sd_create(char *filename, int64_t vdi_size,
>      return 0;
>  }
>  
> +static int sd_prealloc(uint32_t vid, int64_t vdi_size)

General comment to this function: Wouldn't it be easier to call the
existing bdrv_write function in a loop instead of reimplementing the
write manually? Though there may be a reason for it that I'm just missing.

> +{
> +    int fd, ret;
> +    SheepdogInode *inode;
> +    char *buf;
> +    unsigned long idx, max_idx;
> +
> +    fd = connect_to_sdog(NULL, NULL);
> +    if (fd < 0) {
> +        return -EIO;
> +    }
> +
> +    inode = qemu_malloc(sizeof(*inode));
> +    buf = qemu_malloc(SD_DATA_OBJ_SIZE);
> +
> +    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
> +                      0, sizeof(*inode), 0);

No error handling?

> +
> +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
> +
> +    for (idx = 0; idx < max_idx; idx++) {
> +        uint64_t oid;
> +        oid = vid_to_data_oid(vid, idx);
> +
> +        if (inode->data_vdi_id[idx]) {
> +            ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]),
> +                              1, SD_DATA_OBJ_SIZE, 0);
> +            if (ret)
> +                goto out;

Missing braces.

Also, what is this if branch doing? Is it to ensure that we don't
overwrite existing data? But then, isn't an image always empty when we
preallocate it?

> +        } else {
> +            memset(buf, 0, SD_DATA_OBJ_SIZE);
> +        }
> +
> +        ret = write_object(fd, buf, oid, 1, SD_DATA_OBJ_SIZE, 0, 1);
> +        if (ret)
> +            goto out;

Braces

> +
> +        inode->data_vdi_id[idx] = vid;
> +        ret = write_object(fd, (char *)inode, vid_to_vdi_oid(vid),
> +                           1, sizeof(*inode), 0, 0);
> +        if (ret)
> +            goto out;

Same here

> +    }
> +out:
> +    free(inode);
> +    free(buf);
> +    closesocket(fd);
> +
> +    return ret;
> +}
> +
>  static int sd_create(const char *filename, QEMUOptionParameter *options)
>  {
>      int ret;
> @@ -1301,6 +1352,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
>      BDRVSheepdogState s;
>      char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
>      uint32_t snapid;
> +    int prealloc = 0;
>  
>      strstart(filename, "sheepdog:", (const char **)&filename);
>  
> @@ -1317,6 +1369,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
>              vdi_size = options->value.n;
>          } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
>              backing_file = options->value.s;
> +        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
> +            if (!options->value.s || !strcmp(options->value.s, "off")) {
> +                prealloc = 0;
> +            } else if (!strcmp(options->value.s, "data")) {
> +                prealloc = 1;
> +            } else {
> +                error_report("Invalid preallocation mode: '%s'\n",
> +                    options->value.s);
> +                return -EINVAL;
> +            }
>          }
>          options++;
>      }
> @@ -1354,7 +1416,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
>          bdrv_delete(bs);
>      }
>  
> -    return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
> +    ret = do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
> +    if (!prealloc || ret)
> +        return ret;

And here.

> +
> +    return sd_prealloc(vid, vdi_size);
>  }
>  
>  static void sd_close(BlockDriverState *bs)
> @@ -1990,6 +2056,11 @@ static QEMUOptionParameter sd_create_options[] = {
>          .type = OPT_STRING,
>          .help = "File name of a base image"
>      },
> +    {
> +        .name = BLOCK_OPT_PREALLOC,
> +        .type = OPT_STRING,
> +        .help = "Preallocation mode (allowed values: off, data)"
> +    },
>      { NULL }
>  };

In my RFC for qcow2, I called this preallocation mode "full" instead of
"data". I don't mind much which we pick, but we should keep it
consistent. Any preferences?

Kevin

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] sheepdog: add data preallocation support
  2011-07-01  8:29 ` Kevin Wolf
@ 2011-07-05 18:21   ` MORITA Kazutaka
  2011-07-06  7:53     ` Kevin Wolf
  2011-07-05 18:38   ` [Qemu-devel] [PATCH v2] sheepdog: add full " MORITA Kazutaka
  1 sibling, 1 reply; 10+ messages in thread
From: MORITA Kazutaka @ 2011-07-05 18:21 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: sheepdog, qemu-devel, MORITA Kazutaka

At Fri, 01 Jul 2011 10:29:09 +0200,
Kevin Wolf wrote:
> 
> Am 21.05.2011 14:35, schrieb MORITA Kazutaka:
> > This introduces a qemu-img create option for sheepdog which allows the
> > data to be preallocated (note that sheepdog always preallocates
> > metadata).  This is necessary to use Sheepdog volumes as a backend
> > storage for iSCSI target.  More information is available at
> > https://sourceforge.net/apps/trac/sheepdog/wiki/General%20Protocol%20Support
> > 
> > The option is disabled by default and you need to enable it like the
> > following:
> > 
> > qemu-img create sheepdog:test -o preallocation=data 1G
> > 
> > Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> > Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
> 
> Hm, looks like I forgot about this patch and nobody pinged me...
> 
> >  block/sheepdog.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 files changed, 72 insertions(+), 1 deletions(-)
> > 
> > diff --git a/block/sheepdog.c b/block/sheepdog.c
> > index 0392ca8..38ca9aa 100644
> > --- a/block/sheepdog.c
> > +++ b/block/sheepdog.c
> > @@ -1292,6 +1292,57 @@ static int do_sd_create(char *filename, int64_t vdi_size,
> >      return 0;
> >  }
> >  
> > +static int sd_prealloc(uint32_t vid, int64_t vdi_size)
> 
> General comment to this function: Wouldn't it be easier to call the
> existing bdrv_write function in a loop instead of reimplementing the
> write manually? Though there may be a reason for it that I'm just missing.

Yes, it's much easier!  I'll rewrite this function with those, thanks.

> 
> > +{
> > +    int fd, ret;
> > +    SheepdogInode *inode;
> > +    char *buf;
> > +    unsigned long idx, max_idx;
> > +
> > +    fd = connect_to_sdog(NULL, NULL);
> > +    if (fd < 0) {
> > +        return -EIO;
> > +    }
> > +
> > +    inode = qemu_malloc(sizeof(*inode));
> > +    buf = qemu_malloc(SD_DATA_OBJ_SIZE);
> > +
> > +    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
> > +                      0, sizeof(*inode), 0);
> 
> No error handling?
> 
> > +
> > +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
> > +
> > +    for (idx = 0; idx < max_idx; idx++) {
> > +        uint64_t oid;
> > +        oid = vid_to_data_oid(vid, idx);
> > +
> > +        if (inode->data_vdi_id[idx]) {
> > +            ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]),
> > +                              1, SD_DATA_OBJ_SIZE, 0);
> > +            if (ret)
> > +                goto out;
> 
> Missing braces.
> 
> Also, what is this if branch doing? Is it to ensure that we don't
> overwrite existing data? But then, isn't an image always empty when we
> preallocate it?

This branch is for handling a cloned image, which is created with -b
option.  This branch reads data from the backing file (read_object
returns zero when it succeeds) instead of filling buffer with zero.

> 
> > +        } else {
> > +            memset(buf, 0, SD_DATA_OBJ_SIZE);
> > +        }
> > +
> > +        ret = write_object(fd, buf, oid, 1, SD_DATA_OBJ_SIZE, 0, 1);
> > +        if (ret)
> > +            goto out;
> 
> Braces
> 
> > +
> > +        inode->data_vdi_id[idx] = vid;
> > +        ret = write_object(fd, (char *)inode, vid_to_vdi_oid(vid),
> > +                           1, sizeof(*inode), 0, 0);
> > +        if (ret)
> > +            goto out;
> 
> Same here
> 
> > +    }
> > +out:
> > +    free(inode);
> > +    free(buf);
> > +    closesocket(fd);
> > +
> > +    return ret;
> > +}
> > +
> >  static int sd_create(const char *filename, QEMUOptionParameter *options)
> >  {
> >      int ret;
> > @@ -1301,6 +1352,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
> >      BDRVSheepdogState s;
> >      char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
> >      uint32_t snapid;
> > +    int prealloc = 0;
> >  
> >      strstart(filename, "sheepdog:", (const char **)&filename);
> >  
> > @@ -1317,6 +1369,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
> >              vdi_size = options->value.n;
> >          } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
> >              backing_file = options->value.s;
> > +        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
> > +            if (!options->value.s || !strcmp(options->value.s, "off")) {
> > +                prealloc = 0;
> > +            } else if (!strcmp(options->value.s, "data")) {
> > +                prealloc = 1;
> > +            } else {
> > +                error_report("Invalid preallocation mode: '%s'\n",
> > +                    options->value.s);
> > +                return -EINVAL;
> > +            }
> >          }
> >          options++;
> >      }
> > @@ -1354,7 +1416,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
> >          bdrv_delete(bs);
> >      }
> >  
> > -    return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
> > +    ret = do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
> > +    if (!prealloc || ret)
> > +        return ret;
> 
> And here.
> 
> > +
> > +    return sd_prealloc(vid, vdi_size);
> >  }
> >  
> >  static void sd_close(BlockDriverState *bs)
> > @@ -1990,6 +2056,11 @@ static QEMUOptionParameter sd_create_options[] = {
> >          .type = OPT_STRING,
> >          .help = "File name of a base image"
> >      },
> > +    {
> > +        .name = BLOCK_OPT_PREALLOC,
> > +        .type = OPT_STRING,
> > +        .help = "Preallocation mode (allowed values: off, data)"
> > +    },
> >      { NULL }
> >  };
> 
> In my RFC for qcow2, I called this preallocation mode "full" instead of
> "data". I don't mind much which we pick, but we should keep it
> consistent. Any preferences?

I don't mind too.  I'll use "full" in the v2 patch.


Thanks,

Kazutaka

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2] sheepdog: add full data preallocation support
  2011-07-01  8:29 ` Kevin Wolf
  2011-07-05 18:21   ` MORITA Kazutaka
@ 2011-07-05 18:38   ` MORITA Kazutaka
  2011-07-06 11:18     ` Kevin Wolf
  1 sibling, 1 reply; 10+ messages in thread
From: MORITA Kazutaka @ 2011-07-05 18:38 UTC (permalink / raw)
  To: kwolf; +Cc: sheepdog, qemu-devel

This introduces qemu-img create option for sheepdog which allows the
data to be fully preallocated (note that sheepdog always preallocates
metadata).

The option is disabled by default and you need to enable it like the
following:

qemu-img create sheepdog:test -o preallocation=full 1G

Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 block/sheepdog.c |   71 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 0392ca8..5b1eb57 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1292,6 +1292,49 @@ static int do_sd_create(char *filename, int64_t vdi_size,
     return 0;
 }
 
+static int sd_prealloc(const char *filename)
+{
+    BlockDriverState *bs = NULL;
+    uint32_t idx, max_idx;
+    int64_t vdi_size;
+    void *buf = qemu_mallocz(SD_DATA_OBJ_SIZE);
+    int ret;
+
+    ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
+    if (ret < 0) {
+        goto out;
+    }
+
+    vdi_size = bdrv_getlength(bs);
+    if (vdi_size < 0) {
+        ret = vdi_size;
+        goto out;
+    }
+    max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+    for (idx = 0; idx < max_idx; idx++) {
+        /*
+         * The created image can be a cloned image, so we need to read
+         * a data from the source image.
+         */
+        ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+        if (ret < 0) {
+            goto out;
+        }
+        ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+out:
+    if (bs) {
+        bdrv_delete(bs);
+    }
+    qemu_free(buf);
+
+    return ret;
+}
+
 static int sd_create(const char *filename, QEMUOptionParameter *options)
 {
     int ret;
@@ -1301,13 +1344,15 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
     BDRVSheepdogState s;
     char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
     uint32_t snapid;
+    int prealloc = 0;
+    const char *vdiname;
 
-    strstart(filename, "sheepdog:", (const char **)&filename);
+    strstart(filename, "sheepdog:", &vdiname);
 
     memset(&s, 0, sizeof(s));
     memset(vdi, 0, sizeof(vdi));
     memset(tag, 0, sizeof(tag));
-    if (parse_vdiname(&s, filename, vdi, &snapid, tag) < 0) {
+    if (parse_vdiname(&s, vdiname, vdi, &snapid, tag) < 0) {
         error_report("invalid filename\n");
         return -EINVAL;
     }
@@ -1317,6 +1362,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
             vdi_size = options->value.n;
         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
             backing_file = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "full")) {
+                prealloc = 1;
+            } else {
+                error_report("Invalid preallocation mode: '%s'",
+                             options->value.s);
+                return -EINVAL;
+            }
         }
         options++;
     }
@@ -1354,7 +1409,12 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
         bdrv_delete(bs);
     }
 
-    return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
+    ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
+    if (!prealloc || ret) {
+        return ret;
+    }
+
+    return sd_prealloc(filename);
 }
 
 static void sd_close(BlockDriverState *bs)
@@ -1990,6 +2050,11 @@ static QEMUOptionParameter sd_create_options[] = {
         .type = OPT_STRING,
         .help = "File name of a base image"
     },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, full)"
+    },
     { NULL }
 };
 
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] sheepdog: add data preallocation support
  2011-07-05 18:21   ` MORITA Kazutaka
@ 2011-07-06  7:53     ` Kevin Wolf
  2011-07-08 11:06       ` [Qemu-devel] [Sheepdog] " MORITA Kazutaka
  0 siblings, 1 reply; 10+ messages in thread
From: Kevin Wolf @ 2011-07-06  7:53 UTC (permalink / raw)
  To: MORITA Kazutaka; +Cc: sheepdog, qemu-devel

Am 05.07.2011 20:21, schrieb MORITA Kazutaka:
>>> +
>>> +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
>>> +
>>> +    for (idx = 0; idx < max_idx; idx++) {
>>> +        uint64_t oid;
>>> +        oid = vid_to_data_oid(vid, idx);
>>> +
>>> +        if (inode->data_vdi_id[idx]) {
>>> +            ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]),
>>> +                              1, SD_DATA_OBJ_SIZE, 0);
>>> +            if (ret)
>>> +                goto out;
>>
>> Missing braces.
>>
>> Also, what is this if branch doing? Is it to ensure that we don't
>> overwrite existing data? But then, isn't an image always empty when we
>> preallocate it?
> 
> This branch is for handling a cloned image, which is created with -b
> option.  This branch reads data from the backing file (read_object
> returns zero when it succeeds) instead of filling buffer with zero.

Oh, I see. You support preallocation even with backing files. And
suddenly it makes perfect sense. :-)

(Although after completing preallocation, you won't need the backing
file any more as all of it has been copied into the image. Maybe we
should drop the reference then?)

>> In my RFC for qcow2, I called this preallocation mode "full" instead of
>> "data". I don't mind much which we pick, but we should keep it
>> consistent. Any preferences?
> 
> I don't mind too.  I'll use "full" in the v2 patch.

Great, thanks.

Kevin

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v2] sheepdog: add full data preallocation support
  2011-07-05 18:38   ` [Qemu-devel] [PATCH v2] sheepdog: add full " MORITA Kazutaka
@ 2011-07-06 11:18     ` Kevin Wolf
  0 siblings, 0 replies; 10+ messages in thread
From: Kevin Wolf @ 2011-07-06 11:18 UTC (permalink / raw)
  To: MORITA Kazutaka; +Cc: sheepdog, qemu-devel

Am 05.07.2011 20:38, schrieb MORITA Kazutaka:
> This introduces qemu-img create option for sheepdog which allows the
> data to be fully preallocated (note that sheepdog always preallocates
> metadata).
> 
> The option is disabled by default and you need to enable it like the
> following:
> 
> qemu-img create sheepdog:test -o preallocation=full 1G
> 
> Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>

Thanks, applied to the block branch.

Kevin

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [Sheepdog] [PATCH] sheepdog: add data preallocation support
  2011-07-06  7:53     ` Kevin Wolf
@ 2011-07-08 11:06       ` MORITA Kazutaka
  0 siblings, 0 replies; 10+ messages in thread
From: MORITA Kazutaka @ 2011-07-08 11:06 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: sheepdog, qemu-devel, MORITA Kazutaka

At Wed, 06 Jul 2011 09:53:32 +0200,
Kevin Wolf wrote:
> 
> Am 05.07.2011 20:21, schrieb MORITA Kazutaka:
> >>> +
> >>> +    max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
> >>> +
> >>> +    for (idx = 0; idx < max_idx; idx++) {
> >>> +        uint64_t oid;
> >>> +        oid = vid_to_data_oid(vid, idx);
> >>> +
> >>> +        if (inode->data_vdi_id[idx]) {
> >>> +            ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]),
> >>> +                              1, SD_DATA_OBJ_SIZE, 0);
> >>> +            if (ret)
> >>> +                goto out;
> >>
> >> Missing braces.
> >>
> >> Also, what is this if branch doing? Is it to ensure that we don't
> >> overwrite existing data? But then, isn't an image always empty when we
> >> preallocate it?
> > 
> > This branch is for handling a cloned image, which is created with -b
> > option.  This branch reads data from the backing file (read_object
> > returns zero when it succeeds) instead of filling buffer with zero.
> 
> Oh, I see. You support preallocation even with backing files. And
> suddenly it makes perfect sense. :-)
> 
> (Although after completing preallocation, you won't need the backing
> file any more as all of it has been copied into the image. Maybe we
> should drop the reference then?)

Though we can drop it, Sheepdog uses the reference to show the VM
image relationships in a tree format as VMware does.  So as far as a
Sheepdog protocol is concerned, I think we should keep it.


Thanks,

Kazutaka

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-07-08 11:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-21 12:35 [Qemu-devel] [PATCH] sheepdog: add data preallocation support MORITA Kazutaka
2011-05-23  9:19 ` Stefan Hajnoczi
2011-05-23 11:13   ` MORITA Kazutaka
2011-05-23 13:23     ` Stefan Hajnoczi
2011-07-01  8:29 ` Kevin Wolf
2011-07-05 18:21   ` MORITA Kazutaka
2011-07-06  7:53     ` Kevin Wolf
2011-07-08 11:06       ` [Qemu-devel] [Sheepdog] " MORITA Kazutaka
2011-07-05 18:38   ` [Qemu-devel] [PATCH v2] sheepdog: add full " MORITA Kazutaka
2011-07-06 11:18     ` Kevin Wolf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.