* [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full
@ 2013-12-27 3:05 Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 1/4] qapi: introduce PreallocMode Hu Tao
` (5 more replies)
0 siblings, 6 replies; 15+ messages in thread
From: Hu Tao @ 2013-12-27 3:05 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, Stefan Hajnoczi
This series implements full image preallocation to create a non-sparse image
file at creation time, both for raw and qcow2 format. The purpose is to avoid
performance deterioration of the guest cause by sparse image.
v4: - remove bdrv_preallocate and make preallocation a bdrv_create_file option
- prealloc_mode -> PreallocMode and add it to QAPI
- fix return value in raw_preallocate2
Hu Tao (4):
qapi: introduce PreallocMode
raw,qcow2: don't convert file size to sector size
raw-posix: Add full image preallocation option
qcow2: Add full image preallocation option
block/qcow2.c | 64 +++++++++++++++++++++++++++++++++++++++++++++----------
block/raw-posix.c | 50 +++++++++++++++++++++++++++++++++++++++++--
qapi-schema.json | 12 +++++++++++
3 files changed, 113 insertions(+), 13 deletions(-)
--
1.7.11.7
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC PATCH v4 1/4] qapi: introduce PreallocMode
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
@ 2013-12-27 3:05 ` Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 2/4] raw, qcow2: don't convert file size to sector size Hu Tao
` (4 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Hu Tao @ 2013-12-27 3:05 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, Stefan Hajnoczi
This patch prepares for the subsequent patches.
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
block/qcow2.c | 8 ++++----
qapi-schema.json | 12 ++++++++++++
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/block/qcow2.c b/block/qcow2.c
index f29aa88..729dac6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1444,7 +1444,7 @@ static int preallocate(BlockDriverState *bs)
static int qcow2_create2(const char *filename, int64_t total_size,
const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size, int prealloc,
+ int flags, size_t cluster_size, PreallocMode prealloc,
QEMUOptionParameter *options, int version,
Error **errp)
{
@@ -1614,7 +1614,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
uint64_t sectors = 0;
int flags = 0;
size_t cluster_size = DEFAULT_CLUSTER_SIZE;
- int prealloc = 0;
+ PreallocMode prealloc = PREALLOC_MODE_OFF;
int version = 3;
Error *local_err = NULL;
int ret;
@@ -1635,9 +1635,9 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
}
} else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = 0;
+ prealloc = PREALLOC_MODE_OFF;
} else if (!strcmp(options->value.s, "metadata")) {
- prealloc = 1;
+ prealloc = PREALLOC_MODE_METADATA;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'",
options->value.s);
diff --git a/qapi-schema.json b/qapi-schema.json
index c3c939c..64533f1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4235,3 +4235,15 @@
# Since: 1.7
##
{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
+
+##
+# @PreallocMode
+#
+# Preallocation mode of QEMU image file
+#
+# @off: no preallocation
+# @metadata: preallocate only for metadata
+# @full: preallocate all data, including metadata
+##
+{ 'enum': 'PreallocMode',
+ 'data': [ 'off', 'metadata', 'full' ] }
--
1.7.11.7
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC PATCH v4 2/4] raw, qcow2: don't convert file size to sector size
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 1/4] qapi: introduce PreallocMode Hu Tao
@ 2013-12-27 3:05 ` Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option Hu Tao
` (3 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Hu Tao @ 2013-12-27 3:05 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, Stefan Hajnoczi
and avoid convert it back later.
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
block/qcow2.c | 8 ++++----
block/raw-posix.c | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/block/qcow2.c b/block/qcow2.c
index 729dac6..f0a8d87 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1561,7 +1561,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
/* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
+ ret = bdrv_truncate(bs, total_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not resize image");
goto out;
@@ -1611,7 +1611,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
{
const char *backing_file = NULL;
const char *backing_fmt = NULL;
- uint64_t sectors = 0;
+ uint64_t size = 0;
int flags = 0;
size_t cluster_size = DEFAULT_CLUSTER_SIZE;
PreallocMode prealloc = PREALLOC_MODE_OFF;
@@ -1622,7 +1622,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- sectors = options->value.n / 512;
+ size = options->value.n & BDRV_SECTOR_MASK;
} else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
backing_file = options->value.s;
} else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
@@ -1673,7 +1673,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
return -EINVAL;
}
- ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
+ ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
cluster_size, prealloc, options, version, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 10c6b34..6f6b8c1 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1170,7 +1170,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / BDRV_SECTOR_SIZE;
+ total_size = options->value.n & BDRV_SECTOR_MASK;
}
options++;
}
@@ -1181,7 +1181,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
result = -errno;
error_setg_errno(errp, -result, "Could not create file");
} else {
- if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+ if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
--
1.7.11.7
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 1/4] qapi: introduce PreallocMode Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 2/4] raw, qcow2: don't convert file size to sector size Hu Tao
@ 2013-12-27 3:05 ` Hu Tao
2014-01-17 8:25 ` Stefan Hajnoczi
2014-01-17 8:56 ` Stefan Hajnoczi
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 4/4] qcow2: " Hu Tao
` (2 subsequent siblings)
5 siblings, 2 replies; 15+ messages in thread
From: Hu Tao @ 2013-12-27 3:05 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, Stefan Hajnoczi
This patch adds a new option preallocation for raw format, and implements
full preallocation.
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
block/raw-posix.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6f6b8c1..a722d27 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1160,17 +1160,52 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return (int64_t)st.st_blocks * 512;
}
+#ifdef __linux__
+static int raw_preallocate2(int fd, int64_t offset, int64_t length)
+{
+ int ret = -1;
+
+ ret = fallocate(fd, 0, offset, length);
+ if (ret < 0) {
+ ret = -errno;
+ }
+
+ /* fallback to posix_fallocate() if fallocate() is not supported */
+ if (ret == -ENOSYS || ret == -EOPNOTSUPP) {
+ ret = posix_fallocate(fd, offset, length);
+ }
+
+ return ret;
+}
+#else
+static int raw_preallocate2(int fd, int64_t offset, int64_t length)
+{
+ return posix_fallocate(fd, offset, length);
+}
+#endif
+
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
{
int fd;
int result = 0;
int64_t total_size = 0;
+ PreallocMode prealloc = PREALLOC_MODE_OFF;
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
total_size = options->value.n & BDRV_SECTOR_MASK;
+ } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+ if (!options->value.s || !strcmp(options->value.s, "off")) {
+ prealloc = PREALLOC_MODE_OFF;
+ } else if (!strcmp(options->value.s, "full")) {
+ prealloc = PREALLOC_MODE_FULL;
+ } else {
+ error_setg(errp, "Invalid preallocation mode: '%s'",
+ options->value.s);
+ return -EINVAL;
+ }
}
options++;
}
@@ -1185,6 +1220,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
+ if (prealloc == PREALLOC_MODE_FULL) {
+ result = raw_preallocate2(fd, 0, total_size);
+ if (result != 0)
+ error_setg_errno(errp, -result,
+ "Could not preallocate data for the new file");
+ }
if (qemu_close(fd) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not close the new file");
@@ -1340,6 +1381,11 @@ static QEMUOptionParameter raw_create_options[] = {
.type = OPT_SIZE,
.help = "Virtual disk size"
},
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, full)"
+ },
{ NULL }
};
--
1.7.11.7
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC PATCH v4 4/4] qcow2: Add full image preallocation option
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
` (2 preceding siblings ...)
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option Hu Tao
@ 2013-12-27 3:05 ` Hu Tao
2014-01-17 8:48 ` Stefan Hajnoczi
2014-01-06 7:27 ` [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
2014-01-17 9:05 ` Stefan Hajnoczi
5 siblings, 1 reply; 15+ messages in thread
From: Hu Tao @ 2013-12-27 3:05 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, Stefan Hajnoczi
This adds a preallocation=full mode to qcow2 image creation, which
creates a non-sparse image file.
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
block/qcow2.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/block/qcow2.c b/block/qcow2.c
index f0a8d87..f601089 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1448,6 +1448,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
QEMUOptionParameter *options, int version,
Error **errp)
{
+ QEMUOptionParameter *alloc_options = NULL;
/* Calculate cluster_bits */
int cluster_bits;
cluster_bits = ffs(cluster_size) - 1;
@@ -1477,16 +1478,53 @@ static int qcow2_create2(const char *filename, int64_t total_size,
Error *local_err = NULL;
int ret;
+ if (prealloc == PREALLOC_MODE_FULL) {
+ int64_t meta_size = 0;
+ unsigned nrefe, nl2e;
+ BlockDriver *drv;
+
+ drv = bdrv_find_protocol(filename, true);
+ if (drv == NULL) {
+ error_setg(errp, "Could not find protocol for file '%s'", filename);
+ return -ENOENT;
+ }
+
+ alloc_options = append_option_parameters(alloc_options,
+ drv->create_options);
+ alloc_options = append_option_parameters(alloc_options, options);
+
+ /* header: 1 cluster */
+ meta_size += cluster_size;
+ /* total size of refblocks */
+ nrefe = (total_size / cluster_size);
+ nrefe = align_offset(nrefe, cluster_size / sizeof(uint16_t));
+ meta_size += nrefe * sizeof(uint16_t);
+ /* total size of reftables */
+ meta_size += nrefe * sizeof(uint16_t) * sizeof(uint16_t) / cluster_size;
+ /* total size of L2 tables */
+ nl2e = total_size / cluster_size;
+ nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
+ meta_size += nl2e * sizeof(uint64_t);
+ /* total size of L1 tables */
+ meta_size += nl2e * sizeof(uint64_t) * sizeof(uint64_t) / cluster_size;
+
+ set_option_parameter_int(alloc_options, BLOCK_OPT_SIZE,
+ total_size + meta_size);
+ set_option_parameter(alloc_options, BLOCK_OPT_PREALLOC, "full");
+
+ options = alloc_options;
+ }
+
ret = bdrv_create_file(filename, options, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return ret;
+ goto out_options;
}
ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return ret;
+ goto out_options;
}
/* Write the header */
@@ -1603,6 +1641,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
ret = 0;
out:
bdrv_unref(bs);
+out_options:
+ free_option_parameters(alloc_options);
return ret;
}
@@ -1638,6 +1678,8 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
prealloc = PREALLOC_MODE_OFF;
} else if (!strcmp(options->value.s, "metadata")) {
prealloc = PREALLOC_MODE_METADATA;
+ } else if (!strcmp(options->value.s, "full")) {
+ prealloc = PREALLOC_MODE_FULL;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'",
options->value.s);
@@ -2223,7 +2265,7 @@ static QEMUOptionParameter qcow2_create_options[] = {
{
.name = BLOCK_OPT_PREALLOC,
.type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, metadata)"
+ .help = "Preallocation mode (allowed values: off, metadata, full)"
},
{
.name = BLOCK_OPT_LAZY_REFCOUNTS,
--
1.7.11.7
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
` (3 preceding siblings ...)
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 4/4] qcow2: " Hu Tao
@ 2014-01-06 7:27 ` Hu Tao
2014-01-13 10:26 ` Hu Tao
2014-01-17 9:05 ` Stefan Hajnoczi
5 siblings, 1 reply; 15+ messages in thread
From: Hu Tao @ 2014-01-06 7:27 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Peter Lieven, Fam Zheng, stefanha
ping...
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full
2014-01-06 7:27 ` [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
@ 2014-01-13 10:26 ` Hu Tao
0 siblings, 0 replies; 15+ messages in thread
From: Hu Tao @ 2014-01-13 10:26 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, stefanha
ping...
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option Hu Tao
@ 2014-01-17 8:25 ` Stefan Hajnoczi
2014-01-20 2:19 ` Hu Tao
2014-01-17 8:56 ` Stefan Hajnoczi
1 sibling, 1 reply; 15+ messages in thread
From: Stefan Hajnoczi @ 2014-01-17 8:25 UTC (permalink / raw)
To: Hu Tao; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
On Fri, Dec 27, 2013 at 11:05:53AM +0800, Hu Tao wrote:
> This patch adds a new option preallocation for raw format, and implements
> full preallocation.
>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
> block/raw-posix.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 46 insertions(+)
>
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 6f6b8c1..a722d27 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -1160,17 +1160,52 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
> return (int64_t)st.st_blocks * 512;
> }
>
> +#ifdef __linux__
> +static int raw_preallocate2(int fd, int64_t offset, int64_t length)
Why is this function called raw_preallocate2()? Please choose a
descriptive name.
> +{
> + int ret = -1;
> +
> + ret = fallocate(fd, 0, offset, length);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + /* fallback to posix_fallocate() if fallocate() is not supported */
> + if (ret == -ENOSYS || ret == -EOPNOTSUPP) {
> + ret = posix_fallocate(fd, offset, length);
>From the posix_fallocate(3) man page:
"posix_fallocate() returns zero on success, or an error number on
failure. Note that errno is not set."
You need to negate the error number:
if (ret > 0) {
ret = -ret; /* posix_fallocate(3) returns a positive errno */
}
> + }
> +
> + return ret;
> +}
> +#else
> +static int raw_preallocate2(int fd, int64_t offset, int64_t length)
> +{
> + return posix_fallocate(fd, offset, length);
Same error number problem as above.
> @@ -1185,6 +1220,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
> result = -errno;
> error_setg_errno(errp, -result, "Could not resize file");
> }
> + if (prealloc == PREALLOC_MODE_FULL) {
> + result = raw_preallocate2(fd, 0, total_size);
What if ftruncate() failed? We should not try to fallocate. Please add
a proper error return path.
> + if (result != 0)
> + error_setg_errno(errp, -result,
> + "Could not preallocate data for the new file");
WARNING: braces {} are necessary for all arms of this statement
Please run scripts/checkpatch.pl before submitting patches, it checks
QEMU coding style.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 4/4] qcow2: Add full image preallocation option
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 4/4] qcow2: " Hu Tao
@ 2014-01-17 8:48 ` Stefan Hajnoczi
2014-01-20 2:16 ` Hu Tao
0 siblings, 1 reply; 15+ messages in thread
From: Stefan Hajnoczi @ 2014-01-17 8:48 UTC (permalink / raw)
To: Hu Tao; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
On Fri, Dec 27, 2013 at 11:05:54AM +0800, Hu Tao wrote:
This approach seems okay but the calculation isn't quite right yet.
On Windows an error would be raised since we don't have preallocate=full
support. That's okay.
> @@ -1477,16 +1478,53 @@ static int qcow2_create2(const char *filename, int64_t total_size,
> Error *local_err = NULL;
> int ret;
>
> + if (prealloc == PREALLOC_MODE_FULL) {
> + int64_t meta_size = 0;
> + unsigned nrefe, nl2e;
> + BlockDriver *drv;
> +
> + drv = bdrv_find_protocol(filename, true);
> + if (drv == NULL) {
> + error_setg(errp, "Could not find protocol for file '%s'", filename);
> + return -ENOENT;
> + }
> +
> + alloc_options = append_option_parameters(alloc_options,
> + drv->create_options);
> + alloc_options = append_option_parameters(alloc_options, options);
> +
> + /* header: 1 cluster */
> + meta_size += cluster_size;
> + /* total size of refblocks */
> + nrefe = (total_size / cluster_size);
> + nrefe = align_offset(nrefe, cluster_size / sizeof(uint16_t));
> + meta_size += nrefe * sizeof(uint16_t);
Every host cluster is reference-counted, including metadata (even
refcount blocks are recursively included). This calculation is wrong
because it only considers data clusters.
> + /* total size of reftables */
> + meta_size += nrefe * sizeof(uint16_t) * sizeof(uint16_t) / cluster_size;
I don't understand this calculation. The refcount table consists of
contiguous clusters where each element is a 64-bit offset to a refcount
block.
> + /* total size of L2 tables */
> + nl2e = total_size / cluster_size;
> + nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
> + meta_size += nl2e * sizeof(uint64_t);
> + /* total size of L1 tables */
> + meta_size += nl2e * sizeof(uint64_t) * sizeof(uint64_t) / cluster_size;
Another strange calculation. The L1 table consists of contiguous
clusters where each element is a 64-bit offset to an L1 table.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option Hu Tao
2014-01-17 8:25 ` Stefan Hajnoczi
@ 2014-01-17 8:56 ` Stefan Hajnoczi
2014-01-20 2:27 ` Hu Tao
1 sibling, 1 reply; 15+ messages in thread
From: Stefan Hajnoczi @ 2014-01-17 8:56 UTC (permalink / raw)
To: Hu Tao; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
On Fri, Dec 27, 2013 at 11:05:53AM +0800, Hu Tao wrote:
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 6f6b8c1..a722d27 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -1160,17 +1160,52 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
> return (int64_t)st.st_blocks * 512;
> }
>
> +#ifdef __linux__
> +static int raw_preallocate2(int fd, int64_t offset, int64_t length)
> +{
> + int ret = -1;
> +
> + ret = fallocate(fd, 0, offset, length);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + /* fallback to posix_fallocate() if fallocate() is not supported */
> + if (ret == -ENOSYS || ret == -EOPNOTSUPP) {
> + ret = posix_fallocate(fd, offset, length);
> + }
> +
> + return ret;
> +}
Why is this Linux-specific #ifdef necessary? glibc will try to use the
fallocate(2) system call, if possible.
Stefan
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
` (4 preceding siblings ...)
2014-01-06 7:27 ` [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
@ 2014-01-17 9:05 ` Stefan Hajnoczi
5 siblings, 0 replies; 15+ messages in thread
From: Stefan Hajnoczi @ 2014-01-17 9:05 UTC (permalink / raw)
To: Hu Tao; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
On Fri, Dec 27, 2013 at 11:05:50AM +0800, Hu Tao wrote:
> This series implements full image preallocation to create a non-sparse image
> file at creation time, both for raw and qcow2 format. The purpose is to avoid
> performance deterioration of the guest cause by sparse image.
>
> v4: - remove bdrv_preallocate and make preallocation a bdrv_create_file option
> - prealloc_mode -> PreallocMode and add it to QAPI
> - fix return value in raw_preallocate2
>
> Hu Tao (4):
> qapi: introduce PreallocMode
> raw,qcow2: don't convert file size to sector size
> raw-posix: Add full image preallocation option
> qcow2: Add full image preallocation option
>
> block/qcow2.c | 64 +++++++++++++++++++++++++++++++++++++++++++++----------
> block/raw-posix.c | 50 +++++++++++++++++++++++++++++++++++++++++--
> qapi-schema.json | 12 +++++++++++
> 3 files changed, 113 insertions(+), 13 deletions(-)
I've thought more about the lack of progress information during
posix_fallocate(3). Users creating 10s or 100s of GB images may have to
wait for a long time if posix_fallocate(3) is implemented in userspace
(effectively dd if=/dev/zero of=vm.img).
But I think adding preallocate=full is a good starting point. Modern
file systems implement it efficiently so the wait time should be
negligible. If creating images takes too long we could consider a
fancier interface later.
I left comments on specific patches.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 4/4] qcow2: Add full image preallocation option
2014-01-17 8:48 ` Stefan Hajnoczi
@ 2014-01-20 2:16 ` Hu Tao
2014-02-07 2:22 ` Hu Tao
0 siblings, 1 reply; 15+ messages in thread
From: Hu Tao @ 2014-01-20 2:16 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
Stefan,
On Fri, Jan 17, 2014 at 04:48:16PM +0800, Stefan Hajnoczi wrote:
> On Fri, Dec 27, 2013 at 11:05:54AM +0800, Hu Tao wrote:
>
> This approach seems okay but the calculation isn't quite right yet.
>
> On Windows an error would be raised since we don't have preallocate=full
> support. That's okay.
>
> > @@ -1477,16 +1478,53 @@ static int qcow2_create2(const char *filename, int64_t total_size,
> > Error *local_err = NULL;
> > int ret;
> >
> > + if (prealloc == PREALLOC_MODE_FULL) {
> > + int64_t meta_size = 0;
> > + unsigned nrefe, nl2e;
> > + BlockDriver *drv;
> > +
> > + drv = bdrv_find_protocol(filename, true);
> > + if (drv == NULL) {
> > + error_setg(errp, "Could not find protocol for file '%s'", filename);
> > + return -ENOENT;
> > + }
> > +
> > + alloc_options = append_option_parameters(alloc_options,
> > + drv->create_options);
> > + alloc_options = append_option_parameters(alloc_options, options);
> > +
> > + /* header: 1 cluster */
> > + meta_size += cluster_size;
> > + /* total size of refblocks */
> > + nrefe = (total_size / cluster_size);
> > + nrefe = align_offset(nrefe, cluster_size / sizeof(uint16_t));
> > + meta_size += nrefe * sizeof(uint16_t);
>
> Every host cluster is reference-counted, including metadata (even
> refcount blocks are recursively included). This calculation is wrong
> because it only considers data clusters.
Right. I'll fix this.
>
> > + /* total size of reftables */
> > + meta_size += nrefe * sizeof(uint16_t) * sizeof(uint16_t) / cluster_size;
>
> I don't understand this calculation. The refcount table consists of
> contiguous clusters where each element is a 64-bit offset to a refcount
> block.
>
> > + /* total size of L2 tables */
> > + nl2e = total_size / cluster_size;
> > + nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
> > + meta_size += nl2e * sizeof(uint64_t);
> > + /* total size of L1 tables */
> > + meta_size += nl2e * sizeof(uint64_t) * sizeof(uint64_t) / cluster_size;
>
> Another strange calculation. The L1 table consists of contiguous
> clusters where each element is a 64-bit offset to an L1 table.
I guest you mean "...offset to an L2 table" here.
Given the number of total L2 table entries nl2e, the number of L2 tables
is:
nl2table = nl2e * sizeof(l2e) / cluster_size
because each L1 table entry points to a L2 table, so this is also the
number of L1 table entries. So the total size of L1 tables is:
l1table_size = nl2table * sizeof(l1e)
= nl2e * sizeof(l2e) * sizeof(l1e) / cluster_size
= nl2e * sizeof(uint64_t) * sizeof(uint64_t) / cluster_size
I still can't see what's wrong here. But you're welcome to fix me.
The caculation of reftable size above is wrong because of the two problem
you've pointed out. Thanks!
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option
2014-01-17 8:25 ` Stefan Hajnoczi
@ 2014-01-20 2:19 ` Hu Tao
0 siblings, 0 replies; 15+ messages in thread
From: Hu Tao @ 2014-01-20 2:19 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
On Fri, Jan 17, 2014 at 04:25:14PM +0800, Stefan Hajnoczi wrote:
> On Fri, Dec 27, 2013 at 11:05:53AM +0800, Hu Tao wrote:
> > This patch adds a new option preallocation for raw format, and implements
> > full preallocation.
> >
> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > ---
> > block/raw-posix.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 46 insertions(+)
> >
> > diff --git a/block/raw-posix.c b/block/raw-posix.c
> > index 6f6b8c1..a722d27 100644
> > --- a/block/raw-posix.c
> > +++ b/block/raw-posix.c
> > @@ -1160,17 +1160,52 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
> > return (int64_t)st.st_blocks * 512;
> > }
> >
> > +#ifdef __linux__
> > +static int raw_preallocate2(int fd, int64_t offset, int64_t length)
>
> Why is this function called raw_preallocate2()? Please choose a
> descriptive name.
Sure.
>
> > +{
> > + int ret = -1;
> > +
> > + ret = fallocate(fd, 0, offset, length);
> > + if (ret < 0) {
> > + ret = -errno;
> > + }
> > +
> > + /* fallback to posix_fallocate() if fallocate() is not supported */
> > + if (ret == -ENOSYS || ret == -EOPNOTSUPP) {
> > + ret = posix_fallocate(fd, offset, length);
>
> From the posix_fallocate(3) man page:
> "posix_fallocate() returns zero on success, or an error number on
> failure. Note that errno is not set."
>
> You need to negate the error number:
> if (ret > 0) {
> ret = -ret; /* posix_fallocate(3) returns a positive errno */
> }
Sure.
>
> > + }
> > +
> > + return ret;
> > +}
> > +#else
> > +static int raw_preallocate2(int fd, int64_t offset, int64_t length)
> > +{
> > + return posix_fallocate(fd, offset, length);
>
> Same error number problem as above.
>
> > @@ -1185,6 +1220,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
> > result = -errno;
> > error_setg_errno(errp, -result, "Could not resize file");
> > }
> > + if (prealloc == PREALLOC_MODE_FULL) {
> > + result = raw_preallocate2(fd, 0, total_size);
>
> What if ftruncate() failed? We should not try to fallocate. Please add
> a proper error return path.
Sure.
>
> > + if (result != 0)
> > + error_setg_errno(errp, -result,
> > + "Could not preallocate data for the new file");
>
> WARNING: braces {} are necessary for all arms of this statement
>
> Please run scripts/checkpatch.pl before submitting patches, it checks
> QEMU coding style.
Thanks!
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option
2014-01-17 8:56 ` Stefan Hajnoczi
@ 2014-01-20 2:27 ` Hu Tao
0 siblings, 0 replies; 15+ messages in thread
From: Hu Tao @ 2014-01-20 2:27 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Kevin Wolf, Fam Zheng, Peter Lieven, qemu-devel
On Fri, Jan 17, 2014 at 04:56:42PM +0800, Stefan Hajnoczi wrote:
> On Fri, Dec 27, 2013 at 11:05:53AM +0800, Hu Tao wrote:
> > diff --git a/block/raw-posix.c b/block/raw-posix.c
> > index 6f6b8c1..a722d27 100644
> > --- a/block/raw-posix.c
> > +++ b/block/raw-posix.c
> > @@ -1160,17 +1160,52 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
> > return (int64_t)st.st_blocks * 512;
> > }
> >
> > +#ifdef __linux__
> > +static int raw_preallocate2(int fd, int64_t offset, int64_t length)
> > +{
> > + int ret = -1;
> > +
> > + ret = fallocate(fd, 0, offset, length);
> > + if (ret < 0) {
> > + ret = -errno;
> > + }
> > +
> > + /* fallback to posix_fallocate() if fallocate() is not supported */
> > + if (ret == -ENOSYS || ret == -EOPNOTSUPP) {
> > + ret = posix_fallocate(fd, offset, length);
> > + }
> > +
> > + return ret;
> > +}
>
> Why is this Linux-specific #ifdef necessary? glibc will try to use the
> fallocate(2) system call, if possible.
Fine. I'll drop this.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v4 4/4] qcow2: Add full image preallocation option
2014-01-20 2:16 ` Hu Tao
@ 2014-02-07 2:22 ` Hu Tao
0 siblings, 0 replies; 15+ messages in thread
From: Hu Tao @ 2014-02-07 2:22 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Kevin Wolf, Peter Lieven, Fam Zheng, qemu-devel
On Mon, Jan 20, 2014 at 10:16:23AM +0800, Hu Tao wrote:
> Stefan,
>
> On Fri, Jan 17, 2014 at 04:48:16PM +0800, Stefan Hajnoczi wrote:
> > On Fri, Dec 27, 2013 at 11:05:54AM +0800, Hu Tao wrote:
> >
> > This approach seems okay but the calculation isn't quite right yet.
> >
> > On Windows an error would be raised since we don't have preallocate=full
> > support. That's okay.
> >
> > > @@ -1477,16 +1478,53 @@ static int qcow2_create2(const char *filename, int64_t total_size,
> > > Error *local_err = NULL;
> > > int ret;
> > >
> > > + if (prealloc == PREALLOC_MODE_FULL) {
> > > + int64_t meta_size = 0;
> > > + unsigned nrefe, nl2e;
> > > + BlockDriver *drv;
> > > +
> > > + drv = bdrv_find_protocol(filename, true);
> > > + if (drv == NULL) {
> > > + error_setg(errp, "Could not find protocol for file '%s'", filename);
> > > + return -ENOENT;
> > > + }
> > > +
> > > + alloc_options = append_option_parameters(alloc_options,
> > > + drv->create_options);
> > > + alloc_options = append_option_parameters(alloc_options, options);
> > > +
> > > + /* header: 1 cluster */
> > > + meta_size += cluster_size;
> > > + /* total size of refblocks */
> > > + nrefe = (total_size / cluster_size);
> > > + nrefe = align_offset(nrefe, cluster_size / sizeof(uint16_t));
> > > + meta_size += nrefe * sizeof(uint16_t);
> >
> > Every host cluster is reference-counted, including metadata (even
> > refcount blocks are recursively included). This calculation is wrong
> > because it only considers data clusters.
>
> Right. I'll fix this.
>
> >
> > > + /* total size of reftables */
> > > + meta_size += nrefe * sizeof(uint16_t) * sizeof(uint16_t) / cluster_size;
> >
> > I don't understand this calculation. The refcount table consists of
> > contiguous clusters where each element is a 64-bit offset to a refcount
> > block.
> >
> > > + /* total size of L2 tables */
> > > + nl2e = total_size / cluster_size;
> > > + nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
> > > + meta_size += nl2e * sizeof(uint64_t);
> > > + /* total size of L1 tables */
> > > + meta_size += nl2e * sizeof(uint64_t) * sizeof(uint64_t) / cluster_size;
> >
> > Another strange calculation. The L1 table consists of contiguous
> > clusters where each element is a 64-bit offset to an L1 table.
>
> I guest you mean "...offset to an L2 table" here.
>
> Given the number of total L2 table entries nl2e, the number of L2 tables
> is:
>
> nl2table = nl2e * sizeof(l2e) / cluster_size
>
> because each L1 table entry points to a L2 table, so this is also the
> number of L1 table entries. So the total size of L1 tables is:
>
> l1table_size = nl2table * sizeof(l1e)
> = nl2e * sizeof(l2e) * sizeof(l1e) / cluster_size
> = nl2e * sizeof(uint64_t) * sizeof(uint64_t) / cluster_size
>
> I still can't see what's wrong here. But you're welcome to fix me.
Hi,
Any comments?
--
Regards,
Hu Tao
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2014-02-07 2:25 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-27 3:05 [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 1/4] qapi: introduce PreallocMode Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 2/4] raw, qcow2: don't convert file size to sector size Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 3/4] raw-posix: Add full image preallocation option Hu Tao
2014-01-17 8:25 ` Stefan Hajnoczi
2014-01-20 2:19 ` Hu Tao
2014-01-17 8:56 ` Stefan Hajnoczi
2014-01-20 2:27 ` Hu Tao
2013-12-27 3:05 ` [Qemu-devel] [RFC PATCH v4 4/4] qcow2: " Hu Tao
2014-01-17 8:48 ` Stefan Hajnoczi
2014-01-20 2:16 ` Hu Tao
2014-02-07 2:22 ` Hu Tao
2014-01-06 7:27 ` [Qemu-devel] [RFC PATCH v4 0/4] qemu-img: add preallocation=full Hu Tao
2014-01-13 10:26 ` Hu Tao
2014-01-17 9:05 ` Stefan Hajnoczi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.