All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
@ 2014-05-07  0:23 Peter Lieven
  2014-05-07  2:52 ` Eric Blake
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, famz, Peter Lieven, mreitz, stefanha, pbonzini

this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.

This significantly speeds up file system initialization and
should speed zero write test used to test backend storage
performance.

I ran the following 2 tests on my internal SSD with a
50G QCOW2 container and on an attached iSCSI storage.

a) mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX

QCOW2         [off]     [on]     [unmap]
-----
runtime:       14secs    1.1secs  1.1secs
filesize:      937M      18M      18M

iSCSI         [off]     [on]     [unmap]
----
runtime:       9.3s      0.9s     0.9s

b) dd if=/dev/zero of=/dev/vdX bs=1M oflag=direct

QCOW2         [off]     [on]     [unmap]
-----
runtime:       246secs   18secs   18secs
filesize:      51G       192K     192K
throughput:    203M/s    2.3G/s   2.3G/s

iSCSI*        [off]     [on]     [unmap]
----
runtime:       8mins     45secs   33secs
throughput:    106M/s    1.2G/s   1.6G/s
allocated:     100%      100%     0%

* The storage was connected via an 1Gbit interface.
  It seems to internally handle writing zeroes
  via WRITESAME16 very fast.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
v2->v3: - moved parameter parsing to blockdev_init
        - added per device detect_zeroes status to 
          hmp (info block -v) and qmp (query-block) [Eric]
        - added support to enable detect-zeroes also
          for hot added devices [Eric]
        - added missing entry to qemu_common_drive_opts
        - fixed description of qemu_iovec_is_zero [Fam]

v1->v2: - added tests to commit message (Markus)
RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
           - fixed typo (choosen->chosen) (Eric)
           - added second example to commit msg

RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
              - call zero detection only for format (bs->file != NULL)

 block.c                   |   11 ++++++++++
 block/qapi.c              |   11 ++++++++++
 blockdev.c                |   28 +++++++++++++++++++++++++
 hmp.c                     |    6 ++++++
 include/block/block_int.h |   12 +++++++++++
 include/qemu-common.h     |    1 +
 qapi-schema.json          |   50 +++++++++++++++++++++++++++++++--------------
 qemu-options.hx           |    6 ++++++
 qmp-commands.hx           |    3 +++
 util/iov.c                |   21 +++++++++++++++++++
 10 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index b749d31..f27b35d 100644
--- a/block.c
+++ b/block.c
@@ -3244,6 +3244,17 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
+    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        /* if the device was not opened with discard=on the below flag
+         * is immediately cleared again in bdrv_co_do_write_zeroes */
+        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/block/qapi.c b/block/qapi.c
index af11445..fbf66c2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
 
+    switch (bs->detect_zeroes) {
+    case BDRV_DETECT_ZEROES_ON:
+        info->detect_zeroes = g_strdup("on");
+        break;
+    case BDRV_DETECT_ZEROES_UNMAP:
+        info->detect_zeroes = g_strdup("unmap");
+        break;
+    default:
+        info->detect_zeroes = g_strdup("off");
+    }
+
     if (bs->io_limits_enabled) {
         ThrottleConfig cfg;
         throttle_get_config(&bs->throttle_state, &cfg);
diff --git a/blockdev.c b/blockdev.c
index 7810e9f..96c11fd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
     }
 }
 
+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
+{
+    if (!buf || !strcmp(buf, "off")) {
+        return BDRV_DETECT_ZEROES_OFF;
+    } else if (!strcmp(buf, "on")) {
+        return BDRV_DETECT_ZEROES_ON;
+    } else if (!strcmp(buf, "unmap")) {
+        return BDRV_DETECT_ZEROES_UNMAP;
+    } else {
+        error_setg(errp, "invalid value for detect-zeroes: %s",
+                   buf);
+    }
+    return BDRV_DETECT_ZEROES_OFF;
+}
+
 static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
     if (throttle_conflicting(cfg)) {
@@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
+    BdrvDetectZeroes detect_zeroes;
     BlockDriver *drv = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
@@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
         }
     }
 
+    detect_zeroes =
+        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
+    }
+
     /* init */
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
@@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
     dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
     dinfo->bdrv->read_only = ro;
+    dinfo->bdrv->detect_zeroes = detect_zeroes;
     dinfo->refcount = 1;
     if (serial != NULL) {
         dinfo->serial = g_strdup(serial);
@@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes",
         },
         { /* end of list */ }
     },
diff --git a/hmp.c b/hmp.c
index ca869ba..b1942ed 100644
--- a/hmp.c
+++ b/hmp.c
@@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                            info->value->inserted->backing_file_depth);
         }
 
+        if (verbose) {
+            monitor_printf(mon,
+                           "    Detect zeroes:    %s\n",
+                           info->value->inserted->detect_zeroes);
+        }
+
         if (info->value->inserted->bps
             || info->value->inserted->bps_rd
             || info->value->inserted->bps_wr
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ffcb69..7b9ca05 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
 } BlockLimits;
 
 /*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+    BDRV_DETECT_ZEROES_OFF   = 0x0,
+    BDRV_DETECT_ZEROES_ON    = 0x1,
+    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+    BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
  * inspect bdrv_append() to determine if the new fields need to be
@@ -364,6 +375,7 @@ struct BlockDriverState {
     BlockJob *job;
 
     QDict *options;
+    BdrvDetectZeroes detect_zeroes;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index a998e8d..8e3d6eb 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
 void qemu_iovec_concat_iov(QEMUIOVector *dst,
                            struct iovec *src_iov, unsigned int src_cnt,
                            size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qapi-schema.json b/qapi-schema.json
index 0b00427..5e3b4a89 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -937,6 +937,8 @@
 # @encryption_key_missing: true if the backing device is encrypted but an
 #                          valid encryption key is missing
 #
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
 # @bps: total throughput limit in bytes per second is specified
 #
 # @bps_rd: read throughput limit in bytes per second is specified
@@ -972,6 +974,7 @@
   'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'str',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             'image': 'ImageInfo',
@@ -4250,6 +4253,20 @@
   'data': [ 'ignore', 'unmap' ] }
 
 ##
+# @BlockdevDetectZeroesOptions
+#
+# Selects the operation mode for zero write detection.
+#
+# @off:      Disabled
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
 # @BlockdevAioOptions
 #
 # Selects the AIO backend to handle I/O requests
@@ -4301,20 +4318,22 @@
 # Options that are available for all block devices, independent of the block
 # driver.
 #
-# @driver:      block driver name
-# @id:          #optional id by which the new block device can be referred to.
-#               This is a required option on the top level of blockdev-add, and
-#               currently not allowed on any other level.
-# @node-name:   #optional the name of a block driver state node (Since 2.0)
-# @discard:     #optional discard-related options (default: ignore)
-# @cache:       #optional cache-related options
-# @aio:         #optional AIO backend (default: threads)
-# @rerror:      #optional how to handle read errors on the device
-#               (default: report)
-# @werror:      #optional how to handle write errors on the device
-#               (default: enospc)
-# @read-only:   #optional whether the block device should be read-only
-#               (default: false)
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred to.
+#                 This is a required option on the top level of blockdev-add, and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
 #
 # Since: 1.7
 ##
@@ -4327,7 +4346,8 @@
             '*aio': 'BlockdevAioOptions',
             '*rerror': 'BlockdevOnError',
             '*werror': 'BlockdevOnError',
-            '*read-only': 'bool' } }
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
 
 ##
 # @BlockdevOptionsFile
diff --git a/qemu-options.hx b/qemu-options.hx
index 781af14..5ee94ea 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
     "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,detect-zeroes=on|off|unmap]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write attempts will fail.
 @item copy-on-read=@var{copy-on-read}
 @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
 file sectors into the image file.
+@item detect-zeroes=@var{detect-zeroes}
+@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ed3ab92..a535955 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2032,6 +2032,8 @@ Each json-object contain the following:
          - "iops_rd_max":  read I/O operations max (json-int)
          - "iops_wr_max":  write I/O operations max (json-int)
          - "iops_size": I/O size when limiting by iops (json-int)
+         - "detect_zeroes": detect and optimize zero writes (json-string)
+             - Possible values: "off", "on", "unmap"
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -2108,6 +2110,7 @@ Example:
                "iops_rd_max": 0,
                "iops_wr_max": 0,
                "iops_size": 0,
+               "detect_zeroes": "on",
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..f8c49a1 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
+/*
+ *  check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07  0:23 [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes Peter Lieven
@ 2014-05-07  2:52 ` Eric Blake
  2014-05-07 14:26   ` Peter Lieven
  0 siblings, 1 reply; 12+ messages in thread
From: Eric Blake @ 2014-05-07  2:52 UTC (permalink / raw)
  To: Peter Lieven, qemu-devel; +Cc: kwolf, pbonzini, famz, stefanha, mreitz

[-- Attachment #1: Type: text/plain, Size: 5240 bytes --]

On 05/06/2014 06:23 PM, Peter Lieven wrote:
> this patch tries to optimize zero write requests
> by automatically using bdrv_write_zeroes if it is
> supported by the format.
> 
> This significantly speeds up file system initialization and
> should speed zero write test used to test backend storage
> performance.
> 

> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> v2->v3: - moved parameter parsing to blockdev_init
>         - added per device detect_zeroes status to 
>           hmp (info block -v) and qmp (query-block) [Eric]
>         - added support to enable detect-zeroes also
>           for hot added devices [Eric]
>         - added missing entry to qemu_common_drive_opts
>         - fixed description of qemu_iovec_is_zero [Fam]
> 

>  
> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> +{
> +    if (!buf || !strcmp(buf, "off")) {
> +        return BDRV_DETECT_ZEROES_OFF;
> +    } else if (!strcmp(buf, "on")) {
> +        return BDRV_DETECT_ZEROES_ON;
> +    } else if (!strcmp(buf, "unmap")) {
> +        return BDRV_DETECT_ZEROES_UNMAP;
> +    } else {
> +        error_setg(errp, "invalid value for detect-zeroes: %s",
> +                   buf);
> +    }
> +    return BDRV_DETECT_ZEROES_OFF;
> +}

Isn't there QAPI generated code that you can use instead of open-coding
this conversion between string and enum values?

> +++ b/qapi-schema.json
> @@ -937,6 +937,8 @@
>  # @encryption_key_missing: true if the backing device is encrypted but an
>  #                          valid encryption key is missing
>  #
> +# @detect_zeroes: detect and optimize zero writes (Since 2.1)
> +#
>  # @bps: total throughput limit in bytes per second is specified
>  #
>  # @bps_rd: read throughput limit in bytes per second is specified
> @@ -972,6 +974,7 @@
>    'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
>              '*backing_file': 'str', 'backing_file_depth': 'int',
>              'encrypted': 'bool', 'encryption_key_missing': 'bool',
> +            'detect_zeroes': 'str',

For new additions, we try to prefer dash over underscore.  Eww - we've
already been inconsistent in this struct, with backing_file vs. node-name.

Maybe s/detect_zeroes/detect-zeroes/.  I obviously can't argue too
strongly in light of the rest of the struct in isolation.  However, you
DID use detect-zeroes on the input side later in the patch, so being
consistent between the two sites would be nice (given that node-name was
named consistently).

On the other hand, I _can_ argue strongly that open-coding this as 'str'
is wrong.  You already added an enum type, so use it:

'detect_zeroes': 'BlockdevDetectZeroesOptions',

(hmm, in this context, it's not really an option, so maybe there is some
other name we can bikeshed about; but beyond 'BlockdevDetectZeroes', I
don't have any other good ideas)

zero is one of those odd words with two different pluralized spellings
both in common use.  Code base currently has a 1:2 ratio between 'zeros'
vs. 'zeroes', so I guess you're okay; on the other hand, 'man qemu-img'
documents that 'convert -S' detects "zeros".

>              'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>              'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>              'image': 'ImageInfo',
> @@ -4250,6 +4253,20 @@
>    'data': [ 'ignore', 'unmap' ] }
>  
>  ##
> +# @BlockdevDetectZeroesOptions
> +#
> +# Selects the operation mode for zero write detection.

s/Selects/Describes/ since you are also going to use this enum on the
output field.

> +#
> +# @off:      Disabled
> +# @on:       Enabled

Maybe more details?  Seeing this doesn't tell me the tradeoffs involved
in tweaking the parameter (one is faster, while the other uses less
storage resources - so maybe mention those benefits).  I see the
documentation later on for the command line, so maybe repeating some of
that here would help someone going by just the QMP interface.

> +# @unmap:    Enabled and even try to unmap blocks if possible
> +#
> +# Since: 2.1
> +##
> +{ 'enum': 'BlockdevDetectZeroesOptions',
> +  'data': [ 'off', 'on', 'unmap' ] }
> +
> +##
>  # @BlockdevAioOptions
>  #
>  # Selects the AIO backend to handle I/O requests

> @@ -4327,7 +4346,8 @@
>              '*aio': 'BlockdevAioOptions',
>              '*rerror': 'BlockdevOnError',
>              '*werror': 'BlockdevOnError',
> -            '*read-only': 'bool' } }
> +            '*read-only': 'bool',
> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }

This part is fine.

>  @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
>  file sectors into the image file.
> +@item detect-zeroes=@var{detect-zeroes}
> +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
> +conversion of plain zero writes by the OS to driver specific optimized
> +zero write commands. If "unmap" is chosen and @var{discard} is "on"
> +a zero write may even be converted to an UNMAP operation.

This looks good.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07  2:52 ` Eric Blake
@ 2014-05-07 14:26   ` Peter Lieven
  2014-05-07 15:19     ` Kevin Wolf
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07 14:26 UTC (permalink / raw)
  To: Eric Blake, qemu-devel; +Cc: kwolf, pbonzini, famz, stefanha, mreitz

On 07.05.2014 04:52, Eric Blake wrote:
> On 05/06/2014 06:23 PM, Peter Lieven wrote:
>> this patch tries to optimize zero write requests
>> by automatically using bdrv_write_zeroes if it is
>> supported by the format.
>>
>> This significantly speeds up file system initialization and
>> should speed zero write test used to test backend storage
>> performance.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> v2->v3: - moved parameter parsing to blockdev_init
>>          - added per device detect_zeroes status to
>>            hmp (info block -v) and qmp (query-block) [Eric]
>>          - added support to enable detect-zeroes also
>>            for hot added devices [Eric]
>>          - added missing entry to qemu_common_drive_opts
>>          - fixed description of qemu_iovec_is_zero [Fam]
>>
>>   
>> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
>> +{
>> +    if (!buf || !strcmp(buf, "off")) {
>> +        return BDRV_DETECT_ZEROES_OFF;
>> +    } else if (!strcmp(buf, "on")) {
>> +        return BDRV_DETECT_ZEROES_ON;
>> +    } else if (!strcmp(buf, "unmap")) {
>> +        return BDRV_DETECT_ZEROES_UNMAP;
>> +    } else {
>> +        error_setg(errp, "invalid value for detect-zeroes: %s",
>> +                   buf);
>> +    }
>> +    return BDRV_DETECT_ZEROES_OFF;
>> +}
> Isn't there QAPI generated code that you can use instead of open-coding
> this conversion between string and enum values?

Actually I have no idea. As you pointed out in the qapi patch I sent
it was quite hard for me to crawl through the whole stuff as one who is not
familiar with it. Can somebody advise here? Anyhow, I wonder
how this would work since qapi doesn't know the C Macros.

>
>> +++ b/qapi-schema.json
>> @@ -937,6 +937,8 @@
>>   # @encryption_key_missing: true if the backing device is encrypted but an
>>   #                          valid encryption key is missing
>>   #
>> +# @detect_zeroes: detect and optimize zero writes (Since 2.1)
>> +#
>>   # @bps: total throughput limit in bytes per second is specified
>>   #
>>   # @bps_rd: read throughput limit in bytes per second is specified
>> @@ -972,6 +974,7 @@
>>     'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
>>               '*backing_file': 'str', 'backing_file_depth': 'int',
>>               'encrypted': 'bool', 'encryption_key_missing': 'bool',
>> +            'detect_zeroes': 'str',
> For new additions, we try to prefer dash over underscore.  Eww - we've
> already been inconsistent in this struct, with backing_file vs. node-name.
>
> Maybe s/detect_zeroes/detect-zeroes/.  I obviously can't argue too
> strongly in light of the rest of the struct in isolation.  However, you
> DID use detect-zeroes on the input side later in the patch, so being
> consistent between the two sites would be nice (given that node-name was
> named consistently).

I tried to be consistent here. All "setters" use a dash while all "getters"
have an underscore. Look e.g. at all the I/O thortteling parameters.
One reason might be that a dash is not allowed as a member name in a struct.

 From this perspective the only inconsistent one is node-name.

>
> On the other hand, I _can_ argue strongly that open-coding this as 'str'
> is wrong.  You already added an enum type, so use it:
>
> 'detect_zeroes': 'BlockdevDetectZeroesOptions',
>
> (hmm, in this context, it's not really an option, so maybe there is some
> other name we can bikeshed about; but beyond 'BlockdevDetectZeroes', I
> don't have any other good ideas)

I tried to, however it did not compile for me when I tried this.

>
> zero is one of those odd words with two different pluralized spellings
> both in common use.  Code base currently has a 1:2 ratio between 'zeros'
> vs. 'zeroes', so I guess you're okay; on the other hand, 'man qemu-img'
> documents that 'convert -S' detects "zeros".

The reason I choosed zeroEs is that the function in the background
is named bdrv_write_zeroEs.

>
>>               'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>>               'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>>               'image': 'ImageInfo',
>> @@ -4250,6 +4253,20 @@
>>     'data': [ 'ignore', 'unmap' ] }
>>   
>>   ##
>> +# @BlockdevDetectZeroesOptions
>> +#
>> +# Selects the operation mode for zero write detection.
> s/Selects/Describes/ since you are also going to use this enum on the
> output field.

Ok

>
>> +#
>> +# @off:      Disabled
>> +# @on:       Enabled
> Maybe more details?  Seeing this doesn't tell me the tradeoffs involved
> in tweaking the parameter (one is faster, while the other uses less
> storage resources - so maybe mention those benefits).  I see the
> documentation later on for the command line, so maybe repeating some of
> that here would help someone going by just the QMP interface.

Will do.

Peter

>
>> +# @unmap:    Enabled and even try to unmap blocks if possible
>> +#
>> +# Since: 2.1
>> +##
>> +{ 'enum': 'BlockdevDetectZeroesOptions',
>> +  'data': [ 'off', 'on', 'unmap' ] }
>> +
>> +##
>>   # @BlockdevAioOptions
>>   #
>>   # Selects the AIO backend to handle I/O requests
>> @@ -4327,7 +4346,8 @@
>>               '*aio': 'BlockdevAioOptions',
>>               '*rerror': 'BlockdevOnError',
>>               '*werror': 'BlockdevOnError',
>> -            '*read-only': 'bool' } }
>> +            '*read-only': 'bool',
>> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
> This part is fine.
>
>>   @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
>>   file sectors into the image file.
>> +@item detect-zeroes=@var{detect-zeroes}
>> +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
>> +conversion of plain zero writes by the OS to driver specific optimized
>> +zero write commands. If "unmap" is chosen and @var{discard} is "on"
>> +a zero write may even be converted to an UNMAP operation.
> This looks good.
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 14:26   ` Peter Lieven
@ 2014-05-07 15:19     ` Kevin Wolf
  2014-05-07 15:26       ` Peter Lieven
  0 siblings, 1 reply; 12+ messages in thread
From: Kevin Wolf @ 2014-05-07 15:19 UTC (permalink / raw)
  To: Peter Lieven; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
> On 07.05.2014 04:52, Eric Blake wrote:
> >On 05/06/2014 06:23 PM, Peter Lieven wrote:
> >>this patch tries to optimize zero write requests
> >>by automatically using bdrv_write_zeroes if it is
> >>supported by the format.
> >>
> >>This significantly speeds up file system initialization and
> >>should speed zero write test used to test backend storage
> >>performance.
> >>
> >>Signed-off-by: Peter Lieven <pl@kamp.de>
> >>---
> >>v2->v3: - moved parameter parsing to blockdev_init
> >>         - added per device detect_zeroes status to
> >>           hmp (info block -v) and qmp (query-block) [Eric]
> >>         - added support to enable detect-zeroes also
> >>           for hot added devices [Eric]
> >>         - added missing entry to qemu_common_drive_opts
> >>         - fixed description of qemu_iovec_is_zero [Fam]
> >>
> >>+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> >>+{
> >>+    if (!buf || !strcmp(buf, "off")) {
> >>+        return BDRV_DETECT_ZEROES_OFF;
> >>+    } else if (!strcmp(buf, "on")) {
> >>+        return BDRV_DETECT_ZEROES_ON;
> >>+    } else if (!strcmp(buf, "unmap")) {
> >>+        return BDRV_DETECT_ZEROES_UNMAP;
> >>+    } else {
> >>+        error_setg(errp, "invalid value for detect-zeroes: %s",
> >>+                   buf);
> >>+    }
> >>+    return BDRV_DETECT_ZEROES_OFF;
> >>+}
> >Isn't there QAPI generated code that you can use instead of open-coding
> >this conversion between string and enum values?
> 
> Actually I have no idea. As you pointed out in the qapi patch I sent
> it was quite hard for me to crawl through the whole stuff as one who is not
> familiar with it. Can somebody advise here? Anyhow, I wonder
> how this would work since qapi doesn't know the C Macros.

QAPI does generate C enums, so you should take whatever identifier it
uses instead of defining your own macros. You may need to include
qapi-types.h for this. It also creates a *_lookup array that maps enum
IDs to strings.

Kevin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 15:19     ` Kevin Wolf
@ 2014-05-07 15:26       ` Peter Lieven
  2014-05-07 15:38         ` Kevin Wolf
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07 15:26 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

On 07.05.2014 17:19, Kevin Wolf wrote:
> Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
>> On 07.05.2014 04:52, Eric Blake wrote:
>>> On 05/06/2014 06:23 PM, Peter Lieven wrote:
>>>> this patch tries to optimize zero write requests
>>>> by automatically using bdrv_write_zeroes if it is
>>>> supported by the format.
>>>>
>>>> This significantly speeds up file system initialization and
>>>> should speed zero write test used to test backend storage
>>>> performance.
>>>>
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>> v2->v3: - moved parameter parsing to blockdev_init
>>>>          - added per device detect_zeroes status to
>>>>            hmp (info block -v) and qmp (query-block) [Eric]
>>>>          - added support to enable detect-zeroes also
>>>>            for hot added devices [Eric]
>>>>          - added missing entry to qemu_common_drive_opts
>>>>          - fixed description of qemu_iovec_is_zero [Fam]
>>>>
>>>> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
>>>> +{
>>>> +    if (!buf || !strcmp(buf, "off")) {
>>>> +        return BDRV_DETECT_ZEROES_OFF;
>>>> +    } else if (!strcmp(buf, "on")) {
>>>> +        return BDRV_DETECT_ZEROES_ON;
>>>> +    } else if (!strcmp(buf, "unmap")) {
>>>> +        return BDRV_DETECT_ZEROES_UNMAP;
>>>> +    } else {
>>>> +        error_setg(errp, "invalid value for detect-zeroes: %s",
>>>> +                   buf);
>>>> +    }
>>>> +    return BDRV_DETECT_ZEROES_OFF;
>>>> +}
>>> Isn't there QAPI generated code that you can use instead of open-coding
>>> this conversion between string and enum values?
>> Actually I have no idea. As you pointed out in the qapi patch I sent
>> it was quite hard for me to crawl through the whole stuff as one who is not
>> familiar with it. Can somebody advise here? Anyhow, I wonder
>> how this would work since qapi doesn't know the C Macros.
> QAPI does generate C enums, so you should take whatever identifier it
> uses instead of defining your own macros. You may need to include
> qapi-types.h for this. It also creates a *_lookup array that maps enum
> IDs to strings.

Ah, cool stuff, thank you. I found the enum and the lookup array,
but is there also a function that maps a string to an enum ID?

Peter

>
> Kevin


-- 

Mit freundlichen Grüßen

Peter Lieven

...........................................................

   KAMP Netzwerkdienste GmbH
   Vestische Str. 89-91 | 46117 Oberhausen
   Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
   pl@kamp.de | http://www.kamp.de

   Geschäftsführer: Heiner Lante | Michael Lante
   Amtsgericht Duisburg | HRB Nr. 12154
   USt-Id-Nr.: DE 120607556

...........................................................

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 15:26       ` Peter Lieven
@ 2014-05-07 15:38         ` Kevin Wolf
  2014-05-07 15:45           ` Peter Lieven
  2014-05-07 16:13           ` Peter Lieven
  0 siblings, 2 replies; 12+ messages in thread
From: Kevin Wolf @ 2014-05-07 15:38 UTC (permalink / raw)
  To: Peter Lieven; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

Am 07.05.2014 um 17:26 hat Peter Lieven geschrieben:
> On 07.05.2014 17:19, Kevin Wolf wrote:
> >Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
> >>On 07.05.2014 04:52, Eric Blake wrote:
> >>>On 05/06/2014 06:23 PM, Peter Lieven wrote:
> >>>>this patch tries to optimize zero write requests
> >>>>by automatically using bdrv_write_zeroes if it is
> >>>>supported by the format.
> >>>>
> >>>>This significantly speeds up file system initialization and
> >>>>should speed zero write test used to test backend storage
> >>>>performance.
> >>>>
> >>>>Signed-off-by: Peter Lieven <pl@kamp.de>
> >>>>---
> >>>>v2->v3: - moved parameter parsing to blockdev_init
> >>>>         - added per device detect_zeroes status to
> >>>>           hmp (info block -v) and qmp (query-block) [Eric]
> >>>>         - added support to enable detect-zeroes also
> >>>>           for hot added devices [Eric]
> >>>>         - added missing entry to qemu_common_drive_opts
> >>>>         - fixed description of qemu_iovec_is_zero [Fam]
> >>>>
> >>>>+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> >>>>+{
> >>>>+    if (!buf || !strcmp(buf, "off")) {
> >>>>+        return BDRV_DETECT_ZEROES_OFF;
> >>>>+    } else if (!strcmp(buf, "on")) {
> >>>>+        return BDRV_DETECT_ZEROES_ON;
> >>>>+    } else if (!strcmp(buf, "unmap")) {
> >>>>+        return BDRV_DETECT_ZEROES_UNMAP;
> >>>>+    } else {
> >>>>+        error_setg(errp, "invalid value for detect-zeroes: %s",
> >>>>+                   buf);
> >>>>+    }
> >>>>+    return BDRV_DETECT_ZEROES_OFF;
> >>>>+}
> >>>Isn't there QAPI generated code that you can use instead of open-coding
> >>>this conversion between string and enum values?
> >>Actually I have no idea. As you pointed out in the qapi patch I sent
> >>it was quite hard for me to crawl through the whole stuff as one who is not
> >>familiar with it. Can somebody advise here? Anyhow, I wonder
> >>how this would work since qapi doesn't know the C Macros.
> >QAPI does generate C enums, so you should take whatever identifier it
> >uses instead of defining your own macros. You may need to include
> >qapi-types.h for this. It also creates a *_lookup array that maps enum
> >IDs to strings.
> 
> Ah, cool stuff, thank you. I found the enum and the lookup array,
> but is there also a function that maps a string to an enum ID?

I don't think so, but if you need it, you're probably doing something
wrong because QAPI already calls you with an enum parameter and not a
char* one.

Kevin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 15:38         ` Kevin Wolf
@ 2014-05-07 15:45           ` Peter Lieven
  2014-05-07 16:02             ` Peter Lieven
  2014-05-07 16:13           ` Peter Lieven
  1 sibling, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07 15:45 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

On 07.05.2014 17:38, Kevin Wolf wrote:
> Am 07.05.2014 um 17:26 hat Peter Lieven geschrieben:
>> On 07.05.2014 17:19, Kevin Wolf wrote:
>>> Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
>>>> On 07.05.2014 04:52, Eric Blake wrote:
>>>>> On 05/06/2014 06:23 PM, Peter Lieven wrote:
>>>>>> this patch tries to optimize zero write requests
>>>>>> by automatically using bdrv_write_zeroes if it is
>>>>>> supported by the format.
>>>>>>
>>>>>> This significantly speeds up file system initialization and
>>>>>> should speed zero write test used to test backend storage
>>>>>> performance.
>>>>>>
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>> v2->v3: - moved parameter parsing to blockdev_init
>>>>>>          - added per device detect_zeroes status to
>>>>>>            hmp (info block -v) and qmp (query-block) [Eric]
>>>>>>          - added support to enable detect-zeroes also
>>>>>>            for hot added devices [Eric]
>>>>>>          - added missing entry to qemu_common_drive_opts
>>>>>>          - fixed description of qemu_iovec_is_zero [Fam]
>>>>>>
>>>>>> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
>>>>>> +{
>>>>>> +    if (!buf || !strcmp(buf, "off")) {
>>>>>> +        return BDRV_DETECT_ZEROES_OFF;
>>>>>> +    } else if (!strcmp(buf, "on")) {
>>>>>> +        return BDRV_DETECT_ZEROES_ON;
>>>>>> +    } else if (!strcmp(buf, "unmap")) {
>>>>>> +        return BDRV_DETECT_ZEROES_UNMAP;
>>>>>> +    } else {
>>>>>> +        error_setg(errp, "invalid value for detect-zeroes: %s",
>>>>>> +                   buf);
>>>>>> +    }
>>>>>> +    return BDRV_DETECT_ZEROES_OFF;
>>>>>> +}
>>>>> Isn't there QAPI generated code that you can use instead of open-coding
>>>>> this conversion between string and enum values?
>>>> Actually I have no idea. As you pointed out in the qapi patch I sent
>>>> it was quite hard for me to crawl through the whole stuff as one who is not
>>>> familiar with it. Can somebody advise here? Anyhow, I wonder
>>>> how this would work since qapi doesn't know the C Macros.
>>> QAPI does generate C enums, so you should take whatever identifier it
>>> uses instead of defining your own macros. You may need to include
>>> qapi-types.h for this. It also creates a *_lookup array that maps enum
>>> IDs to strings.
>> Ah, cool stuff, thank you. I found the enum and the lookup array,
>> but is there also a function that maps a string to an enum ID?
> I don't think so, but if you need it, you're probably doing something
> wrong because QAPI already calls you with an enum parameter and not a
> char* one.
I am in blockdev_init and want to set dinfo->bdrv->detect_zeroes to
the correct ID. Do you have a fast solution? Everything else in this
function is not using QAPI calls.

Peter

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 15:45           ` Peter Lieven
@ 2014-05-07 16:02             ` Peter Lieven
  2014-05-08  7:44               ` Kevin Wolf
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07 16:02 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

On 07.05.2014 17:45, Peter Lieven wrote:
> On 07.05.2014 17:38, Kevin Wolf wrote:
>> Am 07.05.2014 um 17:26 hat Peter Lieven geschrieben:
>>> On 07.05.2014 17:19, Kevin Wolf wrote:
>>>> Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
>>>>> On 07.05.2014 04:52, Eric Blake wrote:
>>>>>> On 05/06/2014 06:23 PM, Peter Lieven wrote:
>>>>>>> this patch tries to optimize zero write requests
>>>>>>> by automatically using bdrv_write_zeroes if it is
>>>>>>> supported by the format.
>>>>>>>
>>>>>>> This significantly speeds up file system initialization and
>>>>>>> should speed zero write test used to test backend storage
>>>>>>> performance.
>>>>>>>
>>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>>> ---
>>>>>>> v2->v3: - moved parameter parsing to blockdev_init
>>>>>>>          - added per device detect_zeroes status to
>>>>>>>            hmp (info block -v) and qmp (query-block) [Eric]
>>>>>>>          - added support to enable detect-zeroes also
>>>>>>>            for hot added devices [Eric]
>>>>>>>          - added missing entry to qemu_common_drive_opts
>>>>>>>          - fixed description of qemu_iovec_is_zero [Fam]
>>>>>>>
>>>>>>> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
>>>>>>> +{
>>>>>>> +    if (!buf || !strcmp(buf, "off")) {
>>>>>>> +        return BDRV_DETECT_ZEROES_OFF;
>>>>>>> +    } else if (!strcmp(buf, "on")) {
>>>>>>> +        return BDRV_DETECT_ZEROES_ON;
>>>>>>> +    } else if (!strcmp(buf, "unmap")) {
>>>>>>> +        return BDRV_DETECT_ZEROES_UNMAP;
>>>>>>> +    } else {
>>>>>>> +        error_setg(errp, "invalid value for detect-zeroes: %s",
>>>>>>> +                   buf);
>>>>>>> +    }
>>>>>>> +    return BDRV_DETECT_ZEROES_OFF;
>>>>>>> +}
>>>>>> Isn't there QAPI generated code that you can use instead of open-coding
>>>>>> this conversion between string and enum values?
>>>>> Actually I have no idea. As you pointed out in the qapi patch I sent
>>>>> it was quite hard for me to crawl through the whole stuff as one who is not
>>>>> familiar with it. Can somebody advise here? Anyhow, I wonder
>>>>> how this would work since qapi doesn't know the C Macros.
>>>> QAPI does generate C enums, so you should take whatever identifier it
>>>> uses instead of defining your own macros. You may need to include
>>>> qapi-types.h for this. It also creates a *_lookup array that maps enum
>>>> IDs to strings.
>>> Ah, cool stuff, thank you. I found the enum and the lookup array,
>>> but is there also a function that maps a string to an enum ID?
>> I don't think so, but if you need it, you're probably doing something
>> wrong because QAPI already calls you with an enum parameter and not a
>> char* one.
> I am in blockdev_init and want to set dinfo->bdrv->detect_zeroes to
> the correct ID. Do you have a fast solution? Everything else in this
> function is not using QAPI calls.

Actually, there is already a manual parsing for the werror and rerror in this function.

I think for the future there might be need for a generic function that maps a string
to the ID of an enum object.

If you don't have objections I would leave the parsing function as is for the moment.

Peter

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 15:38         ` Kevin Wolf
  2014-05-07 15:45           ` Peter Lieven
@ 2014-05-07 16:13           ` Peter Lieven
  1 sibling, 0 replies; 12+ messages in thread
From: Peter Lieven @ 2014-05-07 16:13 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

On 07.05.2014 17:38, Kevin Wolf wrote:
> Am 07.05.2014 um 17:26 hat Peter Lieven geschrieben:
>> On 07.05.2014 17:19, Kevin Wolf wrote:
>>> Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
>>>> On 07.05.2014 04:52, Eric Blake wrote:
>>>>> On 05/06/2014 06:23 PM, Peter Lieven wrote:
>>>>>> this patch tries to optimize zero write requests
>>>>>> by automatically using bdrv_write_zeroes if it is
>>>>>> supported by the format.
>>>>>>
>>>>>> This significantly speeds up file system initialization and
>>>>>> should speed zero write test used to test backend storage
>>>>>> performance.
>>>>>>
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>> v2->v3: - moved parameter parsing to blockdev_init
>>>>>>          - added per device detect_zeroes status to
>>>>>>            hmp (info block -v) and qmp (query-block) [Eric]
>>>>>>          - added support to enable detect-zeroes also
>>>>>>            for hot added devices [Eric]
>>>>>>          - added missing entry to qemu_common_drive_opts
>>>>>>          - fixed description of qemu_iovec_is_zero [Fam]
>>>>>>
>>>>>> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
>>>>>> +{
>>>>>> +    if (!buf || !strcmp(buf, "off")) {
>>>>>> +        return BDRV_DETECT_ZEROES_OFF;
>>>>>> +    } else if (!strcmp(buf, "on")) {
>>>>>> +        return BDRV_DETECT_ZEROES_ON;
>>>>>> +    } else if (!strcmp(buf, "unmap")) {
>>>>>> +        return BDRV_DETECT_ZEROES_UNMAP;
>>>>>> +    } else {
>>>>>> +        error_setg(errp, "invalid value for detect-zeroes: %s",
>>>>>> +                   buf);
>>>>>> +    }
>>>>>> +    return BDRV_DETECT_ZEROES_OFF;
>>>>>> +}
>>>>> Isn't there QAPI generated code that you can use instead of open-coding
>>>>> this conversion between string and enum values?
>>>> Actually I have no idea. As you pointed out in the qapi patch I sent
>>>> it was quite hard for me to crawl through the whole stuff as one who is not
>>>> familiar with it. Can somebody advise here? Anyhow, I wonder
>>>> how this would work since qapi doesn't know the C Macros.
>>> QAPI does generate C enums, so you should take whatever identifier it
>>> uses instead of defining your own macros. You may need to include
>>> qapi-types.h for this. It also creates a *_lookup array that maps enum
>>> IDs to strings.
>> Ah, cool stuff, thank you. I found the enum and the lookup array,
>> but is there also a function that maps a string to an enum ID?
> I don't think so, but if you need it, you're probably doing something
> wrong because QAPI already calls you with an enum parameter and not a
> char* one.
>
> Kevin

This works for me:

diff --git a/blockdev.c b/blockdev.c
index 7810e9f..955bd49 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -288,6 +288,23 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
      }
  }

+static int parse_enum_option(const char *lookup[], const char *buf, int max,
+                             int def, Error **errp)
+{
+    int i;
+    if (!buf) {
+        return def;
+    }
+    for (i = 0; i < max; i++) {
+        if (!strcmp(buf, lookup[i])) {
+            return i;
+        }
+    }
+    error_setg(errp, "invalid parameter value: %s",
+               buf);
+    return def;
+}
+
  static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
  {
      if (throttle_conflicting(cfg)) {
@@ -324,6 +341,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
      QemuOpts *opts;
      const char *id;
      bool has_driver_specific_opts;
+    BlockdevDetectZeroesOptions detect_zeroes;
      BlockDriver *drv = NULL;

      /* Check common options by copying from bs_opts to opts, all other options
@@ -452,6 +470,17 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
          }
      }

+    detect_zeroes =
+        parse_enum_option(BlockdevDetectZeroesOptions_lookup,
+                          qemu_opt_get(opts, "detect-zeroes"),
+                          BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+                          BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                          &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
+    }
+
      /* init */
      dinfo = g_malloc0(sizeof(*dinfo));
      dinfo->id = g_strdup(qemu_opts_id(opts));

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07 16:02             ` Peter Lieven
@ 2014-05-08  7:44               ` Kevin Wolf
  0 siblings, 0 replies; 12+ messages in thread
From: Kevin Wolf @ 2014-05-08  7:44 UTC (permalink / raw)
  To: Peter Lieven; +Cc: famz, qemu-devel, mreitz, stefanha, pbonzini

Am 07.05.2014 um 18:02 hat Peter Lieven geschrieben:
> On 07.05.2014 17:45, Peter Lieven wrote:
> >On 07.05.2014 17:38, Kevin Wolf wrote:
> >>Am 07.05.2014 um 17:26 hat Peter Lieven geschrieben:
> >>>On 07.05.2014 17:19, Kevin Wolf wrote:
> >>>>Am 07.05.2014 um 16:26 hat Peter Lieven geschrieben:
> >>>>>On 07.05.2014 04:52, Eric Blake wrote:
> >>>>>>On 05/06/2014 06:23 PM, Peter Lieven wrote:
> >>>>>>>this patch tries to optimize zero write requests
> >>>>>>>by automatically using bdrv_write_zeroes if it is
> >>>>>>>supported by the format.
> >>>>>>>
> >>>>>>>This significantly speeds up file system initialization and
> >>>>>>>should speed zero write test used to test backend storage
> >>>>>>>performance.
> >>>>>>>
> >>>>>>>Signed-off-by: Peter Lieven <pl@kamp.de>
> >>>>>>>---
> >>>>>>>v2->v3: - moved parameter parsing to blockdev_init
> >>>>>>>         - added per device detect_zeroes status to
> >>>>>>>           hmp (info block -v) and qmp (query-block) [Eric]
> >>>>>>>         - added support to enable detect-zeroes also
> >>>>>>>           for hot added devices [Eric]
> >>>>>>>         - added missing entry to qemu_common_drive_opts
> >>>>>>>         - fixed description of qemu_iovec_is_zero [Fam]
> >>>>>>>
> >>>>>>>+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> >>>>>>>+{
> >>>>>>>+    if (!buf || !strcmp(buf, "off")) {
> >>>>>>>+        return BDRV_DETECT_ZEROES_OFF;
> >>>>>>>+    } else if (!strcmp(buf, "on")) {
> >>>>>>>+        return BDRV_DETECT_ZEROES_ON;
> >>>>>>>+    } else if (!strcmp(buf, "unmap")) {
> >>>>>>>+        return BDRV_DETECT_ZEROES_UNMAP;
> >>>>>>>+    } else {
> >>>>>>>+        error_setg(errp, "invalid value for detect-zeroes: %s",
> >>>>>>>+                   buf);
> >>>>>>>+    }
> >>>>>>>+    return BDRV_DETECT_ZEROES_OFF;
> >>>>>>>+}
> >>>>>>Isn't there QAPI generated code that you can use instead of open-coding
> >>>>>>this conversion between string and enum values?
> >>>>>Actually I have no idea. As you pointed out in the qapi patch I sent
> >>>>>it was quite hard for me to crawl through the whole stuff as one who is not
> >>>>>familiar with it. Can somebody advise here? Anyhow, I wonder
> >>>>>how this would work since qapi doesn't know the C Macros.
> >>>>QAPI does generate C enums, so you should take whatever identifier it
> >>>>uses instead of defining your own macros. You may need to include
> >>>>qapi-types.h for this. It also creates a *_lookup array that maps enum
> >>>>IDs to strings.
> >>>Ah, cool stuff, thank you. I found the enum and the lookup array,
> >>>but is there also a function that maps a string to an enum ID?
> >>I don't think so, but if you need it, you're probably doing something
> >>wrong because QAPI already calls you with an enum parameter and not a
> >>char* one.
> >I am in blockdev_init and want to set dinfo->bdrv->detect_zeroes to
> >the correct ID. Do you have a fast solution? Everything else in this
> >function is not using QAPI calls.
> 
> Actually, there is already a manual parsing for the werror and rerror in this function.
> 
> I think for the future there might be need for a generic function that maps a string
> to the ID of an enum object.
> 
> If you don't have objections I would leave the parsing function as is for the moment.

Ah, so this is not for actual QAPI code, but the command line. Yeah, I
guess that's okay then, at least for the moment.

Kevin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
  2014-05-07  0:01 Peter Lieven
@ 2014-05-07  0:19 ` Peter Lieven
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Lieven @ 2014-05-07  0:19 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, famz, mreitz, stefanha, pbonzini

please ignore this one, I accidently used an old commit message

Am 07.05.2014 02:01, schrieb Peter Lieven:
> this patch tries to optimize zero write requests
> by automatically using bdrv_write_zeroes if it is
> supported by the format.
>
> this should significantly speed up file system initialization and
> should speed zero write test used to test backend storage performance.
>
> the difference can simply be tested by e.g.
>
>    dd if=/dev/zero of=/dev/vdX bs=1M
>
> or
>
>    mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> v2->v3: - moved parameter parsing to blockdev_init
>         - added per device detect_zeroes status to 
>           hmp (info block -v) and qmp (query-block) [Eric]
>         - added support to enable detect-zeroes also
>           for hot added devices [Eric]
>         - added missing entry to qemu_common_drive_opts
>         - fixed description of qemu_iovec_is_zero [Fam]
>
> v1->v2: - added tests to commit message (Markus)
> RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
>            - fixed typo (choosen->chosen) (Eric)
>            - added second example to commit msg
>
> RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
>               - call zero detection only for format (bs->file != NULL)
>
>  block.c                   |   11 ++++++++++
>  block/qapi.c              |   11 ++++++++++
>  blockdev.c                |   28 +++++++++++++++++++++++++
>  hmp.c                     |    6 ++++++
>  include/block/block_int.h |   12 +++++++++++
>  include/qemu-common.h     |    1 +
>  qapi-schema.json          |   50 +++++++++++++++++++++++++++++++--------------
>  qemu-options.hx           |    6 ++++++
>  qmp-commands.hx           |    3 +++
>  util/iov.c                |   21 +++++++++++++++++++
>  10 files changed, 134 insertions(+), 15 deletions(-)
>
> diff --git a/block.c b/block.c
> index b749d31..f27b35d 100644
> --- a/block.c
> +++ b/block.c
> @@ -3244,6 +3244,17 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
>  
>      ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
>  
> +    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
> +        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
> +        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
> +        flags |= BDRV_REQ_ZERO_WRITE;
> +        /* if the device was not opened with discard=on the below flag
> +         * is immediately cleared again in bdrv_co_do_write_zeroes */
> +        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
> +            flags |= BDRV_REQ_MAY_UNMAP;
> +        }
> +    }
> +
>      if (ret < 0) {
>          /* Do nothing, write notifier decided to fail this request */
>      } else if (flags & BDRV_REQ_ZERO_WRITE) {
> diff --git a/block/qapi.c b/block/qapi.c
> index af11445..fbf66c2 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>  
>      info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>  
> +    switch (bs->detect_zeroes) {
> +    case BDRV_DETECT_ZEROES_ON:
> +        info->detect_zeroes = g_strdup("on");
> +        break;
> +    case BDRV_DETECT_ZEROES_UNMAP:
> +        info->detect_zeroes = g_strdup("unmap");
> +        break;
> +    default:
> +        info->detect_zeroes = g_strdup("off");
> +    }
> +
>      if (bs->io_limits_enabled) {
>          ThrottleConfig cfg;
>          throttle_get_config(&bs->throttle_state, &cfg);
> diff --git a/blockdev.c b/blockdev.c
> index 7810e9f..96c11fd 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
>      }
>  }
>  
> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> +{
> +    if (!buf || !strcmp(buf, "off")) {
> +        return BDRV_DETECT_ZEROES_OFF;
> +    } else if (!strcmp(buf, "on")) {
> +        return BDRV_DETECT_ZEROES_ON;
> +    } else if (!strcmp(buf, "unmap")) {
> +        return BDRV_DETECT_ZEROES_UNMAP;
> +    } else {
> +        error_setg(errp, "invalid value for detect-zeroes: %s",
> +                   buf);
> +    }
> +    return BDRV_DETECT_ZEROES_OFF;
> +}
> +
>  static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
>  {
>      if (throttle_conflicting(cfg)) {
> @@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      QemuOpts *opts;
>      const char *id;
>      bool has_driver_specific_opts;
> +    BdrvDetectZeroes detect_zeroes;
>      BlockDriver *drv = NULL;
>  
>      /* Check common options by copying from bs_opts to opts, all other options
> @@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>          }
>      }
>  
> +    detect_zeroes =
> +        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        goto early_err;
> +    }
> +
>      /* init */
>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
> @@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      }
>      dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>      dinfo->bdrv->read_only = ro;
> +    dinfo->bdrv->detect_zeroes = detect_zeroes;
>      dinfo->refcount = 1;
>      if (serial != NULL) {
>          dinfo->serial = g_strdup(serial);
> @@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
>              .name = "copy-on-read",
>              .type = QEMU_OPT_BOOL,
>              .help = "copy read data from backing file into image file",
> +        },{
> +            .name = "detect-zeroes",
> +            .type = QEMU_OPT_STRING,
> +            .help = "try to optimize zero writes",
>          },
>          { /* end of list */ }
>      },
> diff --git a/hmp.c b/hmp.c
> index ca869ba..b1942ed 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
>                             info->value->inserted->backing_file_depth);
>          }
>  
> +        if (verbose) {
> +            monitor_printf(mon,
> +                           "    Detect zeroes:    %s\n",
> +                           info->value->inserted->detect_zeroes);
> +        }
> +
>          if (info->value->inserted->bps
>              || info->value->inserted->bps_rd
>              || info->value->inserted->bps_wr
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 9ffcb69..7b9ca05 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -271,6 +271,17 @@ typedef struct BlockLimits {
>  } BlockLimits;
>  
>  /*
> + * Different operation modes for automatic zero detection
> + * to speed the write operation up with bdrv_write_zeroes.
> + */
> +typedef enum {
> +    BDRV_DETECT_ZEROES_OFF   = 0x0,
> +    BDRV_DETECT_ZEROES_ON    = 0x1,
> +    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
> +    BDRV_DETECT_ZEROES_UNMAP = 0x2,
> +} BdrvDetectZeroes;
> +
> +/*
>   * Note: the function bdrv_append() copies and swaps contents of
>   * BlockDriverStates, so if you add new fields to this struct, please
>   * inspect bdrv_append() to determine if the new fields need to be
> @@ -364,6 +375,7 @@ struct BlockDriverState {
>      BlockJob *job;
>  
>      QDict *options;
> +    BdrvDetectZeroes detect_zeroes;
>  };
>  
>  int get_tmp_filename(char *filename, int size);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index a998e8d..8e3d6eb 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>  void qemu_iovec_concat_iov(QEMUIOVector *dst,
>                             struct iovec *src_iov, unsigned int src_cnt,
>                             size_t soffset, size_t sbytes);
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov);
>  void qemu_iovec_destroy(QEMUIOVector *qiov);
>  void qemu_iovec_reset(QEMUIOVector *qiov);
>  size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 0b00427..5e3b4a89 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -937,6 +937,8 @@
>  # @encryption_key_missing: true if the backing device is encrypted but an
>  #                          valid encryption key is missing
>  #
> +# @detect_zeroes: detect and optimize zero writes (Since 2.1)
> +#
>  # @bps: total throughput limit in bytes per second is specified
>  #
>  # @bps_rd: read throughput limit in bytes per second is specified
> @@ -972,6 +974,7 @@
>    'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
>              '*backing_file': 'str', 'backing_file_depth': 'int',
>              'encrypted': 'bool', 'encryption_key_missing': 'bool',
> +            'detect_zeroes': 'str',
>              'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>              'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>              'image': 'ImageInfo',
> @@ -4250,6 +4253,20 @@
>    'data': [ 'ignore', 'unmap' ] }
>  
>  ##
> +# @BlockdevDetectZeroesOptions
> +#
> +# Selects the operation mode for zero write detection.
> +#
> +# @off:      Disabled
> +# @on:       Enabled
> +# @unmap:    Enabled and even try to unmap blocks if possible
> +#
> +# Since: 2.1
> +##
> +{ 'enum': 'BlockdevDetectZeroesOptions',
> +  'data': [ 'off', 'on', 'unmap' ] }
> +
> +##
>  # @BlockdevAioOptions
>  #
>  # Selects the AIO backend to handle I/O requests
> @@ -4301,20 +4318,22 @@
>  # Options that are available for all block devices, independent of the block
>  # driver.
>  #
> -# @driver:      block driver name
> -# @id:          #optional id by which the new block device can be referred to.
> -#               This is a required option on the top level of blockdev-add, and
> -#               currently not allowed on any other level.
> -# @node-name:   #optional the name of a block driver state node (Since 2.0)
> -# @discard:     #optional discard-related options (default: ignore)
> -# @cache:       #optional cache-related options
> -# @aio:         #optional AIO backend (default: threads)
> -# @rerror:      #optional how to handle read errors on the device
> -#               (default: report)
> -# @werror:      #optional how to handle write errors on the device
> -#               (default: enospc)
> -# @read-only:   #optional whether the block device should be read-only
> -#               (default: false)
> +# @driver:        block driver name
> +# @id:            #optional id by which the new block device can be referred to.
> +#                 This is a required option on the top level of blockdev-add, and
> +#                 currently not allowed on any other level.
> +# @node-name:     #optional the name of a block driver state node (Since 2.0)
> +# @discard:       #optional discard-related options (default: ignore)
> +# @cache:         #optional cache-related options
> +# @aio:           #optional AIO backend (default: threads)
> +# @rerror:        #optional how to handle read errors on the device
> +#                 (default: report)
> +# @werror:        #optional how to handle write errors on the device
> +#                 (default: enospc)
> +# @read-only:     #optional whether the block device should be read-only
> +#                 (default: false)
> +# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
> +#                 (default: off)
>  #
>  # Since: 1.7
>  ##
> @@ -4327,7 +4346,8 @@
>              '*aio': 'BlockdevAioOptions',
>              '*rerror': 'BlockdevOnError',
>              '*werror': 'BlockdevOnError',
> -            '*read-only': 'bool' } }
> +            '*read-only': 'bool',
> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
>  
>  ##
>  # @BlockdevOptionsFile
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 781af14..5ee94ea 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
>      "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
>      "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
>      "       [,readonly=on|off][,copy-on-read=on|off]\n"
> +    "       [,detect-zeroes=on|off|unmap]\n"
>      "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
>      "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
>      "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
> @@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write attempts will fail.
>  @item copy-on-read=@var{copy-on-read}
>  @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
>  file sectors into the image file.
> +@item detect-zeroes=@var{detect-zeroes}
> +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
> +conversion of plain zero writes by the OS to driver specific optimized
> +zero write commands. If "unmap" is chosen and @var{discard} is "on"
> +a zero write may even be converted to an UNMAP operation.
>  @end table
>  
>  By default, the @option{cache=writeback} mode is used. It will report data
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index ed3ab92..a535955 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -2032,6 +2032,8 @@ Each json-object contain the following:
>           - "iops_rd_max":  read I/O operations max (json-int)
>           - "iops_wr_max":  write I/O operations max (json-int)
>           - "iops_size": I/O size when limiting by iops (json-int)
> +         - "detect_zeroes": detect and optimize zero writes (json-string)
> +             - Possible values: "off", "on", "unmap"
>           - "image": the detail of the image, it is a json-object containing
>              the following:
>               - "filename": image file name (json-string)
> @@ -2108,6 +2110,7 @@ Example:
>                 "iops_rd_max": 0,
>                 "iops_wr_max": 0,
>                 "iops_size": 0,
> +               "detect_zeroes": "on",
>                 "image":{
>                    "filename":"disks/test.qcow2",
>                    "format":"qcow2",
> diff --git a/util/iov.c b/util/iov.c
> index 6569b5a..f8c49a1 100644
> --- a/util/iov.c
> +++ b/util/iov.c
> @@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>      qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
>  }
>  
> +/*
> + *  check if the contents of the iovecs are all zero
> + */
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov)
> +{
> +    int i;
> +    for (i = 0; i < qiov->niov; i++) {
> +        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
> +        uint8_t *ptr = qiov->iov[i].iov_base;
> +        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
> +            return false;
> +        }
> +        for (; offs < qiov->iov[i].iov_len; offs++) {
> +            if (ptr[offs]) {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}
> +
>  void qemu_iovec_destroy(QEMUIOVector *qiov)
>  {
>      assert(qiov->nalloc != -1);

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
@ 2014-05-07  0:01 Peter Lieven
  2014-05-07  0:19 ` Peter Lieven
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07  0:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, famz, Peter Lieven, mreitz, stefanha, pbonzini

this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.

this should significantly speed up file system initialization and
should speed zero write test used to test backend storage performance.

the difference can simply be tested by e.g.

   dd if=/dev/zero of=/dev/vdX bs=1M

or

   mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX

Signed-off-by: Peter Lieven <pl@kamp.de>
---
v2->v3: - moved parameter parsing to blockdev_init
        - added per device detect_zeroes status to 
          hmp (info block -v) and qmp (query-block) [Eric]
        - added support to enable detect-zeroes also
          for hot added devices [Eric]
        - added missing entry to qemu_common_drive_opts
        - fixed description of qemu_iovec_is_zero [Fam]

v1->v2: - added tests to commit message (Markus)
RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
           - fixed typo (choosen->chosen) (Eric)
           - added second example to commit msg

RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
              - call zero detection only for format (bs->file != NULL)

 block.c                   |   11 ++++++++++
 block/qapi.c              |   11 ++++++++++
 blockdev.c                |   28 +++++++++++++++++++++++++
 hmp.c                     |    6 ++++++
 include/block/block_int.h |   12 +++++++++++
 include/qemu-common.h     |    1 +
 qapi-schema.json          |   50 +++++++++++++++++++++++++++++++--------------
 qemu-options.hx           |    6 ++++++
 qmp-commands.hx           |    3 +++
 util/iov.c                |   21 +++++++++++++++++++
 10 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index b749d31..f27b35d 100644
--- a/block.c
+++ b/block.c
@@ -3244,6 +3244,17 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
+    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        /* if the device was not opened with discard=on the below flag
+         * is immediately cleared again in bdrv_co_do_write_zeroes */
+        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/block/qapi.c b/block/qapi.c
index af11445..fbf66c2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
 
+    switch (bs->detect_zeroes) {
+    case BDRV_DETECT_ZEROES_ON:
+        info->detect_zeroes = g_strdup("on");
+        break;
+    case BDRV_DETECT_ZEROES_UNMAP:
+        info->detect_zeroes = g_strdup("unmap");
+        break;
+    default:
+        info->detect_zeroes = g_strdup("off");
+    }
+
     if (bs->io_limits_enabled) {
         ThrottleConfig cfg;
         throttle_get_config(&bs->throttle_state, &cfg);
diff --git a/blockdev.c b/blockdev.c
index 7810e9f..96c11fd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
     }
 }
 
+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
+{
+    if (!buf || !strcmp(buf, "off")) {
+        return BDRV_DETECT_ZEROES_OFF;
+    } else if (!strcmp(buf, "on")) {
+        return BDRV_DETECT_ZEROES_ON;
+    } else if (!strcmp(buf, "unmap")) {
+        return BDRV_DETECT_ZEROES_UNMAP;
+    } else {
+        error_setg(errp, "invalid value for detect-zeroes: %s",
+                   buf);
+    }
+    return BDRV_DETECT_ZEROES_OFF;
+}
+
 static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
     if (throttle_conflicting(cfg)) {
@@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
+    BdrvDetectZeroes detect_zeroes;
     BlockDriver *drv = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
@@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
         }
     }
 
+    detect_zeroes =
+        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
+    }
+
     /* init */
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
@@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
     dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
     dinfo->bdrv->read_only = ro;
+    dinfo->bdrv->detect_zeroes = detect_zeroes;
     dinfo->refcount = 1;
     if (serial != NULL) {
         dinfo->serial = g_strdup(serial);
@@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes",
         },
         { /* end of list */ }
     },
diff --git a/hmp.c b/hmp.c
index ca869ba..b1942ed 100644
--- a/hmp.c
+++ b/hmp.c
@@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                            info->value->inserted->backing_file_depth);
         }
 
+        if (verbose) {
+            monitor_printf(mon,
+                           "    Detect zeroes:    %s\n",
+                           info->value->inserted->detect_zeroes);
+        }
+
         if (info->value->inserted->bps
             || info->value->inserted->bps_rd
             || info->value->inserted->bps_wr
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ffcb69..7b9ca05 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
 } BlockLimits;
 
 /*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+    BDRV_DETECT_ZEROES_OFF   = 0x0,
+    BDRV_DETECT_ZEROES_ON    = 0x1,
+    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+    BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
  * inspect bdrv_append() to determine if the new fields need to be
@@ -364,6 +375,7 @@ struct BlockDriverState {
     BlockJob *job;
 
     QDict *options;
+    BdrvDetectZeroes detect_zeroes;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index a998e8d..8e3d6eb 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
 void qemu_iovec_concat_iov(QEMUIOVector *dst,
                            struct iovec *src_iov, unsigned int src_cnt,
                            size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qapi-schema.json b/qapi-schema.json
index 0b00427..5e3b4a89 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -937,6 +937,8 @@
 # @encryption_key_missing: true if the backing device is encrypted but an
 #                          valid encryption key is missing
 #
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
 # @bps: total throughput limit in bytes per second is specified
 #
 # @bps_rd: read throughput limit in bytes per second is specified
@@ -972,6 +974,7 @@
   'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'str',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             'image': 'ImageInfo',
@@ -4250,6 +4253,20 @@
   'data': [ 'ignore', 'unmap' ] }
 
 ##
+# @BlockdevDetectZeroesOptions
+#
+# Selects the operation mode for zero write detection.
+#
+# @off:      Disabled
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
 # @BlockdevAioOptions
 #
 # Selects the AIO backend to handle I/O requests
@@ -4301,20 +4318,22 @@
 # Options that are available for all block devices, independent of the block
 # driver.
 #
-# @driver:      block driver name
-# @id:          #optional id by which the new block device can be referred to.
-#               This is a required option on the top level of blockdev-add, and
-#               currently not allowed on any other level.
-# @node-name:   #optional the name of a block driver state node (Since 2.0)
-# @discard:     #optional discard-related options (default: ignore)
-# @cache:       #optional cache-related options
-# @aio:         #optional AIO backend (default: threads)
-# @rerror:      #optional how to handle read errors on the device
-#               (default: report)
-# @werror:      #optional how to handle write errors on the device
-#               (default: enospc)
-# @read-only:   #optional whether the block device should be read-only
-#               (default: false)
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred to.
+#                 This is a required option on the top level of blockdev-add, and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
 #
 # Since: 1.7
 ##
@@ -4327,7 +4346,8 @@
             '*aio': 'BlockdevAioOptions',
             '*rerror': 'BlockdevOnError',
             '*werror': 'BlockdevOnError',
-            '*read-only': 'bool' } }
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
 
 ##
 # @BlockdevOptionsFile
diff --git a/qemu-options.hx b/qemu-options.hx
index 781af14..5ee94ea 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
     "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,detect-zeroes=on|off|unmap]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write attempts will fail.
 @item copy-on-read=@var{copy-on-read}
 @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
 file sectors into the image file.
+@item detect-zeroes=@var{detect-zeroes}
+@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ed3ab92..a535955 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2032,6 +2032,8 @@ Each json-object contain the following:
          - "iops_rd_max":  read I/O operations max (json-int)
          - "iops_wr_max":  write I/O operations max (json-int)
          - "iops_size": I/O size when limiting by iops (json-int)
+         - "detect_zeroes": detect and optimize zero writes (json-string)
+             - Possible values: "off", "on", "unmap"
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -2108,6 +2110,7 @@ Example:
                "iops_rd_max": 0,
                "iops_wr_max": 0,
                "iops_size": 0,
+               "detect_zeroes": "on",
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..f8c49a1 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
+/*
+ *  check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-05-08  7:44 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-07  0:23 [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes Peter Lieven
2014-05-07  2:52 ` Eric Blake
2014-05-07 14:26   ` Peter Lieven
2014-05-07 15:19     ` Kevin Wolf
2014-05-07 15:26       ` Peter Lieven
2014-05-07 15:38         ` Kevin Wolf
2014-05-07 15:45           ` Peter Lieven
2014-05-07 16:02             ` Peter Lieven
2014-05-08  7:44               ` Kevin Wolf
2014-05-07 16:13           ` Peter Lieven
  -- strict thread matches above, loose matches on Subject: below --
2014-05-07  0:01 Peter Lieven
2014-05-07  0:19 ` Peter Lieven

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.