All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
@ 2014-05-07  0:23 Peter Lieven
  2014-05-07  2:52 ` Eric Blake
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, famz, Peter Lieven, mreitz, stefanha, pbonzini

this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.

This significantly speeds up file system initialization and
should speed zero write test used to test backend storage
performance.

I ran the following 2 tests on my internal SSD with a
50G QCOW2 container and on an attached iSCSI storage.

a) mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX

QCOW2         [off]     [on]     [unmap]
-----
runtime:       14secs    1.1secs  1.1secs
filesize:      937M      18M      18M

iSCSI         [off]     [on]     [unmap]
----
runtime:       9.3s      0.9s     0.9s

b) dd if=/dev/zero of=/dev/vdX bs=1M oflag=direct

QCOW2         [off]     [on]     [unmap]
-----
runtime:       246secs   18secs   18secs
filesize:      51G       192K     192K
throughput:    203M/s    2.3G/s   2.3G/s

iSCSI*        [off]     [on]     [unmap]
----
runtime:       8mins     45secs   33secs
throughput:    106M/s    1.2G/s   1.6G/s
allocated:     100%      100%     0%

* The storage was connected via an 1Gbit interface.
  It seems to internally handle writing zeroes
  via WRITESAME16 very fast.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
v2->v3: - moved parameter parsing to blockdev_init
        - added per device detect_zeroes status to 
          hmp (info block -v) and qmp (query-block) [Eric]
        - added support to enable detect-zeroes also
          for hot added devices [Eric]
        - added missing entry to qemu_common_drive_opts
        - fixed description of qemu_iovec_is_zero [Fam]

v1->v2: - added tests to commit message (Markus)
RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
           - fixed typo (choosen->chosen) (Eric)
           - added second example to commit msg

RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
              - call zero detection only for format (bs->file != NULL)

 block.c                   |   11 ++++++++++
 block/qapi.c              |   11 ++++++++++
 blockdev.c                |   28 +++++++++++++++++++++++++
 hmp.c                     |    6 ++++++
 include/block/block_int.h |   12 +++++++++++
 include/qemu-common.h     |    1 +
 qapi-schema.json          |   50 +++++++++++++++++++++++++++++++--------------
 qemu-options.hx           |    6 ++++++
 qmp-commands.hx           |    3 +++
 util/iov.c                |   21 +++++++++++++++++++
 10 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index b749d31..f27b35d 100644
--- a/block.c
+++ b/block.c
@@ -3244,6 +3244,17 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
+    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        /* if the device was not opened with discard=on the below flag
+         * is immediately cleared again in bdrv_co_do_write_zeroes */
+        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/block/qapi.c b/block/qapi.c
index af11445..fbf66c2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
 
+    switch (bs->detect_zeroes) {
+    case BDRV_DETECT_ZEROES_ON:
+        info->detect_zeroes = g_strdup("on");
+        break;
+    case BDRV_DETECT_ZEROES_UNMAP:
+        info->detect_zeroes = g_strdup("unmap");
+        break;
+    default:
+        info->detect_zeroes = g_strdup("off");
+    }
+
     if (bs->io_limits_enabled) {
         ThrottleConfig cfg;
         throttle_get_config(&bs->throttle_state, &cfg);
diff --git a/blockdev.c b/blockdev.c
index 7810e9f..96c11fd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
     }
 }
 
+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
+{
+    if (!buf || !strcmp(buf, "off")) {
+        return BDRV_DETECT_ZEROES_OFF;
+    } else if (!strcmp(buf, "on")) {
+        return BDRV_DETECT_ZEROES_ON;
+    } else if (!strcmp(buf, "unmap")) {
+        return BDRV_DETECT_ZEROES_UNMAP;
+    } else {
+        error_setg(errp, "invalid value for detect-zeroes: %s",
+                   buf);
+    }
+    return BDRV_DETECT_ZEROES_OFF;
+}
+
 static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
     if (throttle_conflicting(cfg)) {
@@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
+    BdrvDetectZeroes detect_zeroes;
     BlockDriver *drv = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
@@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
         }
     }
 
+    detect_zeroes =
+        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
+    }
+
     /* init */
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
@@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
     dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
     dinfo->bdrv->read_only = ro;
+    dinfo->bdrv->detect_zeroes = detect_zeroes;
     dinfo->refcount = 1;
     if (serial != NULL) {
         dinfo->serial = g_strdup(serial);
@@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes",
         },
         { /* end of list */ }
     },
diff --git a/hmp.c b/hmp.c
index ca869ba..b1942ed 100644
--- a/hmp.c
+++ b/hmp.c
@@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                            info->value->inserted->backing_file_depth);
         }
 
+        if (verbose) {
+            monitor_printf(mon,
+                           "    Detect zeroes:    %s\n",
+                           info->value->inserted->detect_zeroes);
+        }
+
         if (info->value->inserted->bps
             || info->value->inserted->bps_rd
             || info->value->inserted->bps_wr
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ffcb69..7b9ca05 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
 } BlockLimits;
 
 /*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+    BDRV_DETECT_ZEROES_OFF   = 0x0,
+    BDRV_DETECT_ZEROES_ON    = 0x1,
+    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+    BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
  * inspect bdrv_append() to determine if the new fields need to be
@@ -364,6 +375,7 @@ struct BlockDriverState {
     BlockJob *job;
 
     QDict *options;
+    BdrvDetectZeroes detect_zeroes;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index a998e8d..8e3d6eb 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
 void qemu_iovec_concat_iov(QEMUIOVector *dst,
                            struct iovec *src_iov, unsigned int src_cnt,
                            size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qapi-schema.json b/qapi-schema.json
index 0b00427..5e3b4a89 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -937,6 +937,8 @@
 # @encryption_key_missing: true if the backing device is encrypted but an
 #                          valid encryption key is missing
 #
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
 # @bps: total throughput limit in bytes per second is specified
 #
 # @bps_rd: read throughput limit in bytes per second is specified
@@ -972,6 +974,7 @@
   'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'str',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             'image': 'ImageInfo',
@@ -4250,6 +4253,20 @@
   'data': [ 'ignore', 'unmap' ] }
 
 ##
+# @BlockdevDetectZeroesOptions
+#
+# Selects the operation mode for zero write detection.
+#
+# @off:      Disabled
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
 # @BlockdevAioOptions
 #
 # Selects the AIO backend to handle I/O requests
@@ -4301,20 +4318,22 @@
 # Options that are available for all block devices, independent of the block
 # driver.
 #
-# @driver:      block driver name
-# @id:          #optional id by which the new block device can be referred to.
-#               This is a required option on the top level of blockdev-add, and
-#               currently not allowed on any other level.
-# @node-name:   #optional the name of a block driver state node (Since 2.0)
-# @discard:     #optional discard-related options (default: ignore)
-# @cache:       #optional cache-related options
-# @aio:         #optional AIO backend (default: threads)
-# @rerror:      #optional how to handle read errors on the device
-#               (default: report)
-# @werror:      #optional how to handle write errors on the device
-#               (default: enospc)
-# @read-only:   #optional whether the block device should be read-only
-#               (default: false)
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred to.
+#                 This is a required option on the top level of blockdev-add, and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
 #
 # Since: 1.7
 ##
@@ -4327,7 +4346,8 @@
             '*aio': 'BlockdevAioOptions',
             '*rerror': 'BlockdevOnError',
             '*werror': 'BlockdevOnError',
-            '*read-only': 'bool' } }
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
 
 ##
 # @BlockdevOptionsFile
diff --git a/qemu-options.hx b/qemu-options.hx
index 781af14..5ee94ea 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
     "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,detect-zeroes=on|off|unmap]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write attempts will fail.
 @item copy-on-read=@var{copy-on-read}
 @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
 file sectors into the image file.
+@item detect-zeroes=@var{detect-zeroes}
+@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ed3ab92..a535955 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2032,6 +2032,8 @@ Each json-object contain the following:
          - "iops_rd_max":  read I/O operations max (json-int)
          - "iops_wr_max":  write I/O operations max (json-int)
          - "iops_size": I/O size when limiting by iops (json-int)
+         - "detect_zeroes": detect and optimize zero writes (json-string)
+             - Possible values: "off", "on", "unmap"
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -2108,6 +2110,7 @@ Example:
                "iops_rd_max": 0,
                "iops_wr_max": 0,
                "iops_size": 0,
+               "detect_zeroes": "on",
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..f8c49a1 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
+/*
+ *  check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
@ 2014-05-07  0:01 Peter Lieven
  2014-05-07  0:19 ` Peter Lieven
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Lieven @ 2014-05-07  0:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, famz, Peter Lieven, mreitz, stefanha, pbonzini

this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.

this should significantly speed up file system initialization and
should speed zero write test used to test backend storage performance.

the difference can simply be tested by e.g.

   dd if=/dev/zero of=/dev/vdX bs=1M

or

   mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX

Signed-off-by: Peter Lieven <pl@kamp.de>
---
v2->v3: - moved parameter parsing to blockdev_init
        - added per device detect_zeroes status to 
          hmp (info block -v) and qmp (query-block) [Eric]
        - added support to enable detect-zeroes also
          for hot added devices [Eric]
        - added missing entry to qemu_common_drive_opts
        - fixed description of qemu_iovec_is_zero [Fam]

v1->v2: - added tests to commit message (Markus)
RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
           - fixed typo (choosen->chosen) (Eric)
           - added second example to commit msg

RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
              - call zero detection only for format (bs->file != NULL)

 block.c                   |   11 ++++++++++
 block/qapi.c              |   11 ++++++++++
 blockdev.c                |   28 +++++++++++++++++++++++++
 hmp.c                     |    6 ++++++
 include/block/block_int.h |   12 +++++++++++
 include/qemu-common.h     |    1 +
 qapi-schema.json          |   50 +++++++++++++++++++++++++++++++--------------
 qemu-options.hx           |    6 ++++++
 qmp-commands.hx           |    3 +++
 util/iov.c                |   21 +++++++++++++++++++
 10 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index b749d31..f27b35d 100644
--- a/block.c
+++ b/block.c
@@ -3244,6 +3244,17 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
+    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        /* if the device was not opened with discard=on the below flag
+         * is immediately cleared again in bdrv_co_do_write_zeroes */
+        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/block/qapi.c b/block/qapi.c
index af11445..fbf66c2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
 
+    switch (bs->detect_zeroes) {
+    case BDRV_DETECT_ZEROES_ON:
+        info->detect_zeroes = g_strdup("on");
+        break;
+    case BDRV_DETECT_ZEROES_UNMAP:
+        info->detect_zeroes = g_strdup("unmap");
+        break;
+    default:
+        info->detect_zeroes = g_strdup("off");
+    }
+
     if (bs->io_limits_enabled) {
         ThrottleConfig cfg;
         throttle_get_config(&bs->throttle_state, &cfg);
diff --git a/blockdev.c b/blockdev.c
index 7810e9f..96c11fd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
     }
 }
 
+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
+{
+    if (!buf || !strcmp(buf, "off")) {
+        return BDRV_DETECT_ZEROES_OFF;
+    } else if (!strcmp(buf, "on")) {
+        return BDRV_DETECT_ZEROES_ON;
+    } else if (!strcmp(buf, "unmap")) {
+        return BDRV_DETECT_ZEROES_UNMAP;
+    } else {
+        error_setg(errp, "invalid value for detect-zeroes: %s",
+                   buf);
+    }
+    return BDRV_DETECT_ZEROES_OFF;
+}
+
 static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
     if (throttle_conflicting(cfg)) {
@@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
+    BdrvDetectZeroes detect_zeroes;
     BlockDriver *drv = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
@@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
         }
     }
 
+    detect_zeroes =
+        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
+    }
+
     /* init */
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
@@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
     dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
     dinfo->bdrv->read_only = ro;
+    dinfo->bdrv->detect_zeroes = detect_zeroes;
     dinfo->refcount = 1;
     if (serial != NULL) {
         dinfo->serial = g_strdup(serial);
@@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes",
         },
         { /* end of list */ }
     },
diff --git a/hmp.c b/hmp.c
index ca869ba..b1942ed 100644
--- a/hmp.c
+++ b/hmp.c
@@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                            info->value->inserted->backing_file_depth);
         }
 
+        if (verbose) {
+            monitor_printf(mon,
+                           "    Detect zeroes:    %s\n",
+                           info->value->inserted->detect_zeroes);
+        }
+
         if (info->value->inserted->bps
             || info->value->inserted->bps_rd
             || info->value->inserted->bps_wr
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ffcb69..7b9ca05 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
 } BlockLimits;
 
 /*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+    BDRV_DETECT_ZEROES_OFF   = 0x0,
+    BDRV_DETECT_ZEROES_ON    = 0x1,
+    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+    BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
  * inspect bdrv_append() to determine if the new fields need to be
@@ -364,6 +375,7 @@ struct BlockDriverState {
     BlockJob *job;
 
     QDict *options;
+    BdrvDetectZeroes detect_zeroes;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index a998e8d..8e3d6eb 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
 void qemu_iovec_concat_iov(QEMUIOVector *dst,
                            struct iovec *src_iov, unsigned int src_cnt,
                            size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qapi-schema.json b/qapi-schema.json
index 0b00427..5e3b4a89 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -937,6 +937,8 @@
 # @encryption_key_missing: true if the backing device is encrypted but an
 #                          valid encryption key is missing
 #
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
 # @bps: total throughput limit in bytes per second is specified
 #
 # @bps_rd: read throughput limit in bytes per second is specified
@@ -972,6 +974,7 @@
   'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'str',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             'image': 'ImageInfo',
@@ -4250,6 +4253,20 @@
   'data': [ 'ignore', 'unmap' ] }
 
 ##
+# @BlockdevDetectZeroesOptions
+#
+# Selects the operation mode for zero write detection.
+#
+# @off:      Disabled
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
 # @BlockdevAioOptions
 #
 # Selects the AIO backend to handle I/O requests
@@ -4301,20 +4318,22 @@
 # Options that are available for all block devices, independent of the block
 # driver.
 #
-# @driver:      block driver name
-# @id:          #optional id by which the new block device can be referred to.
-#               This is a required option on the top level of blockdev-add, and
-#               currently not allowed on any other level.
-# @node-name:   #optional the name of a block driver state node (Since 2.0)
-# @discard:     #optional discard-related options (default: ignore)
-# @cache:       #optional cache-related options
-# @aio:         #optional AIO backend (default: threads)
-# @rerror:      #optional how to handle read errors on the device
-#               (default: report)
-# @werror:      #optional how to handle write errors on the device
-#               (default: enospc)
-# @read-only:   #optional whether the block device should be read-only
-#               (default: false)
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred to.
+#                 This is a required option on the top level of blockdev-add, and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
 #
 # Since: 1.7
 ##
@@ -4327,7 +4346,8 @@
             '*aio': 'BlockdevAioOptions',
             '*rerror': 'BlockdevOnError',
             '*werror': 'BlockdevOnError',
-            '*read-only': 'bool' } }
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
 
 ##
 # @BlockdevOptionsFile
diff --git a/qemu-options.hx b/qemu-options.hx
index 781af14..5ee94ea 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
     "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,detect-zeroes=on|off|unmap]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write attempts will fail.
 @item copy-on-read=@var{copy-on-read}
 @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
 file sectors into the image file.
+@item detect-zeroes=@var{detect-zeroes}
+@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ed3ab92..a535955 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2032,6 +2032,8 @@ Each json-object contain the following:
          - "iops_rd_max":  read I/O operations max (json-int)
          - "iops_wr_max":  write I/O operations max (json-int)
          - "iops_size": I/O size when limiting by iops (json-int)
+         - "detect_zeroes": detect and optimize zero writes (json-string)
+             - Possible values: "off", "on", "unmap"
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -2108,6 +2110,7 @@ Example:
                "iops_rd_max": 0,
                "iops_wr_max": 0,
                "iops_size": 0,
+               "detect_zeroes": "on",
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..f8c49a1 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
+/*
+ *  check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-05-08  7:44 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-07  0:23 [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes Peter Lieven
2014-05-07  2:52 ` Eric Blake
2014-05-07 14:26   ` Peter Lieven
2014-05-07 15:19     ` Kevin Wolf
2014-05-07 15:26       ` Peter Lieven
2014-05-07 15:38         ` Kevin Wolf
2014-05-07 15:45           ` Peter Lieven
2014-05-07 16:02             ` Peter Lieven
2014-05-08  7:44               ` Kevin Wolf
2014-05-07 16:13           ` Peter Lieven
  -- strict thread matches above, loose matches on Subject: below --
2014-05-07  0:01 Peter Lieven
2014-05-07  0:19 ` Peter Lieven

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.