All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Blake <eblake@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, qemu-block@nongnu.org,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Fam Zheng <famz@redhat.com>, Max Reitz <mreitz@redhat.com>
Subject: [Qemu-devel] [PATCH v2 03/13] block: Add .bdrv_co_pwrite_zeroes()
Date: Wed,  1 Jun 2016 15:10:03 -0600	[thread overview]
Message-ID: <1464815413-613-4-git-send-email-eblake@redhat.com> (raw)
In-Reply-To: <1464815413-613-1-git-send-email-eblake@redhat.com>

Rename bdrv_co_do_write_zeroes() to bdrv_co_do_pwrite_zeroes() and
make it byte-based.  It now selects between the new byte-based driver
callback .bdrv_co_pwrite_zeroes() and the old sector-based
.bdrv_co_write_zeroes().  The next patches will convert the drivers
and then remove the old interface.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 include/block/block_int.h |  4 ++-
 block/io.c                | 78 ++++++++++++++++++++++++++---------------------
 2 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2e9c81f..1dfdf92 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -165,6 +165,8 @@ struct BlockDriver {
      */
     int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
+        int64_t offset, int count, BdrvRequestFlags flags);
     int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors);
     int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
@@ -456,7 +458,7 @@ struct BlockDriverState {
     unsigned int request_alignment;
     /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
     unsigned int supported_write_flags;
-    /* Flags honored during write_zeroes (so far: BDRV_REQ_FUA,
+    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
      * BDRV_REQ_MAY_UNMAP) */
     unsigned int supported_zero_flags;

diff --git a/block/io.c b/block/io.c
index 108cd35..3fe7576 100644
--- a/block/io.c
+++ b/block/io.c
@@ -42,8 +42,8 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                          void *opaque,
                                          bool is_write);
 static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int count, BdrvRequestFlags flags);

 static void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
@@ -893,10 +893,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
         goto err;
     }

-    if (drv->bdrv_co_write_zeroes &&
+    if ((drv->bdrv_co_write_zeroes || drv->bdrv_co_pwrite_zeroes) &&
         buffer_is_zero(bounce_buffer, iov.iov_len)) {
-        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
-                                      cluster_nb_sectors, 0);
+        ret = bdrv_co_do_pwrite_zeroes(bs,
+                                       cluster_sector_num * BDRV_SECTOR_SIZE,
+                                       cluster_nb_sectors * BDRV_SECTOR_SIZE,
+                                       0);
     } else {
         /* This does not change the data on the disk, it is not necessary
          * to flush even in cache=writethrough mode.
@@ -1110,8 +1112,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,

 #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768

-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int count, BdrvRequestFlags flags)
 {
     BlockDriver *drv = bs->drv;
     QEMUIOVector qiov;
@@ -1122,20 +1124,16 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     int tail = 0;

     int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
-    int write_zeroes_sector_align =
-        bs->bl.pwrite_zeroes_alignment >> BDRV_SECTOR_BITS;
+    int alignment = MAX(bs->bl.pwrite_zeroes_alignment ?: 1,
+                        bs->request_alignment);

-    max_write_zeroes >>= BDRV_SECTOR_BITS;
-    if (write_zeroes_sector_align) {
-        assert(is_power_of_2(bs->bl.pwrite_zeroes_alignment));
-        head = sector_num & (write_zeroes_sector_align - 1);
-        tail = (sector_num + nb_sectors) & (write_zeroes_sector_align - 1);
-        max_write_zeroes &= ~(write_zeroes_sector_align - 1);
-    }
+    assert(is_power_of_2(alignment));
+    head = offset & (alignment - 1);
+    tail = (offset + count) & (alignment - 1);
+    max_write_zeroes &= ~(alignment - 1);

-    assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
-    while (nb_sectors > 0 && !ret) {
-        int num = nb_sectors;
+    while (count > 0 && !ret) {
+        int num = count;

         /* Align request.  Block drivers can expect the "bulk" of the request
          * to be aligned, and that unaligned requests do not cross cluster
@@ -1143,9 +1141,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
          */
         if (head) {
             /* Make a small request up to the first aligned sector.  */
-            num = MIN(nb_sectors, write_zeroes_sector_align - head);
+            num = MIN(count, alignment - head);
             head = 0;
-        } else if (tail && num > write_zeroes_sector_align) {
+        } else if (tail && num > alignment) {
             /* Shorten the request to the last aligned sector.  */
             num -= tail;
         }
@@ -1157,8 +1155,18 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,

         ret = -ENOTSUP;
         /* First try the efficient write zeroes operation */
-        if (drv->bdrv_co_write_zeroes) {
-            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num,
+        if (drv->bdrv_co_pwrite_zeroes) {
+            ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
+                                             flags & bs->supported_zero_flags);
+            if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
+                !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
+                need_flush = true;
+            }
+        } else if (drv->bdrv_co_write_zeroes) {
+            assert(offset % BDRV_SECTOR_SIZE == 0);
+            assert(count % BDRV_SECTOR_SIZE == 0);
+            ret = drv->bdrv_co_write_zeroes(bs, offset >> BDRV_SECTOR_BITS,
+                                            num >> BDRV_SECTOR_BITS,
                                             flags & bs->supported_zero_flags);
             if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
                 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
@@ -1181,33 +1189,31 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
                 write_flags &= ~BDRV_REQ_FUA;
                 need_flush = true;
             }
-            num = MIN(num, max_xfer_len);
-            iov.iov_len = num * BDRV_SECTOR_SIZE;
+            num = MIN(num, max_xfer_len << BDRV_SECTOR_BITS);
+            iov.iov_len = num;
             if (iov.iov_base == NULL) {
-                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+                iov.iov_base = qemu_try_blockalign(bs, num);
                 if (iov.iov_base == NULL) {
                     ret = -ENOMEM;
                     goto fail;
                 }
-                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
+                memset(iov.iov_base, 0, num);
             }
             qemu_iovec_init_external(&qiov, &iov, 1);

-            ret = bdrv_driver_pwritev(bs, sector_num * BDRV_SECTOR_SIZE,
-                                      num * BDRV_SECTOR_SIZE, &qiov,
-                                      write_flags);
+            ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);

             /* Keep bounce buffer around if it is big enough for all
              * all future requests.
              */
-            if (num < max_xfer_len) {
+            if (num < max_xfer_len << BDRV_SECTOR_BITS) {
                 qemu_vfree(iov.iov_base);
                 iov.iov_base = NULL;
             }
         }

-        sector_num += num;
-        nb_sectors -= num;
+        offset += num;
+        count -= num;
     }

 fail:
@@ -1245,7 +1251,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

     if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
-        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
+        !(flags & BDRV_REQ_ZERO_WRITE) &&
+        (drv->bdrv_co_pwrite_zeroes || drv->bdrv_co_write_zeroes) &&
         qemu_iovec_is_zero(qiov)) {
         flags |= BDRV_REQ_ZERO_WRITE;
         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
@@ -1257,7 +1264,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
         bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
-        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
+        ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
+                                       nb_sectors << BDRV_SECTOR_BITS, flags);
     } else {
         bdrv_debug_event(bs, BLKDBG_PWRITEV);
         ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
-- 
2.5.5

  parent reply	other threads:[~2016-06-01 21:10 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-01 21:10 [Qemu-devel] [PATCH v2 00/13] Kill sector-based write_zeroes Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 01/13] iscsi: Use block size as minimum zero/discard alignment Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 02/13] block: Track write zero limits in bytes Eric Blake
2016-06-01 21:10 ` Eric Blake [this message]
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 04/13] block: Switch bdrv_write_zeroes() to byte interface Eric Blake
2016-06-02 11:01   ` Kevin Wolf
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 05/13] iscsi: Convert to bdrv_co_pwrite_zeroes() Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 06/13] qcow2: " Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 07/13] blkreplay: " Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 08/13] gluster: " Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 09/13] qed: " Eric Blake
2016-06-02 11:16   ` Kevin Wolf
2016-06-02 12:40     ` Eric Blake
2016-06-02 12:45       ` Kevin Wolf
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 10/13] raw-posix: " Eric Blake
2016-06-03 16:21   ` Kevin Wolf
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 11/13] raw_bsd: " Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 12/13] vmdk: " Eric Blake
2016-06-01 21:10 ` [Qemu-devel] [PATCH v2 13/13] block: Kill bdrv_co_write_zeroes() Eric Blake
2016-06-02 11:26 ` [Qemu-devel] [PATCH v2 00/13] Kill sector-based write_zeroes Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1464815413-613-4-git-send-email-eblake@redhat.com \
    --to=eblake@redhat.com \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.