All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 0/2] mirror: Improve zero write and discard
@ 2015-11-10  8:51 Fam Zheng
  2015-11-10  8:51 ` [Qemu-devel] [PATCH v3 1/2] block: Introduce coroutine lock to dirty bitmap Fam Zheng
  2015-11-10  8:51 ` [Qemu-devel] [PATCH v3 2/2] mirror: Improve zero-write and discard with fragmented image Fam Zheng
  0 siblings, 2 replies; 3+ messages in thread
From: Fam Zheng @ 2015-11-10  8:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block, mreitz

The first patch adds a lock between bdrv_set_dirty{,_bitmap} and non-atomic
(coroutine) readers.

The second patch makes use of it and fixes mirror thin writing.

Fam Zheng (2):
  block: Introduce coroutine lock to dirty bitmap
  mirror: Improve zero-write and discard with fragmented image

 block.c                   |  26 ++++++--
 block/mirror.c            | 160 ++++++++++++++++++++++++++++++++++++----------
 include/block/block.h     |   6 +-
 include/block/block_int.h |   4 +-
 4 files changed, 156 insertions(+), 40 deletions(-)

-- 
2.4.3

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Qemu-devel] [PATCH v3 1/2] block: Introduce coroutine lock to dirty bitmap
  2015-11-10  8:51 [Qemu-devel] [PATCH v3 0/2] mirror: Improve zero write and discard Fam Zheng
@ 2015-11-10  8:51 ` Fam Zheng
  2015-11-10  8:51 ` [Qemu-devel] [PATCH v3 2/2] mirror: Improve zero-write and discard with fragmented image Fam Zheng
  1 sibling, 0 replies; 3+ messages in thread
From: Fam Zheng @ 2015-11-10  8:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block, mreitz

Typically, what a dirty bit consumer does is 1) get the next dirty
sectors; 2) do something with the sectors; 3) clear the dirty bits; 4)
goto 1). This works as long as 2) is simple and atomic in the coroutine
sense.  Anything sophisticated requires either moving 3) before 2) or
using locks, because the dirty bits may get cleared in the middle when
the coroutine yields.

This will be the case for mirror.c in following patches, so introduce a
CoMutex in BdrvDirtyBitmap to allow blocking the producer.

Also mark all involved dirty bitmap functions as coroutine_fn.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block.c                   | 26 +++++++++++++++++++++-----
 include/block/block.h     |  6 ++++--
 include/block/block_int.h |  4 +++-
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index e9f40dc..34a4109 100644
--- a/block.c
+++ b/block.c
@@ -69,6 +69,7 @@ struct BdrvDirtyBitmap {
     int64_t size;               /* Size of the bitmap (Number of sectors) */
     bool disabled;              /* Bitmap is read-only */
     QLIST_ENTRY(BdrvDirtyBitmap) list;
+    CoMutex lock;
 };
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
@@ -3173,6 +3174,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
     bitmap->size = bitmap_size;
     bitmap->name = g_strdup(name);
     bitmap->disabled = false;
+    qemu_co_mutex_init(&bitmap->lock);
     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
     return bitmap;
 }
@@ -3385,11 +3387,24 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
 }
 
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
+void coroutine_fn bdrv_lock_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    qemu_co_mutex_lock(&bitmap->lock);
+}
+
+void coroutine_fn bdrv_unlock_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    qemu_co_mutex_unlock(&bitmap->lock);
+}
+
+void coroutine_fn bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                                        int64_t cur_sector,
+                                        int nr_sectors)
 {
     assert(bdrv_dirty_bitmap_enabled(bitmap));
+    bdrv_lock_dirty_bitmap(bitmap);
     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+    bdrv_unlock_dirty_bitmap(bitmap);
 }
 
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -3405,15 +3420,16 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
     hbitmap_reset_all(bitmap->bitmap);
 }
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
+void coroutine_fn bdrv_set_dirty(BlockDriverState *bs,
+                                 int64_t cur_sector,
+                                 int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
             continue;
         }
-        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+        bdrv_set_dirty_bitmap(bitmap, cur_sector, nr_sectors);
     }
 }
 
diff --git a/include/block/block.h b/include/block/block.h
index 610db92..592f317 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -488,9 +488,11 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
+void coroutine_fn bdrv_lock_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void coroutine_fn bdrv_unlock_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors);
+void coroutine_fn bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                                        int64_t cur_sector, int nr_sectors);
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                              int64_t cur_sector, int nr_sectors);
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 3ceeb5a..e17712c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -672,7 +672,9 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 bool blk_dev_is_medium_locked(BlockBackend *blk);
 void blk_dev_resize_cb(BlockBackend *blk);
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void coroutine_fn bdrv_set_dirty(BlockDriverState *bs,
+                                 int64_t cur_sector,
+                                 int nr_sectors);
 bool bdrv_requests_pending(BlockDriverState *bs);
 
 #endif /* BLOCK_INT_H */
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Qemu-devel] [PATCH v3 2/2] mirror: Improve zero-write and discard with fragmented image
  2015-11-10  8:51 [Qemu-devel] [PATCH v3 0/2] mirror: Improve zero write and discard Fam Zheng
  2015-11-10  8:51 ` [Qemu-devel] [PATCH v3 1/2] block: Introduce coroutine lock to dirty bitmap Fam Zheng
@ 2015-11-10  8:51 ` Fam Zheng
  1 sibling, 0 replies; 3+ messages in thread
From: Fam Zheng @ 2015-11-10  8:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block, mreitz

The "pnum < nb_sectors" condition in deciding whether to actually copy
data is unnecessarily strict, and the qiov initialization is
unnecessary too, for both the bdrv_aio_write_zeroes and bdrv_aio_discard
branches.

Reorganize mirror_iteration flow so that we:

    1) Find the contiguous zero/discarded sectors with
    bdrv_get_block_status_above() before deciding what to do. We query
    s->buf_size sized blocks at a time.

    2) If the sectors in question are zeroed/discarded and aligned to
    target cluster, issue zero write or discard accordingly. It's done
    in mirror_do_zero_or_discard, where we don't add buffer to qiov.

    3) Otherwise, do the same loop as before in mirror_do_read.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/mirror.c | 160 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 128 insertions(+), 32 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index b1252a1..ade0412 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -157,23 +157,13 @@ static void mirror_read_complete(void *opaque, int ret)
                     mirror_write_complete, op);
 }
 
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static uint64_t mirror_do_read(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->common.bs;
-    int nb_sectors, sectors_per_chunk, nb_chunks;
-    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
+    int sectors_per_chunk, nb_sectors, nb_chunks;
+    int64_t end, next_chunk, next_sector, hbitmap_next_sector, sector_num;
     uint64_t delay_ns = 0;
     MirrorOp *op;
-    int pnum;
-    int64_t ret;
-
-    s->sector_num = hbitmap_iter_next(&s->hbi);
-    if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
-        s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-        assert(s->sector_num >= 0);
-    }
 
     hbitmap_next_sector = s->sector_num;
     sector_num = s->sector_num;
@@ -198,14 +188,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     next_sector = sector_num;
     next_chunk = sector_num / sectors_per_chunk;
 
-    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
-    while (test_bit(next_chunk, s->in_flight_bitmap)) {
-        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
-        s->waiting_for_io = true;
-        qemu_coroutine_yield();
-        s->waiting_for_io = false;
-    }
-
     do {
         int added_sectors, added_chunks;
 
@@ -301,24 +283,138 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
-    ret = bdrv_get_block_status_above(source, NULL, sector_num,
-                                      nb_sectors, &pnum);
-    if (ret < 0 || pnum < nb_sectors ||
-            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
-        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-                       mirror_read_complete, op);
-    } else if (ret & BDRV_BLOCK_ZERO) {
+    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                   mirror_read_complete, op);
+    return delay_ns;
+}
+
+static uint64_t mirror_do_zero_or_discard(MirrorBlockJob *s,
+                                          int64_t sector_num,
+                                          int nb_sectors,
+                                          bool is_discard)
+{
+    int sectors_per_chunk, nb_chunks;
+    int64_t next_chunk, next_sector, hbitmap_next_sector;
+    uint64_t delay_ns = 0;
+    MirrorOp *op;
+
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    assert(nb_sectors >= sectors_per_chunk);
+    next_chunk = sector_num / sectors_per_chunk;
+    nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
+    bitmap_set(s->in_flight_bitmap, next_chunk, nb_chunks);
+    delay_ns = ratelimit_calculate_delay(&s->limit, nb_sectors);
+
+    /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+     * out so the freeing in iteration is nop. */
+    op = g_new0(MirrorOp, 1);
+    op->s = s;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+
+    /* Advance the HBitmapIter in parallel, so that we do not examine
+     * the same sector twice.
+     */
+    hbitmap_next_sector = sector_num;
+    next_sector = sector_num + nb_sectors;
+    while (next_sector > hbitmap_next_sector) {
+        hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
+        if (hbitmap_next_sector < 0) {
+            break;
+        }
+    }
+
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
+    s->in_flight++;
+    s->sectors_in_flight += nb_sectors;
+    if (is_discard) {
+        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+                         mirror_write_complete, op);
+    } else {
         bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                               s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                               mirror_write_complete, op);
-    } else {
-        assert(!(ret & BDRV_BLOCK_DATA));
-        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
-                         mirror_write_complete, op);
     }
+
     return delay_ns;
 }
 
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+    BlockDriverState *source = s->common.bs;
+    int sectors_per_chunk;
+    int64_t sector_num, next_chunk;
+    int ret;
+    int contiguous_sectors = s->buf_size >> BDRV_SECTOR_BITS;
+    enum MirrorMethod {
+        MIRROR_METHOD_COPY,
+        MIRROR_METHOD_ZERO,
+        MIRROR_METHOD_DISCARD
+    } mirror_method;
+
+    bdrv_lock_dirty_bitmap(s->dirty_bitmap);
+    s->sector_num = hbitmap_iter_next(&s->hbi);
+    if (s->sector_num < 0) {
+        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+        s->sector_num = hbitmap_iter_next(&s->hbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+        assert(s->sector_num >= 0);
+    }
+
+    sector_num = s->sector_num;
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    next_chunk = sector_num / sectors_per_chunk;
+
+    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
+    while (test_bit(next_chunk, s->in_flight_bitmap)) {
+        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        s->waiting_for_io = true;
+        qemu_coroutine_yield();
+        s->waiting_for_io = false;
+    }
+
+    mirror_method = MIRROR_METHOD_COPY;
+    ret = bdrv_get_block_status_above(source, NULL, sector_num,
+                                      contiguous_sectors,
+                                      &contiguous_sectors);
+
+    contiguous_sectors -= contiguous_sectors % sectors_per_chunk;
+    if (ret < 0 || contiguous_sectors < sectors_per_chunk) {
+        contiguous_sectors = sectors_per_chunk;
+    } else if (!(ret & BDRV_BLOCK_DATA)) {
+        int64_t target_sector_num;
+        int target_nb_sectors;
+        bdrv_round_to_clusters(s->target, sector_num, contiguous_sectors,
+                               &target_sector_num, &target_nb_sectors);
+        if (target_sector_num == sector_num &&
+            target_nb_sectors == contiguous_sectors) {
+            mirror_method = ret & BDRV_BLOCK_ZERO ?
+                                MIRROR_METHOD_ZERO :
+                                MIRROR_METHOD_DISCARD;
+        }
+    }
+
+    switch (mirror_method) {
+    case MIRROR_METHOD_COPY:
+        ret = mirror_do_read(s);
+        break;
+    case MIRROR_METHOD_ZERO:
+        ret = mirror_do_zero_or_discard(s, sector_num,
+                                         contiguous_sectors,
+                                         false);
+        break;
+    case MIRROR_METHOD_DISCARD:
+        ret = mirror_do_zero_or_discard(s, sector_num,
+                                         contiguous_sectors,
+                                         true);
+        break;
+    default:
+        abort();
+    }
+    bdrv_unlock_dirty_bitmap(s->dirty_bitmap);
+    return ret;
+}
+
 static void mirror_free_init(MirrorBlockJob *s)
 {
     int granularity = s->granularity;
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-11-10  8:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-10  8:51 [Qemu-devel] [PATCH v3 0/2] mirror: Improve zero write and discard Fam Zheng
2015-11-10  8:51 ` [Qemu-devel] [PATCH v3 1/2] block: Introduce coroutine lock to dirty bitmap Fam Zheng
2015-11-10  8:51 ` [Qemu-devel] [PATCH v3 2/2] mirror: Improve zero-write and discard with fragmented image Fam Zheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.