All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/10] block: Coroutine support
@ 2011-07-26 11:48 Kevin Wolf
  2011-07-26 11:48 ` [Qemu-devel] [PATCH 01/10] block: Add bdrv_co_readv/writev Kevin Wolf
                   ` (11 more replies)
  0 siblings, 12 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
take advantage of the new coroutine infrastructure. Both formats used
synchronous operations for accessing their metadata and blocked the guest CPU
during that time. With coroutines, the I/O will happen asynchronously in the
background and the CPU won't be blocked any more.

Kevin Wolf (10):
  block: Add bdrv_co_readv/writev
  block: Emulate AIO functions with bdrv_co_readv/writev
  block: Add bdrv_co_readv/writev emulation
  coroutines: Locks
  qcow2: Use coroutines
  qcow: Use coroutines
  async: Remove AsyncContext
  coroutines: Use one global bottom half for CoQueue
  posix-aio-compat: Allow read after EOF
  block: Use bdrv_co_* instead of synchronous versions in coroutines

 Makefile.objs         |    4 +-
 async.c               |   98 ++-----------------
 block.c               |  271 ++++++++++++++++++++++++++++++++++++++++++++++--
 block.h               |    5 +
 block/qcow.c          |  180 +++++++++++---------------------
 block/qcow2-cluster.c |   26 ++---
 block/qcow2.c         |  240 ++++++++++++++++----------------------------
 block/qcow2.h         |    5 +-
 block/qed-table.c     |   14 ---
 block/qed.c           |    4 -
 block_int.h           |    6 +
 linux-aio.c           |   43 +-------
 posix-aio-compat.c    |   30 ++++--
 qemu-common.h         |    4 -
 qemu-coroutine-int.h  |    1 +
 qemu-coroutine-lock.c |  117 +++++++++++++++++++++
 qemu-coroutine.h      |   64 ++++++++++++
 trace-events          |   11 ++
 18 files changed, 659 insertions(+), 464 deletions(-)
 create mode 100644 qemu-coroutine-lock.c

-- 
1.7.6

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 01/10] block: Add bdrv_co_readv/writev
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
@ 2011-07-26 11:48 ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 02/10] block: Emulate AIO functions with bdrv_co_readv/writev Kevin Wolf
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

Add new block driver callbacks bdrv_co_readv/writev, which work on a
QEMUIOVector like bdrv_aio_*, but don't need a callback. The function may only
be called inside a coroutine, so a block driver implementing this interface can
yield instead of blocking during I/O.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 Makefile.objs |    2 +-
 block.c       |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 block.h       |    5 +++++
 block_int.h   |    6 ++++++
 trace-events  |    2 ++
 5 files changed, 60 insertions(+), 1 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index 5679e1f..9549e2a 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -25,6 +25,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 
 block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += $(coroutine-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
@@ -79,7 +80,6 @@ common-obj-y += readline.o console.o cursor.o qemu-error.o
 common-obj-y += $(oslib-obj-y)
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
-common-obj-y += $(coroutine-obj-y)
 
 common-obj-y += tcg-runtime.o host-utils.o
 common-obj-y += irq.o ioport.o input.o
diff --git a/block.c b/block.c
index 9549b9e..e1259ee 100644
--- a/block.c
+++ b/block.c
@@ -1121,6 +1121,52 @@ int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
         buf, BDRV_SECTOR_SIZE * nb_sectors);
 }
 
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    BlockDriver *drv = bs->drv;
+
+    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+        return -EIO;
+    }
+
+    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    BlockDriver *drv = bs->drv;
+
+    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+    if (bs->read_only) {
+        return -EACCES;
+    }
+    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+        return -EIO;
+    }
+
+    if (bs->dirty_bitmap) {
+        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
+    }
+
+    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
+        bs->wr_highest_sector = sector_num + nb_sectors - 1;
+    }
+
+    return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+}
+
+
 /**
  * Truncate file to 'offset' bytes (needed only for file protocols)
  */
diff --git a/block.h b/block.h
index 59cc410..a559459 100644
--- a/block.h
+++ b/block.h
@@ -4,6 +4,7 @@
 #include "qemu-aio.h"
 #include "qemu-common.h"
 #include "qemu-option.h"
+#include "qemu-coroutine.h"
 #include "qobject.h"
 
 /* block.c */
@@ -87,6 +88,10 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     const void *buf, int count);
 int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
     const uint8_t *buf, int nb_sectors);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
diff --git a/block_int.h b/block_int.h
index efb6803..c868b49 100644
--- a/block_int.h
+++ b/block_int.h
@@ -27,6 +27,7 @@
 #include "block.h"
 #include "qemu-option.h"
 #include "qemu-queue.h"
+#include "qemu-coroutine.h"
 
 #define BLOCK_FLAG_ENCRYPT	1
 #define BLOCK_FLAG_COMPAT6	4
@@ -77,6 +78,11 @@ struct BlockDriver {
     int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors);
 
+    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
     int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
         int num_reqs);
     int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a,
diff --git a/trace-events b/trace-events
index 136f775..46bceca 100644
--- a/trace-events
+++ b/trace-events
@@ -66,6 +66,8 @@ disable bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 disable bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 disable bdrv_set_locked(void *bs, int locked) "bs %p locked %d"
+disable bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+disable bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 
 # hw/virtio-blk.c
 disable virtio_blk_req_complete(void *req, int status) "req %p status %d"
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 02/10] block: Emulate AIO functions with bdrv_co_readv/writev
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
  2011-07-26 11:48 ` [Qemu-devel] [PATCH 01/10] block: Add bdrv_co_readv/writev Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 03/10] block: Add bdrv_co_readv/writev emulation Kevin Wolf
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

Use the bdrv_co_readv/writev callbacks to implement bdrv_aio_readv/writev and
bdrv_read/write if a driver provides the coroutine version instead of the
synchronous or AIO version.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 97 insertions(+), 1 deletions(-)

diff --git a/block.c b/block.c
index e1259ee..48413ae 100644
--- a/block.c
+++ b/block.c
@@ -28,6 +28,7 @@
 #include "block_int.h"
 #include "module.h"
 #include "qemu-objects.h"
+#include "qemu-coroutine.h"
 
 #ifdef CONFIG_BSD
 #include <sys/types.h>
@@ -57,6 +58,12 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors);
 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
+static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -169,7 +176,13 @@ void path_combine(char *dest, int dest_size,
 
 void bdrv_register(BlockDriver *bdrv)
 {
-    if (!bdrv->bdrv_aio_readv) {
+    if (bdrv->bdrv_co_readv) {
+        /* Emulate AIO by coroutines, and sync by AIO */
+        bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
+        bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
+        bdrv->bdrv_read = bdrv_read_em;
+        bdrv->bdrv_write = bdrv_write_em;
+     } else if (!bdrv->bdrv_aio_readv) {
         /* add AIO emulation layer */
         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
@@ -2626,6 +2639,89 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }
 
+
+typedef struct BlockDriverAIOCBCoroutine {
+    BlockDriverAIOCB common;
+    BlockRequest req;
+    bool is_write;
+    QEMUBH* bh;
+} BlockDriverAIOCBCoroutine;
+
+static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
+{
+    qemu_aio_flush();
+}
+
+static AIOPool bdrv_em_co_aio_pool = {
+    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
+    .cancel             = bdrv_aio_co_cancel_em,
+};
+
+static void bdrv_co_rw_bh(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *acb = opaque;
+
+    acb->common.cb(acb->common.opaque, acb->req.error);
+    qemu_bh_delete(acb->bh);
+    qemu_aio_release(acb);
+}
+
+static void coroutine_fn bdrv_co_rw(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    if (!acb->is_write) {
+        acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov);
+    } else {
+        acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov);
+    }
+
+    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
+    qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                               int64_t sector_num,
+                                               QEMUIOVector *qiov,
+                                               int nb_sectors,
+                                               BlockDriverCompletionFunc *cb,
+                                               void *opaque,
+                                               bool is_write)
+{
+    Coroutine *co;
+    BlockDriverAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
+    acb->req.sector = sector_num;
+    acb->req.nb_sectors = nb_sectors;
+    acb->req.qiov = qiov;
+    acb->is_write = is_write;
+
+    co = qemu_coroutine_create(bdrv_co_rw);
+    qemu_coroutine_enter(co, acb);
+
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                                 false);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                                 true);
+}
+
 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 03/10] block: Add bdrv_co_readv/writev emulation
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
  2011-07-26 11:48 ` [Qemu-devel] [PATCH 01/10] block: Add bdrv_co_readv/writev Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 02/10] block: Emulate AIO functions with bdrv_co_readv/writev Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-08-02 12:12   ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 04/10] coroutines: Locks Kevin Wolf
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

In order to be able to call bdrv_co_readv/writev for drivers that don't
implement the functions natively, add an emulation that uses the AIO functions
to implement them.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c      |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 trace-events |    1 +
 2 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index 48413ae..2f589ac 100644
--- a/block.c
+++ b/block.c
@@ -64,6 +64,12 @@ static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -182,14 +188,19 @@ void bdrv_register(BlockDriver *bdrv)
         bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
         bdrv->bdrv_read = bdrv_read_em;
         bdrv->bdrv_write = bdrv_write_em;
-     } else if (!bdrv->bdrv_aio_readv) {
-        /* add AIO emulation layer */
-        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
-        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
-    } else if (!bdrv->bdrv_read) {
-        /* add synchronous IO emulation layer */
-        bdrv->bdrv_read = bdrv_read_em;
-        bdrv->bdrv_write = bdrv_write_em;
+     } else {
+        bdrv->bdrv_co_readv = bdrv_co_readv_em;
+        bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+        if (!bdrv->bdrv_aio_readv) {
+            /* add AIO emulation layer */
+            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+        } else if (!bdrv->bdrv_read) {
+            /* add synchronous IO emulation layer */
+            bdrv->bdrv_read = bdrv_read_em;
+            bdrv->bdrv_write = bdrv_write_em;
+        }
     }
 
     if (!bdrv->bdrv_aio_flush)
@@ -2868,6 +2879,63 @@ void qemu_aio_release(void *p)
 }
 
 /**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+    Coroutine *coroutine;
+    int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+    CoroutineIOCompletion *co = opaque;
+
+    co->ret = ret;
+    qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *iov,
+                                      bool is_write)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+        .ret = -EINPROGRESS,
+    };
+    BlockDriverAIOCB *acb;
+
+    if (is_write) {
+        acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+                              bdrv_co_io_em_complete, &co);
+    } else {
+        acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+                             bdrv_co_io_em_complete, &co);
+    }
+
+    trace_bdrv_co_io(is_write, acb);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+
+    return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+/**************************************************************/
 /* removable device support */
 
 /**
diff --git a/trace-events b/trace-events
index 46bceca..bc9be30 100644
--- a/trace-events
+++ b/trace-events
@@ -68,6 +68,7 @@ disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaq
 disable bdrv_set_locked(void *bs, int locked) "bs %p locked %d"
 disable bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 disable bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+disable bdrv_co_io(int is_write, void *acb) "is_write %d acb %p"
 
 # hw/virtio-blk.c
 disable virtio_blk_req_complete(void *req, int status) "req %p status %d"
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 04/10] coroutines: Locks
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (2 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 03/10] block: Add bdrv_co_readv/writev emulation Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 05/10] qcow2: Use coroutines Kevin Wolf
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 Makefile.objs         |    2 +-
 qemu-coroutine-int.h  |    1 +
 qemu-coroutine-lock.c |  124 +++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-coroutine.h      |   64 +++++++++++++++++++++++++
 trace-events          |    8 +++
 5 files changed, 198 insertions(+), 1 deletions(-)
 create mode 100644 qemu-coroutine-lock.c

diff --git a/Makefile.objs b/Makefile.objs
index 9549e2a..89ca361 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -12,7 +12,7 @@ oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
 
 #######################################################################
 # coroutines
-coroutine-obj-y = qemu-coroutine.o
+coroutine-obj-y = qemu-coroutine.o qemu-coroutine-lock.o
 ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
 coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
 else
diff --git a/qemu-coroutine-int.h b/qemu-coroutine-int.h
index 64915c2..d495615 100644
--- a/qemu-coroutine-int.h
+++ b/qemu-coroutine-int.h
@@ -38,6 +38,7 @@ struct Coroutine {
     void *entry_arg;
     Coroutine *caller;
     QLIST_ENTRY(Coroutine) pool_next;
+    QTAILQ_ENTRY(Coroutine) co_queue_next;
 };
 
 Coroutine *qemu_coroutine_new(void);
diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
new file mode 100644
index 0000000..abaa1f7
--- /dev/null
+++ b/qemu-coroutine-lock.c
@@ -0,0 +1,124 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu-coroutine.h"
+#include "qemu-coroutine-int.h"
+#include "qemu-queue.h"
+#include "trace.h"
+
+static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
+    QTAILQ_HEAD_INITIALIZER(unlock_bh_queue);
+
+struct unlock_bh {
+    QEMUBH *bh;
+};
+
+static void qemu_co_queue_next_bh(void *opaque)
+{
+    struct unlock_bh *unlock_bh = opaque;
+    Coroutine *next;
+
+    trace_qemu_co_queue_next_bh();
+    while ((next = QTAILQ_FIRST(&unlock_bh_queue))) {
+        QTAILQ_REMOVE(&unlock_bh_queue, next, co_queue_next);
+        qemu_coroutine_enter(next, NULL);
+    }
+
+    qemu_bh_delete(unlock_bh->bh);
+    qemu_free(unlock_bh);
+}
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+    QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+    Coroutine *self = qemu_coroutine_self();
+    QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+    qemu_coroutine_yield();
+    assert(qemu_in_coroutine());
+}
+
+bool qemu_co_queue_next(CoQueue *queue)
+{
+    struct unlock_bh *unlock_bh;
+    Coroutine *next;
+
+    next = QTAILQ_FIRST(&queue->entries);
+    if (next) {
+        QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+        QTAILQ_INSERT_TAIL(&unlock_bh_queue, next, co_queue_next);
+        trace_qemu_co_queue_next(next);
+
+        unlock_bh = qemu_malloc(sizeof(*unlock_bh));
+        unlock_bh->bh = qemu_bh_new(qemu_co_queue_next_bh, unlock_bh);
+        qemu_bh_schedule(unlock_bh->bh);
+    }
+
+    return (next != NULL);
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+    return (QTAILQ_FIRST(&queue->entries) == NULL);
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+    memset(mutex, 0, sizeof(*mutex));
+    qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_lock_entry(mutex, self);
+
+    while (mutex->locked) {
+        qemu_co_queue_wait(&mutex->queue);
+    }
+
+    mutex->locked = true;
+
+    trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+    assert(mutex->locked == true);
+    assert(qemu_in_coroutine());
+
+    mutex->locked = false;
+    qemu_co_queue_next(&mutex->queue);
+
+    trace_qemu_co_mutex_unlock_return(mutex, self);
+}
diff --git a/qemu-coroutine.h b/qemu-coroutine.h
index 08255c7..2f2fd95 100644
--- a/qemu-coroutine.h
+++ b/qemu-coroutine.h
@@ -5,6 +5,7 @@
  *
  * Authors:
  *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Kevin Wolf         <kwolf@redhat.com>
  *
  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
  * See the COPYING.LIB file in the top-level directory.
@@ -15,6 +16,7 @@
 #define QEMU_COROUTINE_H
 
 #include <stdbool.h>
+#include "qemu-queue.h"
 
 /**
  * Coroutines are a mechanism for stack switching and can be used for
@@ -92,4 +94,66 @@ Coroutine *coroutine_fn qemu_coroutine_self(void);
  */
 bool qemu_in_coroutine(void);
 
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+    QTAILQ_HEAD(, Coroutine) entries;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+    bool locked;
+    CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
 #endif /* QEMU_COROUTINE_H */
diff --git a/trace-events b/trace-events
index bc9be30..19d31e3 100644
--- a/trace-events
+++ b/trace-events
@@ -433,3 +433,11 @@ disable xen_platform_log(char *s) "xen platform: %s"
 disable qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p"
 disable qemu_coroutine_yield(void *from, void *to) "from %p to %p"
 disable qemu_coroutine_terminate(void *co) "self %p"
+
+# qemu-coroutine-lock.c
+disable qemu_co_queue_next_bh(void) ""
+disable qemu_co_queue_next(void *next) "next %p"
+disable qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p"
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 05/10] qcow2: Use coroutines
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (3 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 04/10] coroutines: Locks Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-29 13:20   ` Stefan Hajnoczi
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 06/10] qcow: " Kevin Wolf
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |   26 +++---
 block/qcow2.c         |  240 ++++++++++++++++++-------------------------------
 block/qcow2.h         |    5 +-
 3 files changed, 102 insertions(+), 169 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 882f50a..81cf77d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -697,12 +697,12 @@ err:
  * m->depends_on is set to NULL and the other fields in m are meaningless.
  *
  * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. This may be 0 if the request
- * conflict with another write request in flight; in this case, m->depends_on
- * is set and the remaining fields of m are meaningless.
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
  *
- * If m->nb_clusters is non-zero, the other fields of m are valid and contain
- * information about the first allocated cluster.
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
  *
  * Return 0 on success and -errno in error cases
  */
@@ -721,6 +721,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
         return ret;
     }
 
+again:
     nb_clusters = size_to_clusters(s, n_end << 9);
 
     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
@@ -792,12 +793,12 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
             }
 
             if (nb_clusters == 0) {
-                /* Set dependency and wait for a callback */
-                m->depends_on = old_alloc;
-                m->nb_clusters = 0;
-                *num = 0;
-
-                goto out_wait_dependency;
+                /* Wait for the dependency to complete. We need to recheck
+                 * the free/allocated clusters when we continue. */
+                qemu_co_mutex_unlock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests);
+                qemu_co_mutex_lock(&s->lock);
+                goto again;
             }
         }
     }
@@ -834,9 +835,6 @@ out:
 
     return 0;
 
-out_wait_dependency:
-    return qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
 fail_put:
diff --git a/block/qcow2.c b/block/qcow2.c
index 48e1b95..f07d550 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -276,6 +276,9 @@ static int qcow2_open(BlockDriverState *bs, int flags)
         goto fail;
     }
 
+    /* Initialise locks */
+    qemu_co_mutex_init(&s->lock);
+
 #ifdef DEBUG_ALLOC
     qcow2_check_refcounts(bs);
 #endif
@@ -379,7 +382,6 @@ typedef struct QCowAIOCB {
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     bool is_write;
-    BlockDriverAIOCB *hd_aiocb;
     QEMUIOVector hd_qiov;
     QEMUBH *bh;
     QCowL2Meta l2meta;
@@ -389,8 +391,6 @@ typedef struct QCowAIOCB {
 static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-    if (acb->hd_aiocb)
-        bdrv_aio_cancel(acb->hd_aiocb);
     qemu_aio_release(acb);
 }
 
@@ -399,46 +399,16 @@ static AIOPool qcow2_aio_pool = {
     .cancel             = qcow2_aio_cancel,
 };
 
-static void qcow2_aio_read_cb(void *opaque, int ret);
-static void qcow2_aio_write_cb(void *opaque, int ret);
-
-static void qcow2_aio_rw_bh(void *opaque)
-{
-    QCowAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-
-    if (acb->is_write) {
-        qcow2_aio_write_cb(opaque, 0);
-    } else {
-        qcow2_aio_read_cb(opaque, 0);
-    }
-}
-
-static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
-    if (acb->bh)
-        return -EIO;
-
-    acb->bh = qemu_bh_new(cb, acb);
-    if (!acb->bh)
-        return -EIO;
-
-    qemu_bh_schedule(acb->bh);
-
-    return 0;
-}
-
-static void qcow2_aio_read_cb(void *opaque, int ret)
+/*
+ * Returns 0 when the request is completed successfully, 1 when there is still
+ * a part left to do and -errno in error cases.
+ */
+static int qcow2_aio_read_cb(QCowAIOCB *acb)
 {
-    QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster, n1;
-
-    acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
+    int ret;
 
     /* post process the read buffer */
     if (!acb->cluster_offset) {
@@ -463,8 +433,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
 
     if (acb->remaining_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     /* prepare next AIO request */
@@ -477,7 +446,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
     ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
         &acb->cur_nr_sectors, &acb->cluster_offset);
     if (ret < 0) {
-        goto done;
+        return ret;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -494,42 +463,35 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
                 acb->sector_num, acb->cur_nr_sectors);
             if (n1 > 0) {
                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
-                acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                                    &acb->hd_qiov, n1, qcow2_aio_read_cb, acb);
-                if (acb->hd_aiocb == NULL) {
-                    ret = -EIO;
-                    goto done;
+                qemu_co_mutex_unlock(&s->lock);
+                ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
+                                    n1, &acb->hd_qiov);
+                qemu_co_mutex_lock(&s->lock);
+                if (ret < 0) {
+                    return ret;
                 }
-            } else {
-                ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-                if (ret < 0)
-                    goto done;
             }
+            return 1;
         } else {
             /* Note: in this case, no need to wait */
             qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
-            ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-            if (ret < 0)
-                goto done;
+            return 1;
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         ret = qcow2_decompress_cluster(bs, acb->cluster_offset);
         if (ret < 0) {
-            goto done;
+            return ret;
         }
 
         qemu_iovec_from_buffer(&acb->hd_qiov,
             s->cluster_cache + index_in_cluster * 512,
             512 * acb->cur_nr_sectors);
 
-        ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-        if (ret < 0)
-            goto done;
+        return 1;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
+            return -EIO;
         }
 
         if (s->crypt_method) {
@@ -550,21 +512,17 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
         }
 
         BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-        acb->hd_aiocb = bdrv_aio_readv(bs->file,
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_readv(bs->file,
                             (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->cur_nr_sectors,
-                            qcow2_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL) {
-            ret = -EIO;
-            goto done;
+                            acb->cur_nr_sectors, &acb->hd_qiov);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            return ret;
         }
     }
 
-    return;
-done:
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_iovec_destroy(&acb->hd_qiov);
-    qemu_aio_release(acb);
+    return 1;
 }
 
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
@@ -577,7 +535,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
     acb = qemu_aio_get(&qcow2_aio_pool, bs, cb, opaque);
     if (!acb)
         return NULL;
-    acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
     acb->qiov = qiov;
     acb->is_write = is_write;
@@ -589,70 +546,65 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
     acb->cur_nr_sectors = 0;
     acb->cluster_offset = 0;
     acb->l2meta.nb_clusters = 0;
-    QLIST_INIT(&acb->l2meta.dependent_requests);
+    qemu_co_queue_init(&acb->l2meta.dependent_requests);
     return acb;
 }
 
-static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov, int nb_sectors,
-                                         BlockDriverCompletionFunc *cb,
-                                         void *opaque)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov)
 {
+    BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
     int ret;
 
-    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-    if (!acb)
-        return NULL;
+    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0);
 
-    ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-    if (ret < 0) {
-        qemu_iovec_destroy(&acb->hd_qiov);
-        qemu_aio_release(acb);
-        return NULL;
-    }
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow2_aio_read_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
 
-    return &acb->common;
+    qemu_iovec_destroy(&acb->hd_qiov);
+    qemu_aio_release(acb);
+
+    return ret;
 }
 
-static void run_dependent_requests(QCowL2Meta *m)
+static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
 {
-    QCowAIOCB *req;
-    QCowAIOCB *next;
-
     /* Take the request off the list of running requests */
     if (m->nb_clusters != 0) {
         QLIST_REMOVE(m, next_in_flight);
     }
 
     /* Restart all dependent requests */
-    QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) {
-        qcow2_aio_write_cb(req, 0);
+    if (!qemu_co_queue_empty(&m->dependent_requests)) {
+        qemu_co_mutex_unlock(&s->lock);
+        while(qemu_co_queue_next(&m->dependent_requests));
+        qemu_co_mutex_lock(&s->lock);
     }
-
-    /* Empty the list for the next part of the request */
-    QLIST_INIT(&m->dependent_requests);
 }
 
-static void qcow2_aio_write_cb(void *opaque, int ret)
+/*
+ * Returns 0 when the request is completed successfully, 1 when there is still
+ * a part left to do and -errno in error cases.
+ */
+static int qcow2_aio_write_cb(QCowAIOCB *acb)
 {
-    QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster;
     int n_end;
+    int ret;
 
-    acb->hd_aiocb = NULL;
-
-    if (ret >= 0) {
-        ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
-    }
+    ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
 
-    run_dependent_requests(&acb->l2meta);
+    run_dependent_requests(s, &acb->l2meta);
 
-    if (ret < 0)
-        goto done;
+    if (ret < 0) {
+        return ret;
+    }
 
     acb->remaining_sectors -= acb->cur_nr_sectors;
     acb->sector_num += acb->cur_nr_sectors;
@@ -660,8 +612,7 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
 
     if (acb->remaining_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -673,18 +624,10 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
     ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
         index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
     if (ret < 0) {
-        goto done;
+        return ret;
     }
 
     acb->cluster_offset = acb->l2meta.cluster_offset;
-
-    /* Need to wait for another request? If so, we are done for now. */
-    if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) {
-        QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
-            acb, next_depend);
-        return;
-    }
-
     assert((acb->cluster_offset & 511) == 0);
 
     qemu_iovec_reset(&acb->hd_qiov);
@@ -709,51 +652,40 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    acb->hd_aiocb = bdrv_aio_writev(bs->file,
-                                    (acb->cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->cur_nr_sectors,
-                                    qcow2_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL) {
-        ret = -EIO;
-        goto fail;
+    qemu_co_mutex_unlock(&s->lock);
+    ret = bdrv_co_writev(bs->file,
+                         (acb->cluster_offset >> 9) + index_in_cluster,
+                         acb->cur_nr_sectors, &acb->hd_qiov);
+    qemu_co_mutex_lock(&s->lock);
+    if (ret < 0) {
+        return ret;
     }
 
-    return;
-
-fail:
-    if (acb->l2meta.nb_clusters != 0) {
-        QLIST_REMOVE(&acb->l2meta, next_in_flight);
-    }
-done:
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_iovec_destroy(&acb->hd_qiov);
-    qemu_aio_release(acb);
+    return 1;
 }
 
-static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs,
-                                          int64_t sector_num,
-                                          QEMUIOVector *qiov, int nb_sectors,
-                                          BlockDriverCompletionFunc *cb,
-                                          void *opaque)
+static int qcow2_co_writev(BlockDriverState *bs,
+                           int64_t sector_num,
+                           int nb_sectors,
+                           QEMUIOVector *qiov)
 {
     BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
     int ret;
 
+    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1);
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-    if (!acb)
-        return NULL;
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow2_aio_write_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
 
-    ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-    if (ret < 0) {
-        qemu_iovec_destroy(&acb->hd_qiov);
-        qemu_aio_release(acb);
-        return NULL;
-    }
+    qemu_iovec_destroy(&acb->hd_qiov);
+    qemu_aio_release(acb);
 
-    return &acb->common;
+    return ret;
 }
 
 static void qcow2_close(BlockDriverState *bs)
@@ -881,7 +813,7 @@ static int preallocate(BlockDriverState *bs)
 
     nb_sectors = bdrv_getlength(bs) >> 9;
     offset = 0;
-    QLIST_INIT(&meta.dependent_requests);
+    qemu_co_queue_init(&meta.dependent_requests);
     meta.cluster_offset = 0;
 
     while (nb_sectors) {
@@ -899,7 +831,7 @@ static int preallocate(BlockDriverState *bs)
 
         /* There are no dependent requests, but we need to remove our request
          * from the list of in-flight requests */
-        run_dependent_requests(&meta);
+        run_dependent_requests(bs->opaque, &meta);
 
         /* TODO Preallocate data if requested */
 
@@ -1387,8 +1319,8 @@ static BlockDriver bdrv_qcow2 = {
     .bdrv_set_key       = qcow2_set_key,
     .bdrv_make_empty    = qcow2_make_empty,
 
-    .bdrv_aio_readv     = qcow2_aio_readv,
-    .bdrv_aio_writev    = qcow2_aio_writev,
+    .bdrv_co_readv      = qcow2_co_readv,
+    .bdrv_co_writev     = qcow2_co_writev,
     .bdrv_aio_flush     = qcow2_aio_flush,
 
     .bdrv_discard           = qcow2_discard,
diff --git a/block/qcow2.h b/block/qcow2.h
index 6a0a21b..de23abe 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -26,6 +26,7 @@
 #define BLOCK_QCOW2_H
 
 #include "aes.h"
+#include "qemu-coroutine.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -114,6 +115,8 @@ typedef struct BDRVQcowState {
     int64_t free_cluster_index;
     int64_t free_byte_offset;
 
+    CoMutex lock;
+
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
@@ -146,7 +149,7 @@ typedef struct QCowL2Meta
     int nb_available;
     int nb_clusters;
     struct QCowL2Meta *depends_on;
-    QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests;
+    CoQueue dependent_requests;
 
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 06/10] qcow: Use coroutines
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (4 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 05/10] qcow2: Use coroutines Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 07/10] async: Remove AsyncContext Kevin Wolf
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

The old qcow format is another user of the AsyncContext infrastructure.
Converting it to coroutines (and therefore CoMutexes) makes it possible
to remove AsyncContexts.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow.c |  180 ++++++++++++++++++++--------------------------------------
 1 files changed, 62 insertions(+), 118 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 227b104..6447c2a 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -73,6 +73,7 @@ typedef struct BDRVQcowState {
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
     AES_KEY aes_decrypt_key;
+    CoMutex lock;
 } BDRVQcowState;
 
 static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
@@ -517,11 +518,11 @@ static AIOPool qcow_aio_pool = {
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
+        int is_write)
 {
     QCowAIOCB *acb;
 
-    acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&qcow_aio_pool, bs, NULL, NULL);
     if (!acb)
         return NULL;
     acb->hd_aiocb = NULL;
@@ -542,48 +543,15 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     return acb;
 }
 
-static void qcow_aio_read_cb(void *opaque, int ret);
-static void qcow_aio_write_cb(void *opaque, int ret);
-
-static void qcow_aio_rw_bh(void *opaque)
-{
-    QCowAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-
-    if (acb->is_write) {
-        qcow_aio_write_cb(opaque, 0);
-    } else {
-        qcow_aio_read_cb(opaque, 0);
-    }
-}
-
-static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
-    if (acb->bh) {
-        return -EIO;
-    }
-
-    acb->bh = qemu_bh_new(cb, acb);
-    if (!acb->bh) {
-        return -EIO;
-    }
-
-    qemu_bh_schedule(acb->bh);
-
-    return 0;
-}
-
-static void qcow_aio_read_cb(void *opaque, int ret)
+static int qcow_aio_read_cb(void *opaque)
 {
     QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster;
+    int ret;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
 
  redo:
     /* post process the read buffer */
@@ -605,8 +573,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     /* prepare next AIO request */
@@ -623,11 +590,12 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             acb->hd_iov.iov_base = (void *)acb->buf;
             acb->hd_iov.iov_len = acb->n * 512;
             qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-            acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-            if (acb->hd_aiocb == NULL) {
-                ret = -EIO;
-                goto done;
+            qemu_co_mutex_unlock(&s->lock);
+            ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
+                                acb->n, &acb->hd_qiov);
+            qemu_co_mutex_lock(&s->lock);
+            if (ret < 0) {
+                return -EIO;
             }
         } else {
             /* Note: in this case, no need to wait */
@@ -637,64 +605,56 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(bs, acb->cluster_offset) < 0) {
-            ret = -EIO;
-            goto done;
+            return -EIO;
         }
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         goto redo;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
+            return -EIO;
         }
         acb->hd_iov.iov_base = (void *)acb->buf;
         acb->hd_iov.iov_len = acb->n * 512;
         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-        acb->hd_aiocb = bdrv_aio_readv(bs->file,
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_readv(bs->file,
                             (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL) {
-            ret = -EIO;
-            goto done;
+                            acb->n, &acb->hd_qiov);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            return ret;
         }
     }
 
-    return;
-
-done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
+    return 1;
 }
 
-static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors, QEMUIOVector *qiov)
 {
+    BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
     int ret;
 
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-    if (!acb)
-        return NULL;
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
 
-    ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
-    if (ret < 0) {
-        if (acb->qiov->niov > 1) {
-            qemu_vfree(acb->orig_buf);
-        }
-        qemu_aio_release(acb);
-        return NULL;
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow_aio_read_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
+
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
     }
+    qemu_aio_release(acb);
 
-    return &acb->common;
+    return ret;
 }
 
-static void qcow_aio_write_cb(void *opaque, int ret)
+static int qcow_aio_write_cb(void *opaque)
 {
     QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
@@ -702,20 +662,17 @@ static void qcow_aio_write_cb(void *opaque, int ret)
     int index_in_cluster;
     uint64_t cluster_offset;
     const uint8_t *src_buf;
+    int ret;
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0)
-        goto done;
-
     acb->nb_sectors -= acb->n;
     acb->sector_num += acb->n;
     acb->buf += acb->n * 512;
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -726,16 +683,11 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                         index_in_cluster,
                                         index_in_cluster + acb->n);
     if (!cluster_offset || (cluster_offset & 511) != 0) {
-        ret = -EIO;
-        goto done;
+        return -EIO;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
             acb->cluster_data = qemu_mallocz(s->cluster_size);
-            if (!acb->cluster_data) {
-                ret = -ENOMEM;
-                goto done;
-            }
         }
         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
                         acb->n, 1, &s->aes_encrypt_key);
@@ -747,26 +699,19 @@ static void qcow_aio_write_cb(void *opaque, int ret)
     acb->hd_iov.iov_base = (void *)src_buf;
     acb->hd_iov.iov_len = acb->n * 512;
     qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-    acb->hd_aiocb = bdrv_aio_writev(bs->file,
-                                    (cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->n,
-                                    qcow_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL) {
-        ret = -EIO;
-        goto done;
+    qemu_co_mutex_unlock(&s->lock);
+    ret = bdrv_co_writev(bs->file,
+                         (cluster_offset >> 9) + index_in_cluster,
+                         acb->n, &acb->hd_qiov);
+    qemu_co_mutex_lock(&s->lock);
+    if (ret < 0) {
+        return ret;
     }
-    return;
-
-done:
-    if (acb->qiov->niov > 1)
-        qemu_vfree(acb->orig_buf);
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
+    return 1;
 }
 
-static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov)
 {
     BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
@@ -774,21 +719,20 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-    if (!acb)
-        return NULL;
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
 
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow_aio_write_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
 
-    ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
-    if (ret < 0) {
-        if (acb->qiov->niov > 1) {
-            qemu_vfree(acb->orig_buf);
-        }
-        qemu_aio_release(acb);
-        return NULL;
+    if (acb->qiov->niov > 1) {
+        qemu_vfree(acb->orig_buf);
     }
+    qemu_aio_release(acb);
 
-    return &acb->common;
+    return ret;
 }
 
 static void qcow_close(BlockDriverState *bs)
@@ -1020,8 +964,8 @@ static BlockDriver bdrv_qcow = {
     .bdrv_is_allocated	= qcow_is_allocated,
     .bdrv_set_key	= qcow_set_key,
     .bdrv_make_empty	= qcow_make_empty,
-    .bdrv_aio_readv	= qcow_aio_readv,
-    .bdrv_aio_writev	= qcow_aio_writev,
+    .bdrv_co_readv  = qcow_co_readv,
+    .bdrv_co_writev = qcow_co_writev,
     .bdrv_aio_flush	= qcow_aio_flush,
     .bdrv_write_compressed = qcow_write_compressed,
     .bdrv_get_info	= qcow_get_info,
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 07/10] async: Remove AsyncContext
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (5 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 06/10] qcow: " Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 08/10] coroutines: Use one global bottom half for CoQueue Kevin Wolf
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

The purpose of AsyncContexts was to protect qcow and qcow2 against reentrancy
during an emulated bdrv_read/write (which includes a qemu_aio_wait() call and
can run AIO callbacks of different requests if it weren't for AsyncContexts).

Now both qcow and qcow2 are protected by CoMutexes and AsyncContexts can be
removed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 async.c            |   98 ++++------------------------------------------------
 block.c            |    6 ---
 block/qed-table.c  |   14 -------
 block/qed.c        |    4 --
 linux-aio.c        |   43 ++--------------------
 posix-aio-compat.c |   11 ------
 qemu-common.h      |    4 --
 7 files changed, 11 insertions(+), 169 deletions(-)

diff --git a/async.c b/async.c
index fd313df..3fe70b9 100644
--- a/async.c
+++ b/async.c
@@ -25,92 +25,8 @@
 #include "qemu-common.h"
 #include "qemu-aio.h"
 
-/*
- * An AsyncContext protects the callbacks of AIO requests and Bottom Halves
- * against interfering with each other. A typical example is qcow2 that accepts
- * asynchronous requests, but relies for manipulation of its metadata on
- * synchronous bdrv_read/write that doesn't trigger any callbacks.
- *
- * However, these functions are often emulated using AIO which means that AIO
- * callbacks must be run - but at the same time we must not run callbacks of
- * other requests as they might start to modify metadata and corrupt the
- * internal state of the caller of bdrv_read/write.
- *
- * To achieve the desired semantics we switch into a new AsyncContext.
- * Callbacks must only be run if they belong to the current AsyncContext.
- * Otherwise they need to be queued until their own context is active again.
- * This is how you can make qemu_aio_wait() wait only for your own callbacks.
- *
- * The AsyncContexts form a stack. When you leave a AsyncContexts, you always
- * return to the old ("parent") context.
- */
-struct AsyncContext {
-    /* Consecutive number of the AsyncContext (position in the stack) */
-    int id;
-
-    /* Anchor of the list of Bottom Halves belonging to the context */
-    struct QEMUBH *first_bh;
-
-    /* Link to parent context */
-    struct AsyncContext *parent;
-};
-
-/* The currently active AsyncContext */
-static struct AsyncContext *async_context = &(struct AsyncContext) { 0 };
-
-/*
- * Enter a new AsyncContext. Already scheduled Bottom Halves and AIO callbacks
- * won't be called until this context is left again.
- */
-void async_context_push(void)
-{
-    struct AsyncContext *new = qemu_mallocz(sizeof(*new));
-    new->parent = async_context;
-    new->id = async_context->id + 1;
-    async_context = new;
-}
-
-/* Run queued AIO completions and destroy Bottom Half */
-static void bh_run_aio_completions(void *opaque)
-{
-    QEMUBH **bh = opaque;
-    qemu_bh_delete(*bh);
-    qemu_free(bh);
-    qemu_aio_process_queue();
-}
-/*
- * Leave the currently active AsyncContext. All Bottom Halves belonging to the
- * old context are executed before changing the context.
- */
-void async_context_pop(void)
-{
-    struct AsyncContext *old = async_context;
-    QEMUBH **bh;
-
-    /* Flush the bottom halves, we don't want to lose them */
-    while (qemu_bh_poll());
-
-    /* Switch back to the parent context */
-    async_context = async_context->parent;
-    qemu_free(old);
-
-    if (async_context == NULL) {
-        abort();
-    }
-
-    /* Schedule BH to run any queued AIO completions as soon as possible */
-    bh = qemu_malloc(sizeof(*bh));
-    *bh = qemu_bh_new(bh_run_aio_completions, bh);
-    qemu_bh_schedule(*bh);
-}
-
-/*
- * Returns the ID of the currently active AsyncContext
- */
-int get_async_context_id(void)
-{
-    return async_context->id;
-}
+/* Anchor of the list of Bottom Halves belonging to the context */
+static struct QEMUBH *first_bh;
 
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -130,8 +46,8 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
     bh = qemu_mallocz(sizeof(QEMUBH));
     bh->cb = cb;
     bh->opaque = opaque;
-    bh->next = async_context->first_bh;
-    async_context->first_bh = bh;
+    bh->next = first_bh;
+    first_bh = bh;
     return bh;
 }
 
@@ -141,7 +57,7 @@ int qemu_bh_poll(void)
     int ret;
 
     ret = 0;
-    for (bh = async_context->first_bh; bh; bh = next) {
+    for (bh = first_bh; bh; bh = next) {
         next = bh->next;
         if (!bh->deleted && bh->scheduled) {
             bh->scheduled = 0;
@@ -153,7 +69,7 @@ int qemu_bh_poll(void)
     }
 
     /* remove deleted bhs */
-    bhp = &async_context->first_bh;
+    bhp = &first_bh;
     while (*bhp) {
         bh = *bhp;
         if (bh->deleted) {
@@ -199,7 +115,7 @@ void qemu_bh_update_timeout(int *timeout)
 {
     QEMUBH *bh;
 
-    for (bh = async_context->first_bh; bh; bh = bh->next) {
+    for (bh = first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
diff --git a/block.c b/block.c
index 2f589ac..39819e9 100644
--- a/block.c
+++ b/block.c
@@ -2789,8 +2789,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
     struct iovec iov;
     QEMUIOVector qiov;
 
-    async_context_push();
-
     async_ret = NOT_DONE;
     iov.iov_base = (void *)buf;
     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
@@ -2808,7 +2806,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
 
 
 fail:
-    async_context_pop();
     return async_ret;
 }
 
@@ -2820,8 +2817,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
     struct iovec iov;
     QEMUIOVector qiov;
 
-    async_context_push();
-
     async_ret = NOT_DONE;
     iov.iov_base = (void *)buf;
     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
@@ -2837,7 +2832,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
     }
 
 fail:
-    async_context_pop();
     return async_ret;
 }
 
diff --git a/block/qed-table.c b/block/qed-table.c
index d38c673..d96afa8 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -179,16 +179,12 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
-
     return ret;
 }
 
@@ -205,15 +201,11 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
-
     return ret;
 }
 
@@ -282,14 +274,11 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
     return ret;
 }
 
@@ -307,13 +296,10 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
     return ret;
 }
diff --git a/block/qed.c b/block/qed.c
index 3970379..333f067 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -680,16 +680,12 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
     };
     QEDRequest request = { .l2_table = NULL };
 
-    async_context_push();
-
     qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
 
     while (cb.is_allocated == -1) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
-
     qed_unref_l2_cache_entry(request.l2_table);
 
     return cb.is_allocated;
diff --git a/linux-aio.c b/linux-aio.c
index 68f4b3d..dc3faf2 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -31,7 +31,6 @@ struct qemu_laiocb {
     struct iocb iocb;
     ssize_t ret;
     size_t nbytes;
-    int async_context_id;
     QLIST_ENTRY(qemu_laiocb) node;
 };
 
@@ -39,7 +38,6 @@ struct qemu_laio_state {
     io_context_t ctx;
     int efd;
     int count;
-    QLIST_HEAD(, qemu_laiocb) completed_reqs;
 };
 
 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -49,7 +47,6 @@ static inline ssize_t io_event_ret(struct io_event *ev)
 
 /*
  * Completes an AIO request (calls the callback and frees the ACB).
- * Be sure to be in the right AsyncContext before calling this function.
  */
 static void qemu_laio_process_completion(struct qemu_laio_state *s,
     struct qemu_laiocb *laiocb)
@@ -72,42 +69,12 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
 }
 
 /*
- * Processes all queued AIO requests, i.e. requests that have return from OS
- * but their callback was not called yet. Requests that cannot have their
- * callback called in the current AsyncContext, remain in the queue.
- *
- * Returns 1 if at least one request could be completed, 0 otherwise.
+ * All requests are directly processed when they complete, so there's nothing
+ * left to do during qemu_aio_wait().
  */
 static int qemu_laio_process_requests(void *opaque)
 {
-    struct qemu_laio_state *s = opaque;
-    struct qemu_laiocb *laiocb, *next;
-    int res = 0;
-
-    QLIST_FOREACH_SAFE (laiocb, &s->completed_reqs, node, next) {
-        if (laiocb->async_context_id == get_async_context_id()) {
-            qemu_laio_process_completion(s, laiocb);
-            QLIST_REMOVE(laiocb, node);
-            res = 1;
-        }
-    }
-
-    return res;
-}
-
-/*
- * Puts a request in the completion queue so that its callback is called the
- * next time when it's possible. If we already are in the right AsyncContext,
- * the request is completed immediately instead.
- */
-static void qemu_laio_enqueue_completed(struct qemu_laio_state *s,
-    struct qemu_laiocb* laiocb)
-{
-    if (laiocb->async_context_id == get_async_context_id()) {
-        qemu_laio_process_completion(s, laiocb);
-    } else {
-        QLIST_INSERT_HEAD(&s->completed_reqs, laiocb, node);
-    }
+    return 0;
 }
 
 static void qemu_laio_completion_cb(void *opaque)
@@ -141,7 +108,7 @@ static void qemu_laio_completion_cb(void *opaque)
                     container_of(iocb, struct qemu_laiocb, iocb);
 
             laiocb->ret = io_event_ret(&events[i]);
-            qemu_laio_enqueue_completed(s, laiocb);
+            qemu_laio_process_completion(s, laiocb);
         }
     }
 }
@@ -204,7 +171,6 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
     laiocb->nbytes = nb_sectors * 512;
     laiocb->ctx = s;
     laiocb->ret = -EINPROGRESS;
-    laiocb->async_context_id = get_async_context_id();
 
     iocbs = &laiocb->iocb;
 
@@ -239,7 +205,6 @@ void *laio_init(void)
     struct qemu_laio_state *s;
 
     s = qemu_mallocz(sizeof(*s));
-    QLIST_INIT(&s->completed_reqs);
     s->efd = eventfd(0, 0);
     if (s->efd == -1)
         goto out_free_state;
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index c4116e3..788d113 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -49,8 +49,6 @@ struct qemu_paiocb {
     ssize_t ret;
     int active;
     struct qemu_paiocb *next;
-
-    int async_context_id;
 };
 
 typedef struct PosixAioState {
@@ -420,7 +418,6 @@ static int posix_aio_process_queue(void *opaque)
     struct qemu_paiocb *acb, **pacb;
     int ret;
     int result = 0;
-    int async_context_id = get_async_context_id();
 
     for(;;) {
         pacb = &s->first_aio;
@@ -429,12 +426,6 @@ static int posix_aio_process_queue(void *opaque)
             if (!acb)
                 return result;
 
-            /* we're only interested in requests in the right context */
-            if (acb->async_context_id != async_context_id) {
-                pacb = &acb->next;
-                continue;
-            }
-
             ret = qemu_paio_error(acb);
             if (ret == ECANCELED) {
                 /* remove the request */
@@ -575,7 +566,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
     acb->aio_type = type;
     acb->aio_fildes = fd;
     acb->ev_signo = SIGUSR2;
-    acb->async_context_id = get_async_context_id();
 
     if (qiov) {
         acb->aio_iov = qiov->iov;
@@ -604,7 +594,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
     acb->aio_type = QEMU_AIO_IOCTL;
     acb->aio_fildes = fd;
     acb->ev_signo = SIGUSR2;
-    acb->async_context_id = get_async_context_id();
     acb->aio_offset = 0;
     acb->aio_ioctl_buf = buf;
     acb->aio_ioctl_cmd = req;
diff --git a/qemu-common.h b/qemu-common.h
index ba55719..97a80e4 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -111,10 +111,6 @@ int qemu_main(int argc, char **argv, char **envp);
 /* bottom halves */
 typedef void QEMUBHFunc(void *opaque);
 
-void async_context_push(void);
-void async_context_pop(void);
-int get_async_context_id(void);
-
 QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
 void qemu_bh_schedule(QEMUBH *bh);
 /* Bottom halfs that are scheduled from a bottom half handler are instantly
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 08/10] coroutines: Use one global bottom half for CoQueue
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (6 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 07/10] async: Remove AsyncContext Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF Kevin Wolf
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

Now that AsyncContexts don't exist any more, we can use one global bottom half
for restarting coroutines instead of allocating a new one every time (before
removing AsyncContexts, the problem with having a global BH was that it had to
belong to a single AsyncContext and wouldn't be executed in a different one -
which led to deadlocks)

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-coroutine-lock.c |   19 ++++++-------------
 1 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
index abaa1f7..a80f437 100644
--- a/qemu-coroutine-lock.c
+++ b/qemu-coroutine-lock.c
@@ -30,14 +30,10 @@
 
 static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
     QTAILQ_HEAD_INITIALIZER(unlock_bh_queue);
-
-struct unlock_bh {
-    QEMUBH *bh;
-};
+static QEMUBH* unlock_bh;
 
 static void qemu_co_queue_next_bh(void *opaque)
 {
-    struct unlock_bh *unlock_bh = opaque;
     Coroutine *next;
 
     trace_qemu_co_queue_next_bh();
@@ -45,14 +41,15 @@ static void qemu_co_queue_next_bh(void *opaque)
         QTAILQ_REMOVE(&unlock_bh_queue, next, co_queue_next);
         qemu_coroutine_enter(next, NULL);
     }
-
-    qemu_bh_delete(unlock_bh->bh);
-    qemu_free(unlock_bh);
 }
 
 void qemu_co_queue_init(CoQueue *queue)
 {
     QTAILQ_INIT(&queue->entries);
+
+    if (!unlock_bh) {
+        unlock_bh = qemu_bh_new(qemu_co_queue_next_bh, NULL);
+    }
 }
 
 void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
@@ -65,7 +62,6 @@ void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
 
 bool qemu_co_queue_next(CoQueue *queue)
 {
-    struct unlock_bh *unlock_bh;
     Coroutine *next;
 
     next = QTAILQ_FIRST(&queue->entries);
@@ -73,10 +69,7 @@ bool qemu_co_queue_next(CoQueue *queue)
         QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
         QTAILQ_INSERT_TAIL(&unlock_bh_queue, next, co_queue_next);
         trace_qemu_co_queue_next(next);
-
-        unlock_bh = qemu_malloc(sizeof(*unlock_bh));
-        unlock_bh->bh = qemu_bh_new(qemu_co_queue_next_bh, unlock_bh);
-        qemu_bh_schedule(unlock_bh->bh);
+        qemu_bh_schedule(unlock_bh);
     }
 
     return (next != NULL);
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (7 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 08/10] coroutines: Use one global bottom half for CoQueue Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-07-26 13:55   ` Frediano Ziglio
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 10/10] block: Use bdrv_co_* instead of synchronous versions in coroutines Kevin Wolf
                   ` (2 subsequent siblings)
  11 siblings, 1 reply; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

In order to be able to transparently replace bdrv_read calls by bdrv_co_read,
reading beyond EOF must produce zeros instead of short reads for AIO, too.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 posix-aio-compat.c |   19 +++++++++++++++++++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 788d113..8dc00cb 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -198,6 +198,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
     return len;
 }
 
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
 static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
 {
     ssize_t offset = 0;
@@ -334,6 +340,19 @@ static void *aio_thread(void *unused)
 
         switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
         case QEMU_AIO_READ:
+            ret = handle_aiocb_rw(aiocb);
+            if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
+                /* A short read means that we have reached EOF. Pad the buffer
+                 * with zeros for bytes after EOF. */
+                QEMUIOVector qiov;
+
+                qemu_iovec_init_external(&qiov, aiocb->aio_iov,
+                                         aiocb->aio_niov);
+                qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret);
+
+                ret = aiocb->aio_nbytes;
+            }
+            break;
         case QEMU_AIO_WRITE:
             ret = handle_aiocb_rw(aiocb);
             break;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH 10/10] block: Use bdrv_co_* instead of synchronous versions in coroutines
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (8 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF Kevin Wolf
@ 2011-07-26 11:49 ` Kevin Wolf
  2011-08-02 13:56   ` [Qemu-devel] [PATCH v2 " Kevin Wolf
  2011-08-01  9:59 ` [Qemu-devel] [PATCH 00/10] block: Coroutine support Stefan Hajnoczi
  2011-08-02 14:23 ` Avi Kivity
  11 siblings, 1 reply; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 11:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, stefanha

If we're already in a coroutine, there is no reason to use the synchronous
version of block layer functions when a coroutine one exists. This makes
bdrv_read/write/flush use bdrv_co_* when used inside a coroutine.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c |   43 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 39819e9..e2075d4 100644
--- a/block.c
+++ b/block.c
@@ -70,6 +70,7 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -950,6 +951,17 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
 {
     BlockDriver *drv = bs->drv;
 
+    if (qemu_in_coroutine()) {
+        QEMUIOVector qiov;
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        };
+
+        qemu_iovec_init_external(&qiov, &iov, 1);
+        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
+    }
+
     if (!drv)
         return -ENOMEDIUM;
     if (bdrv_check_request(bs, sector_num, nb_sectors))
@@ -996,6 +1008,18 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors)
 {
     BlockDriver *drv = bs->drv;
+
+    if (qemu_in_coroutine()) {
+        QEMUIOVector qiov;
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        };
+
+        qemu_iovec_init_external(&qiov, &iov, 1);
+        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+    }
+
     if (!bs->drv)
         return -ENOMEDIUM;
     if (bs->read_only)
@@ -1661,6 +1685,10 @@ int bdrv_flush(BlockDriverState *bs)
         return 0;
     }
 
+    if (qemu_in_coroutine()) {
+        return bdrv_co_flush_em(bs);
+    }
+
     if (bs->drv && bs->drv->bdrv_flush) {
         return bs->drv->bdrv_flush(bs);
     }
@@ -2929,6 +2957,21 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
 }
 
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockDriverAIOCB *acb;
+
+    acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+    return co.ret;
+}
+
 /**************************************************************/
 /* removable device support */
 
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF Kevin Wolf
@ 2011-07-26 13:55   ` Frediano Ziglio
  2011-07-26 14:22     ` Kevin Wolf
  0 siblings, 1 reply; 23+ messages in thread
From: Frediano Ziglio @ 2011-07-26 13:55 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: stefanha, qemu-devel

2011/7/26 Kevin Wolf <kwolf@redhat.com>:
> In order to be able to transparently replace bdrv_read calls by bdrv_co_read,
> reading beyond EOF must produce zeros instead of short reads for AIO, too.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  posix-aio-compat.c |   19 +++++++++++++++++++
>  1 files changed, 19 insertions(+), 0 deletions(-)
>
> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index 788d113..8dc00cb 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -198,6 +198,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
>     return len;
>  }
>
> +/*
> + * Read/writes the data to/from a given linear buffer.
> + *
> + * Returns the number of bytes handles or -errno in case of an error. Short
> + * reads are only returned if the end of the file is reached.
> + */
>  static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
>  {
>     ssize_t offset = 0;
> @@ -334,6 +340,19 @@ static void *aio_thread(void *unused)
>
>         switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
>         case QEMU_AIO_READ:
> +            ret = handle_aiocb_rw(aiocb);
> +            if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
> +                /* A short read means that we have reached EOF. Pad the buffer
> +                 * with zeros for bytes after EOF. */
> +                QEMUIOVector qiov;
> +
> +                qemu_iovec_init_external(&qiov, aiocb->aio_iov,
> +                                         aiocb->aio_niov);
> +                qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret);
> +
> +                ret = aiocb->aio_nbytes;
> +            }
> +            break;
>         case QEMU_AIO_WRITE:
>             ret = handle_aiocb_rw(aiocb);
>             break;
> --
> 1.7.6
>

Still not tested, but I think I know what it solves :)

I think Linux AIO requires the same attention.

Frediano

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF
  2011-07-26 13:55   ` Frediano Ziglio
@ 2011-07-26 14:22     ` Kevin Wolf
  0 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-07-26 14:22 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: stefanha, qemu-devel

Am 26.07.2011 15:55, schrieb Frediano Ziglio:
> 2011/7/26 Kevin Wolf <kwolf@redhat.com>:
>> In order to be able to transparently replace bdrv_read calls by bdrv_co_read,
>> reading beyond EOF must produce zeros instead of short reads for AIO, too.
>>
>> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>> ---
>>  posix-aio-compat.c |   19 +++++++++++++++++++
>>  1 files changed, 19 insertions(+), 0 deletions(-)
>>
>> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
>> index 788d113..8dc00cb 100644
>> --- a/posix-aio-compat.c
>> +++ b/posix-aio-compat.c
>> @@ -198,6 +198,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
>>     return len;
>>  }
>>
>> +/*
>> + * Read/writes the data to/from a given linear buffer.
>> + *
>> + * Returns the number of bytes handles or -errno in case of an error. Short
>> + * reads are only returned if the end of the file is reached.
>> + */
>>  static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
>>  {
>>     ssize_t offset = 0;
>> @@ -334,6 +340,19 @@ static void *aio_thread(void *unused)
>>
>>         switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
>>         case QEMU_AIO_READ:
>> +            ret = handle_aiocb_rw(aiocb);
>> +            if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
>> +                /* A short read means that we have reached EOF. Pad the buffer
>> +                 * with zeros for bytes after EOF. */
>> +                QEMUIOVector qiov;
>> +
>> +                qemu_iovec_init_external(&qiov, aiocb->aio_iov,
>> +                                         aiocb->aio_niov);
>> +                qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret);
>> +
>> +                ret = aiocb->aio_nbytes;
>> +            }
>> +            break;
>>         case QEMU_AIO_WRITE:
>>             ret = handle_aiocb_rw(aiocb);
>>             break;
>> --
>> 1.7.6
>>
> 
> Still not tested, but I think I know what it solves :)
> 
> I think Linux AIO requires the same attention.

In theory yes, but it's not as easy and for some reason I couldn't
reproduce it with Linux AIO (maybe the problematic requests are
misaligned so that it falls back to posix-aio-compat.c), so I decided to
ignore it for now.

Patches are welcome. ;-)

Kevin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 05/10] qcow2: Use coroutines
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 05/10] qcow2: Use coroutines Kevin Wolf
@ 2011-07-29 13:20   ` Stefan Hajnoczi
  0 siblings, 0 replies; 23+ messages in thread
From: Stefan Hajnoczi @ 2011-07-29 13:20 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel

On Tue, Jul 26, 2011 at 12:49 PM, Kevin Wolf <kwolf@redhat.com> wrote:
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block/qcow2-cluster.c |   26 +++---
>  block/qcow2.c         |  240 ++++++++++++++++++-------------------------------
>  block/qcow2.h         |    5 +-
>  3 files changed, 102 insertions(+), 169 deletions(-)

I like this.  The only thing that I would like to see is comments
explain what "lock" protects and why.

As I understand it lock is needed because the metadata accesses which
use bdrv_pread() and friends will yield.  So it is necessary to
prevent in-memory qcow2 metadata from getting corrupted during these
operations.

Stefan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (9 preceding siblings ...)
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 10/10] block: Use bdrv_co_* instead of synchronous versions in coroutines Kevin Wolf
@ 2011-08-01  9:59 ` Stefan Hajnoczi
  2011-08-02 14:23 ` Avi Kivity
  11 siblings, 0 replies; 23+ messages in thread
From: Stefan Hajnoczi @ 2011-08-01  9:59 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel

On Tue, Jul 26, 2011 at 12:48 PM, Kevin Wolf <kwolf@redhat.com> wrote:
> Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
> take advantage of the new coroutine infrastructure. Both formats used
> synchronous operations for accessing their metadata and blocked the guest CPU
> during that time. With coroutines, the I/O will happen asynchronously in the
> background and the CPU won't be blocked any more.
>
> Kevin Wolf (10):
>  block: Add bdrv_co_readv/writev
>  block: Emulate AIO functions with bdrv_co_readv/writev
>  block: Add bdrv_co_readv/writev emulation
>  coroutines: Locks
>  qcow2: Use coroutines
>  qcow: Use coroutines
>  async: Remove AsyncContext
>  coroutines: Use one global bottom half for CoQueue
>  posix-aio-compat: Allow read after EOF
>  block: Use bdrv_co_* instead of synchronous versions in coroutines
>
>  Makefile.objs         |    4 +-
>  async.c               |   98 ++-----------------
>  block.c               |  271 ++++++++++++++++++++++++++++++++++++++++++++++--
>  block.h               |    5 +
>  block/qcow.c          |  180 +++++++++++---------------------
>  block/qcow2-cluster.c |   26 ++---
>  block/qcow2.c         |  240 ++++++++++++++++----------------------------
>  block/qcow2.h         |    5 +-
>  block/qed-table.c     |   14 ---
>  block/qed.c           |    4 -
>  block_int.h           |    6 +
>  linux-aio.c           |   43 +-------
>  posix-aio-compat.c    |   30 ++++--
>  qemu-common.h         |    4 -
>  qemu-coroutine-int.h  |    1 +
>  qemu-coroutine-lock.c |  117 +++++++++++++++++++++
>  qemu-coroutine.h      |   64 ++++++++++++
>  trace-events          |   11 ++
>  18 files changed, 659 insertions(+), 464 deletions(-)
>  create mode 100644 qemu-coroutine-lock.c

Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 03/10] block: Add bdrv_co_readv/writev emulation
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 03/10] block: Add bdrv_co_readv/writev emulation Kevin Wolf
@ 2011-08-02 12:12   ` Kevin Wolf
  0 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-08-02 12:12 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: stefanha, qemu-devel

Am 26.07.2011 13:49, schrieb Kevin Wolf:
> In order to be able to call bdrv_co_readv/writev for drivers that don't
> implement the functions natively, add an emulation that uses the AIO functions
> to implement them.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block.c      |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  trace-events |    1 +
>  2 files changed, 77 insertions(+), 8 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 48413ae..2f589ac 100644
> --- a/block.c
> +++ b/block.c
> @@ -64,6 +64,12 @@ static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
>  static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
>          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>          BlockDriverCompletionFunc *cb, void *opaque);
> +static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
> +                                         int64_t sector_num, int nb_sectors,
> +                                         QEMUIOVector *iov);
> +static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
> +                                         int64_t sector_num, int nb_sectors,
> +                                         QEMUIOVector *iov);
>  
>  static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
>      QTAILQ_HEAD_INITIALIZER(bdrv_states);
> @@ -182,14 +188,19 @@ void bdrv_register(BlockDriver *bdrv)
>          bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
>          bdrv->bdrv_read = bdrv_read_em;
>          bdrv->bdrv_write = bdrv_write_em;
> -     } else if (!bdrv->bdrv_aio_readv) {
> -        /* add AIO emulation layer */
> -        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
> -        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
> -    } else if (!bdrv->bdrv_read) {
> -        /* add synchronous IO emulation layer */
> -        bdrv->bdrv_read = bdrv_read_em;
> -        bdrv->bdrv_write = bdrv_write_em;
> +     } else {
> +        bdrv->bdrv_co_readv = bdrv_co_readv_em;
> +        bdrv->bdrv_co_writev = bdrv_co_writev_em;
> +
> +        if (!bdrv->bdrv_aio_readv) {
> +            /* add AIO emulation layer */
> +            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
> +            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
> +        } else if (!bdrv->bdrv_read) {
> +            /* add synchronous IO emulation layer */
> +            bdrv->bdrv_read = bdrv_read_em;
> +            bdrv->bdrv_write = bdrv_write_em;
> +        }
>      }
>  
>      if (!bdrv->bdrv_aio_flush)
> @@ -2868,6 +2879,63 @@ void qemu_aio_release(void *p)
>  }
>  
>  /**************************************************************/
> +/* Coroutine block device emulation */
> +
> +typedef struct CoroutineIOCompletion {
> +    Coroutine *coroutine;
> +    int ret;
> +} CoroutineIOCompletion;
> +
> +static void bdrv_co_io_em_complete(void *opaque, int ret)
> +{
> +    CoroutineIOCompletion *co = opaque;
> +
> +    co->ret = ret;
> +    qemu_coroutine_enter(co->coroutine, NULL);
> +}
> +
> +static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
> +                                      int nb_sectors, QEMUIOVector *iov,
> +                                      bool is_write)
> +{
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +        .ret = -EINPROGRESS,

This line doesn't compile with mingw32. Additionally, we never use this
value, so I just dropped it from the patch in the block queue.

Unless someone insists on it, I won't resend the series for this one-liner.

Kevin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v2 10/10] block: Use bdrv_co_* instead of synchronous versions in coroutines
  2011-07-26 11:49 ` [Qemu-devel] [PATCH 10/10] block: Use bdrv_co_* instead of synchronous versions in coroutines Kevin Wolf
@ 2011-08-02 13:56   ` Kevin Wolf
  0 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-08-02 13:56 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf

If we're already in a coroutine, there is no reason to use the synchronous
version of block layer functions when a coroutine one exists. This makes
bdrv_read/write/flush use bdrv_co_* when used inside a coroutine.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---

v2:
- Avoid endless recursion when we're in a coroutine, but the block driver
  doesn't support AIO. Required for raw-win32.

 block.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 0d05b4b..d868299 100644
--- a/block.c
+++ b/block.c
@@ -70,6 +70,7 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -946,12 +947,34 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                                    nb_sectors * BDRV_SECTOR_SIZE);
 }
 
+static inline bool bdrv_has_async_rw(BlockDriver *drv)
+{
+    return drv->bdrv_co_readv != bdrv_co_readv_em
+        || drv->bdrv_aio_readv != bdrv_aio_readv_em;
+}
+
+static inline bool bdrv_has_async_flush(BlockDriver *drv)
+{
+    return drv->bdrv_aio_flush != bdrv_aio_flush_em;
+}
+
 /* return < 0 if error. See bdrv_write() for the return codes */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors)
 {
     BlockDriver *drv = bs->drv;
 
+    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
+        QEMUIOVector qiov;
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        };
+
+        qemu_iovec_init_external(&qiov, &iov, 1);
+        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
+    }
+
     if (!drv)
         return -ENOMEDIUM;
     if (bdrv_check_request(bs, sector_num, nb_sectors))
@@ -998,6 +1021,18 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors)
 {
     BlockDriver *drv = bs->drv;
+
+    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
+        QEMUIOVector qiov;
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        };
+
+        qemu_iovec_init_external(&qiov, &iov, 1);
+        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+    }
+
     if (!bs->drv)
         return -ENOMEDIUM;
     if (bs->read_only)
@@ -1649,6 +1684,10 @@ int bdrv_flush(BlockDriverState *bs)
         return 0;
     }
 
+    if (bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
+        return bdrv_co_flush_em(bs);
+    }
+
     if (bs->drv && bs->drv->bdrv_flush) {
         return bs->drv->bdrv_flush(bs);
     }
@@ -2916,6 +2955,21 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
 }
 
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockDriverAIOCB *acb;
+
+    acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+    return co.ret;
+}
+
 /**************************************************************/
 /* removable device support */
 
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
                   ` (10 preceding siblings ...)
  2011-08-01  9:59 ` [Qemu-devel] [PATCH 00/10] block: Coroutine support Stefan Hajnoczi
@ 2011-08-02 14:23 ` Avi Kivity
  2011-08-02 14:50   ` Kevin Wolf
  11 siblings, 1 reply; 23+ messages in thread
From: Avi Kivity @ 2011-08-02 14:23 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: stefanha, qemu-devel

On 07/26/2011 02:48 PM, Kevin Wolf wrote:
> Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
> take advantage of the new coroutine infrastructure. Both formats used
> synchronous operations for accessing their metadata and blocked the guest CPU
> during that time. With coroutines, the I/O will happen asynchronously in the
> background and the CPU won't be blocked any more.
>

Do you plan to convert qcow2 to a fully synchronous design?

IMO that will make it more maintainable.  Cancellation will need some 
thought, though.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-08-02 14:23 ` Avi Kivity
@ 2011-08-02 14:50   ` Kevin Wolf
  2011-08-02 14:55     ` Frediano Ziglio
                       ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-08-02 14:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: stefanha, qemu-devel

Am 02.08.2011 16:23, schrieb Avi Kivity:
> On 07/26/2011 02:48 PM, Kevin Wolf wrote:
>> Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
>> take advantage of the new coroutine infrastructure. Both formats used
>> synchronous operations for accessing their metadata and blocked the guest CPU
>> during that time. With coroutines, the I/O will happen asynchronously in the
>> background and the CPU won't be blocked any more.
>>
> 
> Do you plan to convert qcow2 to a fully synchronous design?
> 
> IMO that will make it more maintainable.  Cancellation will need some 
> thought, though.

After this patch series, all interesting paths are free of callbacks (I
assume this is what you mean by synchronous?). The only thing I can see
that is left is qcow2_aio_flush. What is required are some cleanups that
eliminate things that still look like AIO code, and yes, that's
something that I want to have.

Frediano has posted some patches which I haven't fully reviewed yet, but
the qcow1 RFC he posted was definitely a step in the right direction.

Regarding cancellation, I don't know any driver that really does what
it's supposed to do. There are basically two ways of implementing it in
current code: Either by completing the request instead of cancelling, or
it's broken. I'd suggest that we implement waiting for completion as a
generic function in the block layer and be done with it (actually this
is what happens with bdrv_aio_co_cancel_em, it just could be a bit finer
grained).

Kevin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-08-02 14:50   ` Kevin Wolf
@ 2011-08-02 14:55     ` Frediano Ziglio
  2011-08-02 15:14       ` Kevin Wolf
  2011-08-02 14:58     ` Anthony Liguori
  2011-08-02 14:59     ` Avi Kivity
  2 siblings, 1 reply; 23+ messages in thread
From: Frediano Ziglio @ 2011-08-02 14:55 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: stefanha, Avi Kivity, qemu-devel

2011/8/2 Kevin Wolf <kwolf@redhat.com>:
> Am 02.08.2011 16:23, schrieb Avi Kivity:
>> On 07/26/2011 02:48 PM, Kevin Wolf wrote:
>>> Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
>>> take advantage of the new coroutine infrastructure. Both formats used
>>> synchronous operations for accessing their metadata and blocked the guest CPU
>>> during that time. With coroutines, the I/O will happen asynchronously in the
>>> background and the CPU won't be blocked any more.
>>>
>>
>> Do you plan to convert qcow2 to a fully synchronous design?
>>
>> IMO that will make it more maintainable.  Cancellation will need some
>> thought, though.
>
> After this patch series, all interesting paths are free of callbacks (I
> assume this is what you mean by synchronous?). The only thing I can see
> that is left is qcow2_aio_flush. What is required are some cleanups that
> eliminate things that still look like AIO code, and yes, that's
> something that I want to have.
>
> Frediano has posted some patches which I haven't fully reviewed yet, but
> the qcow1 RFC he posted was definitely a step in the right direction.
>

Did I send patches for qcow2?
I just rebased them with your last updates, I'll send them again.

> Regarding cancellation, I don't know any driver that really does what
> it's supposed to do. There are basically two ways of implementing it in
> current code: Either by completing the request instead of cancelling, or
> it's broken. I'd suggest that we implement waiting for completion as a
> generic function in the block layer and be done with it (actually this
> is what happens with bdrv_aio_co_cancel_em, it just could be a bit finer
> grained).
>
> Kevin
>
>

Frediano

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-08-02 14:50   ` Kevin Wolf
  2011-08-02 14:55     ` Frediano Ziglio
@ 2011-08-02 14:58     ` Anthony Liguori
  2011-08-02 14:59     ` Avi Kivity
  2 siblings, 0 replies; 23+ messages in thread
From: Anthony Liguori @ 2011-08-02 14:58 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: stefanha, Avi Kivity, qemu-devel

On 08/02/2011 09:50 AM, Kevin Wolf wrote:
> Am 02.08.2011 16:23, schrieb Avi Kivity:
>> On 07/26/2011 02:48 PM, Kevin Wolf wrote:
>>> Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
>>> take advantage of the new coroutine infrastructure. Both formats used
>>> synchronous operations for accessing their metadata and blocked the guest CPU
>>> during that time. With coroutines, the I/O will happen asynchronously in the
>>> background and the CPU won't be blocked any more.
>>>
>>
>> Do you plan to convert qcow2 to a fully synchronous design?
>>
>> IMO that will make it more maintainable.  Cancellation will need some
>> thought, though.
>
> After this patch series, all interesting paths are free of callbacks (I
> assume this is what you mean by synchronous?). The only thing I can see
> that is left is qcow2_aio_flush. What is required are some cleanups that
> eliminate things that still look like AIO code, and yes, that's
> something that I want to have.
>
> Frediano has posted some patches which I haven't fully reviewed yet, but
> the qcow1 RFC he posted was definitely a step in the right direction.
>
> Regarding cancellation, I don't know any driver that really does what
> it's supposed to do. There are basically two ways of implementing it in
> current code: Either by completing the request instead of cancelling, or
> it's broken. I'd suggest that we implement waiting for completion as a
> generic function in the block layer and be done with it (actually this
> is what happens with bdrv_aio_co_cancel_em, it just could be a bit finer
> grained).

If you introduce queuing at the generic layer, then removing from queue 
or waiting for completion is entirely acceptable semantics IMHO.

Regards,

Anthony Liguori

>
> Kevin
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-08-02 14:50   ` Kevin Wolf
  2011-08-02 14:55     ` Frediano Ziglio
  2011-08-02 14:58     ` Anthony Liguori
@ 2011-08-02 14:59     ` Avi Kivity
  2 siblings, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2011-08-02 14:59 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: stefanha, qemu-devel

On 08/02/2011 05:50 PM, Kevin Wolf wrote:
> Am 02.08.2011 16:23, schrieb Avi Kivity:
> >  On 07/26/2011 02:48 PM, Kevin Wolf wrote:
> >>  Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
> >>  take advantage of the new coroutine infrastructure. Both formats used
> >>  synchronous operations for accessing their metadata and blocked the guest CPU
> >>  during that time. With coroutines, the I/O will happen asynchronously in the
> >>  background and the CPU won't be blocked any more.
> >>
> >
> >  Do you plan to convert qcow2 to a fully synchronous design?
> >
> >  IMO that will make it more maintainable.  Cancellation will need some
> >  thought, though.
>
> After this patch series, all interesting paths are free of callbacks (I
> assume this is what you mean by synchronous?).

That's what I meant.  I just didn't review thoroughly - I saw some _cb 
suffixes and made incorrect assumptions.

> The only thing I can see
> that is left is qcow2_aio_flush. What is required are some cleanups that
> eliminate things that still look like AIO code, and yes, that's
> something that I want to have.

Ok, great.

> Frediano has posted some patches which I haven't fully reviewed yet, but
> the qcow1 RFC he posted was definitely a step in the right direction.
>
> Regarding cancellation, I don't know any driver that really does what
> it's supposed to do. There are basically two ways of implementing it in
> current code: Either by completing the request instead of cancelling, or
> it's broken. I'd suggest that we implement waiting for completion as a
> generic function in the block layer and be done with it (actually this
> is what happens with bdrv_aio_co_cancel_em, it just could be a bit finer
> grained).

Usually completing is fine, but sometimes it cannot be made to work 
(networked or fabric-attached disk with no timeout or timeout greater than the 
device we're emulating).


However, there's no kernel interface for cancellation (it will usually 
be TASK_UNINTERRUPTIBLE) except for linux-aio.  So I guess we can 
implement it only there.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support
  2011-08-02 14:55     ` Frediano Ziglio
@ 2011-08-02 15:14       ` Kevin Wolf
  0 siblings, 0 replies; 23+ messages in thread
From: Kevin Wolf @ 2011-08-02 15:14 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: stefanha, Avi Kivity, qemu-devel

Am 02.08.2011 16:55, schrieb Frediano Ziglio:
> 2011/8/2 Kevin Wolf <kwolf@redhat.com>:
>> Am 02.08.2011 16:23, schrieb Avi Kivity:
>>> On 07/26/2011 02:48 PM, Kevin Wolf wrote:
>>>> Depends on Stefan's latest coroutine patches. This series makes qcow and qcow2
>>>> take advantage of the new coroutine infrastructure. Both formats used
>>>> synchronous operations for accessing their metadata and blocked the guest CPU
>>>> during that time. With coroutines, the I/O will happen asynchronously in the
>>>> background and the CPU won't be blocked any more.
>>>>
>>>
>>> Do you plan to convert qcow2 to a fully synchronous design?
>>>
>>> IMO that will make it more maintainable.  Cancellation will need some
>>> thought, though.
>>
>> After this patch series, all interesting paths are free of callbacks (I
>> assume this is what you mean by synchronous?). The only thing I can see
>> that is left is qcow2_aio_flush. What is required are some cleanups that
>> eliminate things that still look like AIO code, and yes, that's
>> something that I want to have.
>>
>> Frediano has posted some patches which I haven't fully reviewed yet, but
>> the qcow1 RFC he posted was definitely a step in the right direction.
>>
> 
> Did I send patches for qcow2?
> I just rebased them with your last updates, I'll send them again.

Yes, you did. I just haven't reviewed them yet. But assuming that you're
basically doing the same as in qcow1, they are the right thing to do.

Kevin

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2011-08-02 15:11 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-26 11:48 [Qemu-devel] [PATCH 00/10] block: Coroutine support Kevin Wolf
2011-07-26 11:48 ` [Qemu-devel] [PATCH 01/10] block: Add bdrv_co_readv/writev Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 02/10] block: Emulate AIO functions with bdrv_co_readv/writev Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 03/10] block: Add bdrv_co_readv/writev emulation Kevin Wolf
2011-08-02 12:12   ` Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 04/10] coroutines: Locks Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 05/10] qcow2: Use coroutines Kevin Wolf
2011-07-29 13:20   ` Stefan Hajnoczi
2011-07-26 11:49 ` [Qemu-devel] [PATCH 06/10] qcow: " Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 07/10] async: Remove AsyncContext Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 08/10] coroutines: Use one global bottom half for CoQueue Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 09/10] posix-aio-compat: Allow read after EOF Kevin Wolf
2011-07-26 13:55   ` Frediano Ziglio
2011-07-26 14:22     ` Kevin Wolf
2011-07-26 11:49 ` [Qemu-devel] [PATCH 10/10] block: Use bdrv_co_* instead of synchronous versions in coroutines Kevin Wolf
2011-08-02 13:56   ` [Qemu-devel] [PATCH v2 " Kevin Wolf
2011-08-01  9:59 ` [Qemu-devel] [PATCH 00/10] block: Coroutine support Stefan Hajnoczi
2011-08-02 14:23 ` Avi Kivity
2011-08-02 14:50   ` Kevin Wolf
2011-08-02 14:55     ` Frediano Ziglio
2011-08-02 15:14       ` Kevin Wolf
2011-08-02 14:58     ` Anthony Liguori
2011-08-02 14:59     ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.