* [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
@ 2013-07-06 16:24 Alex Bligh
  2013-07-06 16:31 ` Alex Bligh
  2013-07-15 14:25 ` [Qemu-devel] [PATCH] " Paolo Bonzini
  0 siblings, 2 replies; 29+ messages in thread
From: Alex Bligh @ 2013-07-06 16:24 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, Stefan Hajnoczi,
	Paolo Bonzini, rth

Add timed bottom halves. A timed bottom half is a bottom half that
will not execute until a given time has passed (qemu_bh_schedule_at)
or a given interval has passed (qemu_bh_schedule_in). Any qemu
clock can be used, and times are specified in nanoseconds.

Timed bottom halves can be used where timers cannot. For instance,
in block drivers where there is no mainloop that calls timers
(qemu-nbd, qemu-img), or where (per stefanha@redhat.com) the
aio code loops internally and thus timers never get called.
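
As an illustration, a block driver could use the new calls roughly as
follows. This is hypothetical code, not part of this patch: BDRVFooState,
foo_try_reconnect and the reconnect policy are invented; only aio_bh_new,
qemu_bh_schedule_in and rt_clock come from the real API.

/* Hypothetical example: retry a broken connection 5 seconds from now,
 * without needing a main loop that runs timers. */
static void foo_reconnect_cb(void *opaque)
{
    BDRVFooState *s = opaque;                 /* invented driver state */

    if (foo_try_reconnect(s) < 0) {           /* invented helper */
        /* still down: try again in another 5 seconds */
        qemu_bh_schedule_in(s->reconnect_bh, rt_clock, 5000000000LL);
    }
}

static void foo_start_reconnect(BDRVFooState *s, AioContext *ctx)
{
    s->reconnect_bh = aio_bh_new(ctx, foo_reconnect_cb, s);
    qemu_bh_schedule_in(s->reconnect_bh, rt_clock, 5000000000LL);
}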

Signed-off-by: Alex Bligh <alex@alex.org.uk>
---
 async.c             |   53 +++++++++++++++++++++++++++++++++++++++++++++------
 include/block/aio.h |   33 ++++++++++++++++++++++++++++++++
 tests/test-aio.c    |   47 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/async.c b/async.c
index 90fe906..d93b1ab 100644
--- a/async.c
+++ b/async.c
@@ -35,6 +35,8 @@ struct QEMUBH {
     QEMUBHFunc *cb;
     void *opaque;
     QEMUBH *next;
+    QEMUClock *clock;
+    int64_t time;
     bool scheduled;
     bool idle;
     bool deleted;
@@ -52,6 +54,11 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     return bh;
 }
 
+static inline int64_t qemu_bh_ready_in(QEMUBH *bh)
+{
+    return (bh->clock) ? (bh->time - qemu_get_clock_ns(bh->clock)) : 0;
+}
+
 int aio_bh_poll(AioContext *ctx)
 {
     QEMUBH *bh, **bhp, *next;
@@ -62,8 +69,10 @@ int aio_bh_poll(AioContext *ctx)
     ret = 0;
     for (bh = ctx->first_bh; bh; bh = next) {
         next = bh->next;
-        if (!bh->deleted && bh->scheduled) {
+        if (!bh->deleted && bh->scheduled && qemu_bh_ready_in(bh) <= 0) {
             bh->scheduled = 0;
+            bh->clock = NULL;
+            bh->time = 0;
             if (!bh->idle)
                 ret = 1;
             bh->idle = 0;
@@ -96,6 +105,8 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 1;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 void qemu_bh_schedule(QEMUBH *bh)
@@ -104,18 +115,39 @@ void qemu_bh_schedule(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 0;
+    bh->clock = NULL;
+    bh->time = 0;
     aio_notify(bh->ctx);
 }
 
+void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time)
+{
+    /* Allow rescheduling if already scheduled */
+    bh->scheduled = 1;
+    bh->idle = 0;
+    bh->clock = clock;
+    bh->time = time;
+    aio_notify(bh->ctx); /*FIXME: is this right?*/
+}
+
+void qemu_bh_schedule_in(QEMUBH *bh, QEMUClock *clock, int64_t time)
+{
+    qemu_bh_schedule_at(bh, clock, qemu_get_clock_ns(clock) + time);
+}
+
 void qemu_bh_cancel(QEMUBH *bh)
 {
     bh->scheduled = 0;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 void qemu_bh_delete(QEMUBH *bh)
 {
     bh->scheduled = 0;
     bh->deleted = 1;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 static gboolean
@@ -126,13 +158,22 @@ aio_ctx_prepare(GSource *source, gint    *timeout)
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
-            if (bh->idle) {
+            int64_t wait = qemu_bh_ready_in(bh) / SCALE_MS;
+            if (!wait && bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
-                *timeout = 10;
+                wait = 10;
+            }
+            if (wait) {
+                /* Use the minimum wait across all bottom
+                 * halves */
+                if (*timeout == -1 || *timeout > wait) {
+                    *timeout = wait;
+                }
             } else {
-                /* non-idle bottom halves will be executed
-                 * immediately */
+                /* non-idle bottom halves or timed bottom
+                 * halves which are ready to run will be
+                 * executed immediately */
                 *timeout = 0;
                 return true;
             }
@@ -149,7 +190,7 @@ aio_ctx_check(GSource *source)
     QEMUBH *bh;
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
-        if (!bh->deleted && bh->scheduled) {
+        if (!bh->deleted && bh->scheduled && qemu_bh_ready_in(bh) <= 0) {
             return true;
 	}
     }
diff --git a/include/block/aio.h b/include/block/aio.h
index 1836793..ff26a3b 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -20,6 +20,7 @@
 
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+typedef struct QEMUClock QEMUClock;
 
 typedef struct AIOCBInfo {
     void (*cancel)(BlockDriverAIOCB *acb);
@@ -145,6 +146,38 @@ int aio_bh_poll(AioContext *ctx);
 void qemu_bh_schedule(QEMUBH *bh);
 
 /**
+ * qemu_bh_schedule_at: Schedule a bottom half at a future time
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ * @clock: The clock to be used
+ * @time: The time in nanoseconds at which the bh is scheduled
+ */
+void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time);
+
+/**
+ * qemu_bh_schedule_in: Schedule a bottom half after an interval
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ * @clock: The clock to be used
+ * @time: The interval in nanoseconds after which the bh is scheduled
+ */
+void qemu_bh_schedule_in(QEMUBH *bh, QEMUClock *clock, int64_t time);
+
+/**
  * qemu_bh_cancel: Cancel execution of a bottom half.
  *
  * Canceling execution of a bottom half undoes the effect of calls to
diff --git a/tests/test-aio.c b/tests/test-aio.c
index c173870..9352242 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -12,6 +12,7 @@
 
 #include <glib.h>
 #include "block/aio.h"
+#include "qemu/timer.h"
 
 AioContext *ctx;
 
@@ -124,6 +125,27 @@ static void test_bh_schedule10(void)
     qemu_bh_delete(data.bh);
 }
 
+static void test_bh_schedule_in(void)
+{
+    BHTestData data = { .n = 0 };
+    data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+    qemu_bh_schedule_in(data.bh, rt_clock, 1000000000LL);
+    g_assert_cmpint(data.n, ==, 0);
+
+    g_assert(!aio_poll(ctx, false));
+    g_assert_cmpint(data.n, ==, 0);
+
+    sleep(2);
+
+    g_assert(aio_poll(ctx, true));
+    g_assert_cmpint(data.n, ==, 1);
+
+    g_assert(!aio_poll(ctx, false));
+    g_assert_cmpint(data.n, ==, 1);
+    qemu_bh_delete(data.bh);
+}
+
 static void test_bh_cancel(void)
 {
     BHTestData data = { .n = 0 };
@@ -407,6 +429,27 @@ static void test_source_bh_schedule10(void)
     qemu_bh_delete(data.bh);
 }
 
+static void test_source_bh_schedule_in(void)
+{
+    BHTestData data = { .n = 0 };
+    data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+    qemu_bh_schedule_in(data.bh, rt_clock, 1000000000LL);
+    g_assert_cmpint(data.n, ==, 0);
+
+    g_assert(g_main_context_iteration(NULL, true));
+    g_assert_cmpint(data.n, ==, 0);
+
+    sleep(2);
+
+    g_assert(g_main_context_iteration(NULL, true));
+    g_assert_cmpint(data.n, ==, 1);
+
+    g_assert(!g_main_context_iteration(NULL, false));
+    g_assert_cmpint(data.n, ==, 1);
+    qemu_bh_delete(data.bh);
+}
+
 static void test_source_bh_cancel(void)
 {
     BHTestData data = { .n = 0 };
@@ -628,6 +671,8 @@ int main(int argc, char **argv)
 {
     GSource *src;
 
+    init_clocks();
+
     ctx = aio_context_new();
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
@@ -639,6 +684,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio/notify",                  test_notify);
     g_test_add_func("/aio/bh/schedule",             test_bh_schedule);
     g_test_add_func("/aio/bh/schedule10",           test_bh_schedule10);
+    g_test_add_func("/aio/bh/schedule-in",          test_bh_schedule_in);
     g_test_add_func("/aio/bh/cancel",               test_bh_cancel);
     g_test_add_func("/aio/bh/delete",               test_bh_delete);
     g_test_add_func("/aio/bh/callback-delete/one",  test_bh_delete_from_cb);
@@ -653,6 +699,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);
     g_test_add_func("/aio-gsource/bh/schedule",             test_source_bh_schedule);
     g_test_add_func("/aio-gsource/bh/schedule10",           test_source_bh_schedule10);
+    g_test_add_func("/aio-gsource/bh/schedule-in",          test_source_bh_schedule_in);
     g_test_add_func("/aio-gsource/bh/cancel",               test_source_bh_cancel);
     g_test_add_func("/aio-gsource/bh/delete",               test_source_bh_delete);
     g_test_add_func("/aio-gsource/bh/callback-delete/one",  test_source_bh_delete_from_cb);
-- 
1.7.9.5

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-06 16:24 [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves Alex Bligh
@ 2013-07-06 16:31 ` Alex Bligh
  2013-07-06 18:04   ` [Qemu-devel] [PATCHv2] " Alex Bligh
  2013-07-15 14:25 ` [Qemu-devel] [PATCH] " Paolo Bonzini
  1 sibling, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-06 16:31 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, Alex Bligh,
	Paolo Bonzini, rth

--On 6 July 2013 17:24:57 +0100 Alex Bligh <alex@alex.org.uk> wrote:

> Add timed bottom halves. A timed bottom half is a bottom half that
> will not execute until a given time has passed (qemu_bh_schedule_at)
> or a given interval has passed (qemu_bh_schedule_in). Any qemu
> clock can be used, and times are specified in nanoseconds.

For background, this patch resulted from a discussion with Kevin & Stefan
on IRC as to the best way to use timers (or rather avoid using timers)
in block drivers.

There is one thing I'm not entirely sure about:

> +void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time)
> +{
> +    /* Allow rescheduling if already scheduled */
> +    bh->scheduled = 1;
> +    bh->idle = 0;
> +    bh->clock = clock;
> +    bh->time = time;
> +    aio_notify(bh->ctx); /*FIXME: is this right?*/
> +}

qemu_bh_schedule calls aio_notify, but qemu_bh_schedule_idle does
not. I am not quite sure why the latter doesn't - possibly because I
don't fully understand the aio system. Should qemu_bh_schedule_at
be calling aio_notify when I don't know whether the bh is scheduled
1 nanosecond or 1 second ahead?

-- 
Alex Bligh

* [Qemu-devel] [PATCHv2] [RFC] aio/async: Add timed bottom-halves
  2013-07-06 16:31 ` Alex Bligh
@ 2013-07-06 18:04   ` Alex Bligh
  0 siblings, 0 replies; 29+ messages in thread
From: Alex Bligh @ 2013-07-06 18:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, Stefan Hajnoczi,
	Paolo Bonzini, rth

Add timed bottom halves. A timed bottom half is a bottom half that
will not execute until a given time has passed (qemu_bh_schedule_at)
or a given interval has passed (qemu_bh_schedule_in). Any qemu
clock can be used, and times are specified in nanoseconds.

Timed bottom halves can be used where timers cannot. For instance,
in block drivers where there is no mainloop that calls timers
(qemu-nbd, qemu-img), or where (per stefanha@redhat.com) the
aio code loops internally and thus timers never get called.

Changes since v1:
* aio_ctx_prepare should cope with wait<0
* aio_ctx_prepare should round up wait time

Signed-off-by: Alex Bligh <alex@alex.org.uk>
---
 async.c             |   53 +++++++++++++++++++++++++++++++++++++++++++++------
 include/block/aio.h |   33 ++++++++++++++++++++++++++++++++
 tests/test-aio.c    |   47 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/async.c b/async.c
index 90fe906..d93b1ab 100644
--- a/async.c
+++ b/async.c
@@ -35,6 +35,8 @@ struct QEMUBH {
     QEMUBHFunc *cb;
     void *opaque;
     QEMUBH *next;
+    QEMUClock *clock;
+    int64_t time;
     bool scheduled;
     bool idle;
     bool deleted;
@@ -52,6 +54,11 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     return bh;
 }
 
+static inline int64_t qemu_bh_ready_in(QEMUBH *bh)
+{
+    return (bh->clock) ? (bh->time - qemu_get_clock_ns(bh->clock)) : 0;
+}
+
 int aio_bh_poll(AioContext *ctx)
 {
     QEMUBH *bh, **bhp, *next;
@@ -62,8 +69,10 @@ int aio_bh_poll(AioContext *ctx)
     ret = 0;
     for (bh = ctx->first_bh; bh; bh = next) {
         next = bh->next;
-        if (!bh->deleted && bh->scheduled) {
+        if (!bh->deleted && bh->scheduled && qemu_bh_ready_in(bh) <= 0) {
             bh->scheduled = 0;
+            bh->clock = NULL;
+            bh->time = 0;
             if (!bh->idle)
                 ret = 1;
             bh->idle = 0;
@@ -96,6 +105,8 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 1;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 void qemu_bh_schedule(QEMUBH *bh)
@@ -104,18 +115,39 @@ void qemu_bh_schedule(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 0;
+    bh->clock = NULL;
+    bh->time = 0;
     aio_notify(bh->ctx);
 }
 
+void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time)
+{
+    /* Allow rescheduling if already scheduled */
+    bh->scheduled = 1;
+    bh->idle = 0;
+    bh->clock = clock;
+    bh->time = time;
+    aio_notify(bh->ctx); /*FIXME: is this right?*/
+}
+
+void qemu_bh_schedule_in(QEMUBH *bh, QEMUClock *clock, int64_t time)
+{
+    qemu_bh_schedule_at(bh, clock, qemu_get_clock_ns(clock) + time);
+}
+
 void qemu_bh_cancel(QEMUBH *bh)
 {
     bh->scheduled = 0;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 void qemu_bh_delete(QEMUBH *bh)
 {
     bh->scheduled = 0;
     bh->deleted = 1;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 static gboolean
@@ -126,13 +158,22 @@ aio_ctx_prepare(GSource *source, gint    *timeout)
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
-            if (bh->idle) {
+            int64_t wait = qemu_bh_ready_in(bh) / SCALE_MS;
+            if (!wait && bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
-                *timeout = 10;
+                wait = 10;
+            }
+            if (wait) {
+                /* Use the minimum wait across all bottom
+                 * halves */
+                if (*timeout == -1 || *timeout > wait) {
+                    *timeout = wait;
+                }
             } else {
-                /* non-idle bottom halves will be executed
-                 * immediately */
+                /* non-idle bottom halves or timed bottom
+                 * halves which are ready to run will be
+                 * executed immediately */
                 *timeout = 0;
                 return true;
             }
@@ -149,7 +190,7 @@ aio_ctx_check(GSource *source)
     QEMUBH *bh;
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
-        if (!bh->deleted && bh->scheduled) {
+        if (!bh->deleted && bh->scheduled && qemu_bh_ready_in(bh) <= 0) {
             return true;
 	}
     }
diff --git a/include/block/aio.h b/include/block/aio.h
index 1836793..ff26a3b 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -20,6 +20,7 @@
 
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+typedef struct QEMUClock QEMUClock;
 
 typedef struct AIOCBInfo {
     void (*cancel)(BlockDriverAIOCB *acb);
@@ -145,6 +146,38 @@ int aio_bh_poll(AioContext *ctx);
 void qemu_bh_schedule(QEMUBH *bh);
 
 /**
+ * qemu_bh_schedule_at: Schedule a bottom half at a future time
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ * @clock: The clock to be used
+ * @time: The time in nanoseconds at which the bh is scheduled
+ */
+void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time);
+
+/**
+ * qemu_bh_schedule_in: Schedule a bottom half after an interval
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ * @clock: The clock to be used
+ * @time: The interval in nanoseconds after which the bh is scheduled
+ */
+void qemu_bh_schedule_in(QEMUBH *bh, QEMUClock *clock, int64_t time);
+
+/**
  * qemu_bh_cancel: Cancel execution of a bottom half.
  *
  * Canceling execution of a bottom half undoes the effect of calls to
diff --git a/tests/test-aio.c b/tests/test-aio.c
index c173870..9352242 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -12,6 +12,7 @@
 
 #include <glib.h>
 #include "block/aio.h"
+#include "qemu/timer.h"
 
 AioContext *ctx;
 
@@ -124,6 +125,27 @@ static void test_bh_schedule10(void)
     qemu_bh_delete(data.bh);
 }
 
+static void test_bh_schedule_in(void)
+{
+    BHTestData data = { .n = 0 };
+    data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+    qemu_bh_schedule_in(data.bh, rt_clock, 1000000000LL);
+    g_assert_cmpint(data.n, ==, 0);
+
+    g_assert(!aio_poll(ctx, false));
+    g_assert_cmpint(data.n, ==, 0);
+
+    sleep(2);
+
+    g_assert(aio_poll(ctx, true));
+    g_assert_cmpint(data.n, ==, 1);
+
+    g_assert(!aio_poll(ctx, false));
+    g_assert_cmpint(data.n, ==, 1);
+    qemu_bh_delete(data.bh);
+}
+
 static void test_bh_cancel(void)
 {
     BHTestData data = { .n = 0 };
@@ -407,6 +429,27 @@ static void test_source_bh_schedule10(void)
     qemu_bh_delete(data.bh);
 }
 
+static void test_source_bh_schedule_in(void)
+{
+    BHTestData data = { .n = 0 };
+    data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+    qemu_bh_schedule_in(data.bh, rt_clock, 1000000000LL);
+    g_assert_cmpint(data.n, ==, 0);
+
+    g_assert(g_main_context_iteration(NULL, true));
+    g_assert_cmpint(data.n, ==, 0);
+
+    sleep(2);
+
+    g_assert(g_main_context_iteration(NULL, true));
+    g_assert_cmpint(data.n, ==, 1);
+
+    g_assert(!g_main_context_iteration(NULL, false));
+    g_assert_cmpint(data.n, ==, 1);
+    qemu_bh_delete(data.bh);
+}
+
 static void test_source_bh_cancel(void)
 {
     BHTestData data = { .n = 0 };
@@ -628,6 +671,8 @@ int main(int argc, char **argv)
 {
     GSource *src;
 
+    init_clocks();
+
     ctx = aio_context_new();
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
@@ -639,6 +684,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio/notify",                  test_notify);
     g_test_add_func("/aio/bh/schedule",             test_bh_schedule);
     g_test_add_func("/aio/bh/schedule10",           test_bh_schedule10);
+    g_test_add_func("/aio/bh/schedule-in",          test_bh_schedule_in);
     g_test_add_func("/aio/bh/cancel",               test_bh_cancel);
     g_test_add_func("/aio/bh/delete",               test_bh_delete);
     g_test_add_func("/aio/bh/callback-delete/one",  test_bh_delete_from_cb);
@@ -653,6 +699,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);
     g_test_add_func("/aio-gsource/bh/schedule",             test_source_bh_schedule);
     g_test_add_func("/aio-gsource/bh/schedule10",           test_source_bh_schedule10);
+    g_test_add_func("/aio-gsource/bh/schedule-in",          test_source_bh_schedule_in);
     g_test_add_func("/aio-gsource/bh/cancel",               test_source_bh_cancel);
     g_test_add_func("/aio-gsource/bh/delete",               test_source_bh_delete);
     g_test_add_func("/aio-gsource/bh/callback-delete/one",  test_source_bh_delete_from_cb);
-- 
1.7.9.5

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-06 16:24 [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves Alex Bligh
  2013-07-06 16:31 ` Alex Bligh
@ 2013-07-15 14:25 ` Paolo Bonzini
  2013-07-15 20:15   ` Alex Bligh
  1 sibling, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-15 14:25 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi, rth

Il 06/07/2013 18:24, Alex Bligh ha scritto:
> Add timed bottom halves. A timed bottom half is a bottom half that
> will not execute until a given time has passed (qemu_bh_schedule_at)
> or a given interval has passed (qemu_bh_schedule_in).

... and may be delayed arbitrarily past that given interval if you are
running in qemu-img or in other synchronous I/O APIs.  I'm especially
worried that this will not have any effect if bdrv_aio_cancel is calling
qemu_aio_wait.  bdrv_aio_cancel is presumably one place where you want
timeout/reconnect functionality to trigger.

I would really prefer to have a TimeEventNotifier or something like
that, which is API-compatible with EventNotifier (so you can use the
regular aio-*.c APIs) but triggers when a given time has passed.
Basically a "heavyweight" QEMUTimer; that would be a timerfd on Linux,
and a queue timer on Windows.  No idea on other POSIX systems,
unfortunately.
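
For concreteness, the timerfd flavour could look something like the sketch
below. This is only my illustration: the TimeEventNotifier name and helpers
are invented, error handling is minimal, and the aio-*.c integration
(aio_set_fd_handler on e->fd) is left out.

#include <sys/timerfd.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

typedef struct TimeEventNotifier {
    int fd;                     /* becomes readable when the timer fires */
} TimeEventNotifier;

static int time_event_notifier_arm(TimeEventNotifier *e, int64_t ns)
{
    struct itimerspec its = {
        .it_value.tv_sec  = ns / 1000000000LL,
        .it_value.tv_nsec = ns % 1000000000LL,
    };

    e->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    if (e->fd < 0) {
        return -1;
    }
    /* one-shot timer, relative to now; note that ns == 0 would disarm
     * the timer instead of firing immediately, so real code would have
     * to special-case that */
    return timerfd_settime(e->fd, 0, &its, NULL);
}

static void time_event_notifier_test_and_clear(TimeEventNotifier *e)
{
    uint64_t expirations;

    /* reading the fd clears readiness, much like
     * event_notifier_test_and_clear() */
    if (read(e->fd, &expirations, sizeof(expirations)) < 0) {
        /* EAGAIN: the timer has not fired yet */
    }
}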

Even better would be to remove the whole timer stuff (POSIX timers,
setitimer, and the Win32 equivalents), and just make the timers use a
shorter timeout for the main loop.  If you do this, I suspect adding
timer support to AioContext would be much simpler.

BTW, note that qemu-nbd (and qemu-io too) does call timers.

Paolo

> Any qemu
> clock can be used, and times are specified in nanoseconds.
> 
> Timed bottom halves can be used where timers cannot. For instance,
> in block drivers where there is no mainloop that calls timers
> (qemu-nbd, qemu-img), or where (per stefanha@redhat.com) the
> aio code loops internally and thus timers never get called.
> 
> Signed-off-by: Alex Bligh <alex@alex.org.uk>

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-15 14:25 ` [Qemu-devel] [PATCH] " Paolo Bonzini
@ 2013-07-15 20:15   ` Alex Bligh
  2013-07-15 20:53     ` Paolo Bonzini
  0 siblings, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-15 20:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Paolo,

--On 15 July 2013 16:25:01 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:

Thanks for the review.

> Il 06/07/2013 18:24, Alex Bligh ha scritto:
>> Add timed bottom halves. A timed bottom half is a bottom half that
>> will not execute until a given time has passed (qemu_bh_schedule_at)
>> or a given interval has passed (qemu_bh_schedule_in).
>
> ... and may be delayed arbitrarily past that given interval if you are
> running in qemu-img or in other synchronous I/O APIs.

That's true. However, the problem with timers is worse, in that we
poll for timers even less frequently as far as I can tell.

> I'm especially
> worried that this will not have any effect if bdrv_aio_cancel is calling
> qemu_aio_wait.  bdrv_aio_cancel is presumably one place where you want
> timeout/reconnect functionality to trigger.

Well, I'm a newbie here, so may well be wrong but I thought qemu_aio_wait
/did/ call bottom halves (but didn't call QemuTimers). Provided time
does actually advance (which inspection suggests it does), then
these bh's should be called just like any other bh's. I may have missed
the point here entirely.

> I would really prefer to have a TimeEventNotifier or something like
> that, which is API-compatibile with EventNotifier (so you can use the
> regular aio-*.c APIs) but triggers when a given time has passed.
> Basically an "heavyweight" QEMUTimer; that would be a timerfd on Linux,
> and a queue timer on Windows.  No idea on other POSIX systems,
> unfortunately.

I was trying to use the bh API because that's what the existing block
drivers use, and what I really wanted was a bh that wouldn't execute
for a while. Do EventNotifiers run whilst AIO is polling for completion?

> Even better would be to remove the whole timer stuff (POSIX timers,
> setitimer, and the Win32 equivalents), and just make the timers use a
> shorter timeout for the main loop.  If you do this, I suspect adding
> timer support to AioContext would be much simpler.

In discussion with Stefan H on IRC, I originally suggested moving the
QemuTimer poll to the AIO loop (or adding another), which is a half
arsed way to do what you are suggesting. He suggested this would be
hairy because the existing users might not be safe to be called there.
This was an attempt at a minimal change to fix that use.

> BTW, note that qemu-nbd (and qemu-io too) does call timers.

I'd thought I tested qemu-io. qemu-convert definitely does not.

Alex

> Paolo
>
>> Any qemu
>> clock can be used, and times are specified in nanoseconds.
>>
>> Timed bottom halves can be used where timers cannot. For instance,
>> in block drivers where there is no mainloop that calls timers
>> (qemu-nbd, qemu-img), or where (per stefanha@redhat.com) the
>> aio code loops internally and thus timers never get called.
>>
>> Signed-off-by: Alex Bligh <alex@alex.org.uk>
>
>



-- 
Alex Bligh

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-15 20:15   ` Alex Bligh
@ 2013-07-15 20:53     ` Paolo Bonzini
  2013-07-15 23:04       ` Alex Bligh
  0 siblings, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-15 20:53 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi, rth

Il 15/07/2013 22:15, Alex Bligh ha scritto:
> Paolo,
> 
> --On 15 July 2013 16:25:01 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> Thanks for the review.
> 
>> Il 06/07/2013 18:24, Alex Bligh ha scritto:
>>> Add timed bottom halves. A timed bottom half is a bottom half that
>>> will not execute until a given time has passed (qemu_bh_schedule_at)
>>> or a given interval has passed (qemu_bh_schedule_in).
>>
>> ... and may be delayed arbitrarily past that given interval if you are
>> running in qemu-img or in other synchronous I/O APIs.
> 
> That's true. However, the problem with timers is worse, in that we
> poll for timers even less frequently as far as I can tell.

Right, we poll for bottom halves during qemu_aio_wait().  We don't poll
for timers.

>> I'm especially
>> worried that this will not have any effect if bdrv_aio_cancel is calling
>> qemu_aio_wait.  bdrv_aio_cancel is presumably one place where you want
>> timeout/reconnect functionality to trigger.
> 
> Well, I'm a newbie here, so may well be wrong but I thought qemu_aio_wait
> /did/ call bottom halves (but didn't call QemuTimers). Provided time
> does actually advance (which inspection suggests it does), then
> these bh's should be called just like any other bh's. I may have missed
> the point here entirely.

So far you are right.

But this only happens if qemu_aio_wait() actually returns, so that on
the next call we poll for timers.  If QEMU is stuck in qemu_aio_wait()'s
infinite-timeout poll(), it will never advance and process the timed
bottom halves.

This goes to the question of having aio_notify() or not.  If you have
it, you will immediately process timed BHs that are "born expired".  For
other bottom halves, there will be no difference if you add it or not.

>> I would really prefer to have a TimeEventNotifier or something like
>> that, which is API-compatible with EventNotifier (so you can use the
>> regular aio-*.c APIs) but triggers when a given time has passed.
>> Basically a "heavyweight" QEMUTimer; that would be a timerfd on Linux,
>> and a queue timer on Windows.  No idea on other POSIX systems,
>> unfortunately.
> 
> I was trying to use the bh API because that's what the existing block
> drivers use, and what I really wanted was a bh that wouldn't execute
> for a while. Do EventNotifiers run whilst AIO is polling for completion?

Yes, and they can actually interrupt qemu_aio_wait().  See
aio_set_event_notifier.

>> Even better would be to remove the whole timer stuff (POSIX timers,
>> setitimer, and the Win32 equivalents), and just make the timers use a
>> shorter timeout for the main loop.  If you do this, I suspect adding
>> timer support to AioContext would be much simpler.
> 
> In discussion with Stefan H on IRC, I originally suggested moving the
> QemuTimer poll to the AIO loop (or adding another), which is a half
> arsed way to do what you are suggesting. He suggested this would be
> hairy because the existing users might not be safe to be called there.
> This was an attempt at a minimal change to fix that use.
> 
>> BTW, note that qemu-nbd (and qemu-io too) does call timers.
> 
> I'd thought I tested qemu-io. qemu-convert definitely does not.

qemu-io is the tool that is used by the unit tests.  Conversion is in
qemu-img.

Paolo

> Alex
> 
>> Paolo
>>
>>> Any qemu
>>> clock can be used, and times are specified in nanoseconds.
>>>
>>> Timed bottom halves can be used where timers cannot. For instance,
>>> in block drivers where there is no mainloop that calls timers
>>> (qemu-nbd, qemu-img), or where (per stefanha@redhat.com) the
>>> aio code loops internally and thus timers never get called.
>>>
>>> Signed-off-by: Alex Bligh <alex@alex.org.uk>
>>
>>
> 
> 
> 

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-15 20:53     ` Paolo Bonzini
@ 2013-07-15 23:04       ` Alex Bligh
  2013-07-16  6:16         ` Paolo Bonzini
  0 siblings, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-15 23:04 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Paolo,

--On 15 July 2013 22:53:17 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:

> So far you are right.
>
> But this only happens if qemu_aio_wait() actually returns, so that on
> the next call we poll for timers.  If QEMU is stuck in qemu_aio_wait()'s
> infinite-timeout poll(), it will never advance and process the timed
> bottom halves.

I may have misunderstood the code here. I thought what it was doing was
setting the timeout to poll() to 0 if there was a bh queued, 10ms if
there was a bh->idle timeout queued, or infinite otherwise.

What I changed it to do was set the timeout to:
* 0 if there was an untimed bh ready (no change)
* 10ms if there was an untimed idle bh ready (no change)
* the number of ms to expiry if there is a timed bh ready (rounded up)
* infinite otherwise (no change)
* and if there is more than one, the minimum of those

So the infinite timeout poll should not be entered if there is a timed
bh there. This of course assumes a single thread, since otherwise there is a TOCTOU
problem if a timed bh gets inserted between calculating the expiry time
and the poll.
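
Spelled out as code, the selection rule I mean is roughly the following
(just an illustrative condensation of the aio_ctx_prepare change in the
patch, with INT_MAX from <limits.h>; not a new proposal):

/* Pick the g_poll() timeout in milliseconds; -1 means block forever. */
static int bh_poll_timeout(AioContext *ctx)
{
    QEMUBH *bh;
    int timeout = -1;                        /* infinite by default */

    for (bh = ctx->first_bh; bh; bh = bh->next) {
        int64_t wait;

        if (bh->deleted || !bh->scheduled) {
            continue;
        }
        if (!bh->clock) {
            wait = bh->idle ? 10 : 0;        /* untimed: 0ms, idle: 10ms */
        } else {
            int64_t ns = bh->time - qemu_get_clock_ns(bh->clock);
            /* round up so we never wake before the bh is ready */
            wait = ns <= 0 ? 0 : (ns + SCALE_MS - 1) / SCALE_MS;
        }
        if (timeout == -1 || wait < timeout) {
            timeout = wait > INT_MAX ? INT_MAX : (int)wait;
        }
    }
    return timeout;
}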

> This goes to the question of having aio_notify() or not.  If you have
> it, you will immediately process timed BHs that are "born expired".  For
> other bottom halves, there will be no difference if you add it or not.

You've lost me there. I'm taking it by 'born expired' you mean
when poll() is entered, they're already expired. Timed BH's that are
'born expired' should (with the patch I sent) be treated exactly the
same as untimed BH's, i.e. the timeout to poll should be being
set to zero.

-- 
Alex Bligh

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-15 23:04       ` Alex Bligh
@ 2013-07-16  6:16         ` Paolo Bonzini
  2013-07-16  7:30           ` Alex Bligh
  0 siblings, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-16  6:16 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi, rth

Il 16/07/2013 01:04, Alex Bligh ha scritto:
> Paolo,
> 
> --On 15 July 2013 22:53:17 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>> So far you are right.
>>
>> But this only happens if qemu_aio_wait() actually returns, so that on
>> the next call we poll for timers.  If QEMU is stuck in qemu_aio_wait()'s
>> infinite-timeout poll(), it will never advance and process the timed
>> bottom halves.
> 
> I may have misunderstood the code here. I thought what it was doing was
> setting the timeout to poll() to 0 if there was a bh queued, 10ms if
> there was a bh->idle timeout queued, or infinite otherwise.
> 
> What I changed it to do was set the timeout to:
> * 0 if there was an untimed bh ready (no change)
> * 10ms if there was an untimed idle bh ready (no change)
> * the number of ms to expiry if there is a timed bh ready (rounded up)
> * infinite otherwise (no change)
> * and if there is more than one, the minimum of those

You did.  But aio_wait() ignores the timeout.  It is only used by the
main loop.

> So the infinite timeout poll should not be entered if there is a timed
> bh there. This of course assumes a single thread, since otherwise there is a TOCTOU
> problem if a timed bh gets inserted between calculating the expiry time
> and the poll.
> 
>> This goes to the question of having aio_notify() or not.  If you have
>> it, you will immediately process timed BHs that are "born expired".  For
>> other bottom halves, there will be no difference if you add it or not.
> 
> You've lost me there. I'm taking it by 'born expired' you mean
> when poll() is entered, they're already expired. Timed BH's that are
> 'born expired' should (with the patch I sent) be treated exactly the
> same as untimed BH's, i.e. the timeout to poll should be being
> set to zero.

Yes, that's exactly what I meant.  Looks like you understood well! :)

Paolo

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16  6:16         ` Paolo Bonzini
@ 2013-07-16  7:30           ` Alex Bligh
  2013-07-16  7:34             ` Paolo Bonzini
  0 siblings, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-16  7:30 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Paolo,

--On 16 July 2013 08:16:42 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:

> You did.  But aio_wait() ignores the timeout.  It is only used by the
> main loop.

OK well that seems worth fixing in any case, as even without timed bh's
that means no bh can be executed for an indeterminate time. I'll have
a look at that.

-- 
Alex Bligh

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16  7:30           ` Alex Bligh
@ 2013-07-16  7:34             ` Paolo Bonzini
  2013-07-16 15:29               ` Alex Bligh
  0 siblings, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-16  7:34 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi, rth

Il 16/07/2013 09:30, Alex Bligh ha scritto:
> 
>> You did.  But aio_wait() ignores the timeout.  It is only used by the
>> main loop.
> 
> OK well that seems worth fixing in any case, as even without timed bh's
> that means no bh can be executed for an indeterminate time. I'll have
> a look at that.

No, BHs work because they do aio_notify().  Idle BHs can be skipped for
an indeterminate time, but that's fine because idle BHs are a hack that
we should not need at all.

Paolo

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16  7:34             ` Paolo Bonzini
@ 2013-07-16 15:29               ` Alex Bligh
  2013-07-16 15:43                 ` Paolo Bonzini
  0 siblings, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-16 15:29 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Paolo,

--On 16 July 2013 09:34:20 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:

>>> You did.  But aio_wait() ignores the timeout.  It is only used by the
>>> main loop.
>>
>> OK well that seems worth fixing in any case, as even without timed bh's
>> that means no bh can be executed for an indeterminate time. I'll have
>> a look at that.
>
> No, BHs work because they do aio_notify().  Idle BHs can be skipped for
> an indeterminate time, but that's fine because idle BHs are a hack that
> we should not need at all.

OK, so a bit more code reading later, I think I now understand.

1. non-idle bh's call aio_notify at schedule time, which will cause
   any poll to exit immediately because at least one FD will be ready.

2. idle bh's do not do aio_notify() because we don't care whether
   they get stuck in aio_poll and they are a hack [actually I think
   we could do better here]

3. aio_poll calls aio_bh_poll. If this returns true, this indicates
   at least one non-idle bh exists, which causes aio_poll not to
   block.

   Question 1: it then calls aio_dispatch - if this itself
   generates non-idle bh's, this would seem not to clear the blocking
   flag. Does this rely on aio_notify?

   Question 2: if we're already telling aio_poll not to block
   by the presence of non-idle bh's as detected in aio_bh_poll,
   why do we need to use aio_notify too? IE why do we have both
   the blocking= logic AND the aio_notify logic?

4. aio_poll then calls g_poll (POSIX) or WaitForMultipleObjects
   (Windows). However, the timeout is either 0 or infinite.
   Both functions take a milliseconds (yuck) timeout, but that
   is not used.
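
(For anyone reading along: as far as I can tell aio_notify() just kicks the
AioContext's EventNotifier, i.e. roughly the following, paraphrased from my
reading of async.c, so the details may be off.)

void aio_notify(AioContext *ctx)
{
    /* make the notifier's fd readable, so a g_poll() or
     * WaitForMultipleObjects that includes it returns immediately */
    event_notifier_set(&ctx->notifier);
}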

So, the first thing I don't understand is why aio_poll needs the
return value of aio_bh_poll at all. Firstly, after sampling it,
it then causes aio_dispatch, and that can presumably set its own
bottom half callbacks; if this happens 'int blocking' won't be
cleared, and it will still enter g_poll with an infinite timeout.
Secondly, there seems to be an entirely separate mechanism
(aio_notify) in any case. If a non-idle bh has been scheduled,
this will cause g_poll to exit immediately as a read will be
ready. I believe this is cleared by the bh being used.

The second thing I don't understand is why we aren't using
the timeout on g_poll / WaitForMultipleObjects. It would
seem to be reasonably easy to make aio_poll call aio_ctx_prepare
or something that does the same calculation. This would fix
idle bh's to be more reliable (we know it's safe to call them
within aio_poll anyway, it's just a question of whether
we turn an infinite wait into a 10ms wait).

Perhaps these two are related.

I /think/ fixing the second (and removing the aio_notify
from qemu_bh_schedule_at) is sufficient provided it checks
for scheduled bh's immediately prior to the poll. This assumes
other threads cannot schedule bh's. This would seem to be less
intrusive than a TimedEventNotifier approach which (as far as I
can see) requires another thread.

-- 
Alex Bligh

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 15:29               ` Alex Bligh
@ 2013-07-16 15:43                 ` Paolo Bonzini
  2013-07-16 16:14                   ` Alex Bligh
  0 siblings, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-16 15:43 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi, rth

Il 16/07/2013 17:29, Alex Bligh ha scritto:
> Paolo,
> 
> --On 16 July 2013 09:34:20 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>>>> You did.  But aio_wait() ignores the timeout.  It is only used by the
>>>> main loop.
>>>
>>> OK well that seems worth fixing in any case, as even without timed bh's
>>> that means no bh can be executed for an indeterminate time. I'll have
>>> a look at that.
>>
>> No, BHs work because they do aio_notify().  Idle BHs can be skipped for
>> an indeterminate time, but that's fine because idle BHs are a hack that
>> we should not need at all.
> 
> OK, so a bit more code reading later, I think I now understand.
> 
> 1. non-idle bh's call aio_notify at schedule time, which will cause
>   any poll to exit immediately because at least one FD will be ready.
> 
> 2. idle bh's do not do aio_notify() because we don't care whether
>   they get stuck in aio_poll and they are a hack [actually I think
>   we could do better here]
> 
> 3. aio_poll calls aio_bh_poll. If this returns true, this indicates
>   at least one non-idle bh exists, which causes aio_poll not to
>   block.

No, this indicates that at least one scheduled non-idle bh exist*ed*,
which causes aio_poll not to block (because some progress has been done).

There could be non-idle BHs scheduled during aio_poll, but not visited
by aio_bh_poll.  These rely on aio_notify (it could be an idle BH who
scheduled this non-idle BH, so aio_bh_poll might return 0).

>   Question 1: it then calls aio_dispatch - if this itself
>   generates non-idle bh's, this would seem not to clear the blocking
>   flag. Does this rely on aio_notify?

Yes.

>   Question 2: if we're already telling aio_poll not to block
>   by the presence of non-idle bh's as detected in aio_bh_poll,
>   why do we need to use aio_notify too? IE why do we have both
>   the blocking= logic AND the aio_notify logic?

See above (newly-scheduled BHs are always handled with aio_notify, the
blocking=false logic is for previously-scheduled BHs).

> 4. aio_poll then calls g_poll (POSIX) or WaitForMultipleObjects
>   (Windows). However, the timeout is either 0 or infinite.
>   Both functions take a milliseconds (yuck) timeout, but that
>   is not used.

I agree with the yuck. :)  But Linux has the nanoseconds-resolution
ppoll, too.

> So, the first thing I don't understand is why aio_poll needs the
> return value of aio_bh_poll at all. Firstly, after sampling it,
> it then causes aio_dispatch, and that can presumably set its own
> bottom half callbacks; if this happens 'int blocking' won't be
> cleared, and it will still enter g_poll with an infinite timeout.
> Secondly, there seems to be an entirely separate mechanism
> (aio_notify) in any case. If a non-idle bh has been scheduled,
> this will cause g_poll to exit immediately as a read will be
> ready. I believe this is cleared by the bh being used.

I hope the above answers these questions.

> The second thing I don't understand is why we aren't using
> the timeout on g_poll / WaitForMultipleObjects.

Because so far it wasn't needed (insert rant about idle BHs being a
hack).  This is a good occasion to use it.  But I wouldn't introduce a
new one-off concept (almost as much of a hack as idle BHs), I would
rather reuse as much code as possible from QEMUTimer/QEMUClock.  I must
admit I don't have a clear idea of what the API would look like.

> It would
> seem to be reasonably easy to make aio_poll call aio_ctx_prepare
> or something that does the same calculation. This would fix
> idle bh's to be more reliable (we know it's safe to call them
> within aio_poll anyway, it's just a question of whether
> we turn an infinite wait into a 10ms wait).

Idle BHs could be changed to timers as well, and then they would disappear.

Paolo

> Perhaps these two are related.
> 
> I /think/ fixing the second (and removing the aio_notify
> from qemu_bh_schedule_at) is sufficient provided it checks
> for scheduled bh's immediately prior to the poll. This assumes
> other threads cannot schedule bh's. This would seem to be less
> intrusive than a TimedEventNotifier approach which (as far as I
> can see) requires another thread.
> 

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 15:43                 ` Paolo Bonzini
@ 2013-07-16 16:14                   ` Alex Bligh
  2013-07-16 16:55                     ` Paolo Bonzini
  0 siblings, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-16 16:14 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Paolo,

>> 3. aio_poll calls aio_bh_poll. If this returns true, this indicates
>>   at least one non-idle bh exists, which causes aio_poll not to
>>   block.
>
> No, this indicates that at least one scheduled non-idle bh exist*ed*,
> which causes aio_poll not to block (because some progress has been done).

Ah yes, in the sense the callback will have been called and it now
doesn't exist.

>> 4. aio_poll then calls g_poll (POSIX) or WaitForMultipleObjects
>>   (Windows). However, the timeout is either 0 or infinite.
>>   Both functions take a milliseconds (yuck) timeout, but that
>>   is not used.
>
> I agree with the yuck. :)  But Linux has the nanoseconds-resolution
> ppoll, too.

Sadly I don't think we have a g_ppoll.

>> So, the first thing I don't understand is why aio_poll needs the
>> return value of aio_bh_poll at all. Firstly, after sampling it,
>> it then causes aio_dispatch, and that can presumably set its own
>> bottom half callbacks; if this happens 'int blocking' won't be
>> cleared, and it will still enter g_poll with an infinite timeout.
>> Secondly, there seems to be an entirely separate mechanism
>> (aio_notify) in any case. If a non-idle bh has been scheduled,
>> this will cause g_poll to exit immediately as a read will be
>> ready. I believe this is cleared by the bh being used.
>

Sorry - forgot to delete the questions from above when I moved
them down below. Yes, that makes sense.

>> The second thing I don't understand is why we aren't using
>> the timeout on g_poll / WaitForMultipleObjects.
>
> Because so far it wasn't needed (insert rant about idle BHs being a
> hack).  This is a good occasion to use it.

OK

> But I wouldn't introduce a
> new one-off concept (almost as much of a hack as idle BHs), I would
> rather reuse as much code as possible from QEMUTimer/QEMUClock.  I must
> admit I don't have a clear idea of how the API would look like.

So the reason I was trying to avoid using QEMUTimer stuff was that
bh's get called from aio_poll and it was not evident that all timers
would be safe to call from aio_poll.

My original approach was simply to call qemu_run_all_timers from
aio_poll at the top. I now know I'd have to (slightly) modify
the poll routine to use a specific timeout if there are any timers
ready to run and the timeout would otherwise be infinity. However
Stefan H pointed out on IRC that we'd then have to audit every
timer usage to check it was safe to call from aio_poll, or have
an 'isSafeToCallFromAioPoll' type flag (yuck). Hence we thought the
timed bh would be less intrusive, since what I and other block
driver folks actually want is something more like a bh. Equally happy
to recode it as a QEMUTimer.

What do you think? In the end I thought the schedule_bh_at stuff
was simpler.

>> It would
>> seem to be reasonably easy to make aio_poll call aio_ctx_prepare
>> or something that does the same calculation. This would fix
>> idle bh's to be more reliable (we know it's safe to call them
>> within aio_poll anyway, it's just a question of whether
>> we turn an infinite wait into a 10ms wait).
>
> Idle BHs could be changed to timers as well, and then they would
> disappear.

Yes I was in essence hoping an idle BH would be treated as an
epsilon millisecond timed bh, or (if above) an epsilon millisecond
timer. By that I mean poll() would be called at least once before
it ran, but with a 0 ms timeout.

-- 
Alex Bligh

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 16:14                   ` Alex Bligh
@ 2013-07-16 16:55                     ` Paolo Bonzini
  2013-07-16 21:22                       ` [Qemu-devel] [PATCHv3] " Alex Bligh
                                         ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-16 16:55 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi, rth

Il 16/07/2013 18:14, Alex Bligh ha scritto:
> Paolo,
> 
>>> 3. aio_poll calls aio_bh_poll. If this returns true, this indicates
>>>   at least one non-idle bh exists, which causes aio_poll not to
>>>   block.
>>
>> No, this indicates that at least one scheduled non-idle bh exist*ed*,
>> which causes aio_poll not to block (because some progress has been done).
> 
> Ah yes, in the sense the callback will have been called and it now
> doesn't exist.

Or at least isn't scheduled.

>>> 4. aio_poll then calls g_poll (POSIX) or WaitForMultipleObjects
>>>   (Windows). However, the timeout is either 0 or infinite.
>>>   Both functions take a milliseconds (yuck) timeout, but that
>>>   is not used.
>>
>> I agree with the yuck. :)  But Linux has the nanoseconds-resolution
>> ppoll, too.
> 
> Sadly I don't think we have a g_ppoll.

Doesn't matter, poll==g_poll on POSIX systems.  You can just use poll
(or g_poll) on non-Linux, and ppoll on Linux.
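
Something along these lines, I suppose (only a sketch; the wait_for_fds
name and the CONFIG_PPOLL switch are made up):

/* Nanosecond-resolution wait on Linux via ppoll(), falling back to
 * g_poll() elsewhere.  GPollFD has the same layout as struct pollfd on
 * POSIX systems, which is what makes the cast below legitimate. */
#define _GNU_SOURCE
#include <poll.h>
#include <glib.h>

static int wait_for_fds(GPollFD *fds, unsigned nfds, int64_t timeout_ns)
{
#ifdef CONFIG_PPOLL                          /* made-up configure switch */
    struct timespec ts;
    struct timespec *tsp = NULL;

    if (timeout_ns >= 0) {
        ts.tv_sec  = timeout_ns / 1000000000LL;
        ts.tv_nsec = timeout_ns % 1000000000LL;
        tsp = &ts;
    }
    return ppoll((struct pollfd *)fds, nfds, tsp, NULL);
#else
    /* g_poll() only has millisecond resolution; round up so we do not
     * wake early and busy-wait */
    int timeout_ms = timeout_ns < 0 ? -1 : (timeout_ns + 999999) / 1000000;
    return g_poll(fds, nfds, timeout_ms);
#endif
}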

>> But I wouldn't introduce a
>> new one-off concept (almost as much of a hack as idle BHs), I would
>> rather reuse as much code as possible from QEMUTimer/QEMUClock.  I must
>> admit I don't have a clear idea of what the API would look like.
> 
> So the reason I was trying to avoid using QEMUTimer stuff was that
> bh's get called from aio_poll and it was not evident that all timers
> would be safe to call from aio_poll.

It wouldn't.

> What do you think? In the end I thought the schedule_bh_at stuff
> was simpler.

It is simpler, but I'm not sure it is the right API.  Of course, if
Kevin and Stefan says it is, I have no problem with that.

Paolo

* [Qemu-devel] [PATCHv3] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 16:55                     ` Paolo Bonzini
@ 2013-07-16 21:22                       ` Alex Bligh
  2013-07-16 21:24                       ` [Qemu-devel] [PATCH] " Alex Bligh
  2013-07-17  7:50                       ` [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves Kevin Wolf
  2 siblings, 0 replies; 29+ messages in thread
From: Alex Bligh @ 2013-07-16 21:22 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, Stefan Hajnoczi,
	Paolo Bonzini, rth

Add timed bottom halves. A timed bottom half is a bottom half that
will not execute until a given time has passed (qemu_bh_schedule_at)
or a given interval has passed (qemu_bh_schedule_in). Any qemu
clock can be used, and times are specified in nanoseconds.

Timed bottom halves can be used where timers cannot. For instance,
in block drivers where there is no mainloop that calls timers
(qemu-nbd, qemu-img), or where (per stefanha@redhat.com) the
aio code loops internally and thus timers never get called.

Changes to aio-win32.c have not even been compile tested.

Changes since v1:
* aio_ctx_prepare should cope with wait<0
* aio_ctx_prepare should round up wait time

Changes since v2:
* aio_poll timeout depends on presence of timed bottom halves
* timed bh's do not do aio_notify immediately

Signed-off-by: Alex Bligh <alex@alex.org.uk>
---
 aio-posix.c         |   20 +++++++++++++-
 aio-win32.c         |   21 ++++++++++++++-
 async.c             |   74 ++++++++++++++++++++++++++++++++++++++++++++++-----
 include/block/aio.h |   43 ++++++++++++++++++++++++++++++
 tests/test-aio.c    |   47 ++++++++++++++++++++++++++++++++
 5 files changed, 196 insertions(+), 9 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index b68eccd..f949e22 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -173,6 +173,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandler *node;
     int ret;
+    int timeout = -1;
     bool busy, progress;
 
     progress = false;
@@ -183,6 +184,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * does not need a complete flush (as is the case for qemu_aio_wait loops).
      */
     if (aio_bh_poll(ctx)) {
+        /* At least one non-idle but scheduled bh existed when aio_bh_poll
+         * was called. We're thus making progress, so don't block.
+         * Note it may no longer be scheduled.
+         */
         blocking = false;
         progress = true;
     }
@@ -191,6 +196,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress = true;
     }
 
+    /* The above may schedule bh's in which case if they need the poll below to
+     * exit immediately, they will have called aio_notify. If they are timed,
+     * we'll process the timeout below.
+     */
+
     if (progress && !blocking) {
         return true;
     }
@@ -231,10 +241,18 @@ bool aio_poll(AioContext *ctx, bool blocking)
         return progress;
     }
 
+    /* calculate the timeout. This will be infinite if no bh's
+     * are scheduled, else the lowest across all scheduled bh's for
+     *    * 0ms for a non-timed, non-idle bh
+     *    * 0ms for a timed, non-idle bh which is ready
+     *    * 10ms for an idle bh
+     */
+    aio_bh_get_timeout(ctx, &timeout);
+
     /* wait until next event */
     ret = g_poll((GPollFD *)ctx->pollfds->data,
                  ctx->pollfds->len,
-                 blocking ? -1 : 0);
+                 blocking ? timeout : 0);
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
diff --git a/aio-win32.c b/aio-win32.c
index 38723bf..5d45448 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -107,6 +107,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * does not need a complete flush (as is the case for qemu_aio_wait loops).
      */
     if (aio_bh_poll(ctx)) {
+        /* At least one non-idle but scheduled bh existed when aio_bh_poll
+         * was called. We're thus making progress, so don't block.
+         * Note it may no longer be scheduled.
+         */
         blocking = false;
         progress = true;
     }
@@ -140,6 +144,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
         }
     }
 
+    /* The above may schedule bh's in which case if they need the poll below to
+     * exit immediately, they will have called aio_notify. If they are timed,
+     * we'll process the timeout below.
+     */
+
     if (progress && !blocking) {
         return true;
     }
@@ -175,7 +184,17 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* wait until next event */
     while (count > 0) {
         int timeout = blocking ? INFINITE : 0;
-        int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+        int ret;
+
+        /* calculate the timeout. This will be infinite if no bh's
+         * are scheduled, else the lowest across all scheduled bh's for
+         *    * 0ms for a non-timed, non-idle bh
+         *    * 0ms for a timed, non-idle bh which is ready
+         *    * 10ms for an idle bh
+         */
+        aio_bh_get_timeout(ctx, &timeout);
+
+        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
 
         /* if we have any signaled events, dispatch event */
         if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
diff --git a/async.c b/async.c
index 90fe906..c844b9d 100644
--- a/async.c
+++ b/async.c
@@ -35,6 +35,8 @@ struct QEMUBH {
     QEMUBHFunc *cb;
     void *opaque;
     QEMUBH *next;
+    QEMUClock *clock;
+    int64_t time;
     bool scheduled;
     bool idle;
     bool deleted;
@@ -52,6 +54,11 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     return bh;
 }
 
+static inline int64_t qemu_bh_ready_in(QEMUBH *bh)
+{
+    return (bh->clock) ? (bh->time - qemu_get_clock_ns(bh->clock)) : 0;
+}
+
 int aio_bh_poll(AioContext *ctx)
 {
     QEMUBH *bh, **bhp, *next;
@@ -62,8 +69,10 @@ int aio_bh_poll(AioContext *ctx)
     ret = 0;
     for (bh = ctx->first_bh; bh; bh = next) {
         next = bh->next;
-        if (!bh->deleted && bh->scheduled) {
+        if (!bh->deleted && bh->scheduled && qemu_bh_ready_in(bh) <= 0) {
             bh->scheduled = 0;
+            bh->clock = NULL;
+            bh->time = 0;
             if (!bh->idle)
                 ret = 1;
             bh->idle = 0;
@@ -96,6 +105,8 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 1;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 void qemu_bh_schedule(QEMUBH *bh)
@@ -104,35 +115,84 @@ void qemu_bh_schedule(QEMUBH *bh)
         return;
     bh->scheduled = 1;
     bh->idle = 0;
+    bh->clock = NULL;
+    bh->time = 0;
     aio_notify(bh->ctx);
 }
 
+void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time)
+{
+    /* Allow rescheduling if already scheduled */
+    bh->scheduled = 1;
+    bh->idle = 0;
+    bh->clock = clock;
+    bh->time = time;
+}
+
+void qemu_bh_schedule_in(QEMUBH *bh, QEMUClock *clock, int64_t time)
+{
+    qemu_bh_schedule_at(bh, clock, qemu_get_clock_ns(clock) + time);
+}
+
 void qemu_bh_cancel(QEMUBH *bh)
 {
     bh->scheduled = 0;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 void qemu_bh_delete(QEMUBH *bh)
 {
     bh->scheduled = 0;
     bh->deleted = 1;
+    bh->clock = NULL;
+    bh->time = 0;
 }
 
 static gboolean
-aio_ctx_prepare(GSource *source, gint    *timeout)
+aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;
+    return aio_bh_get_timeout((AioContext *)ctx, (int *)timeout);
+}
+
+bool aio_bh_get_timeout(AioContext *ctx, int *timeout)
+{
     QEMUBH *bh;
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
-            if (bh->idle) {
+            int64_t wait = qemu_bh_ready_in(bh);
+            if (wait <= 0) {
+                wait = 0;
+            } else {
+                /* Sadly timeout is in milliseconds not nanoseconds;
+                 * it is better to trigger a timeout too late than too
+                 * early, which would risk busy-waiting */
+                wait = (wait + SCALE_MS - 1) / SCALE_MS;
+            }
+            if (!wait && bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
-                *timeout = 10;
+                wait = 10;
+            }
+            if (wait) {
+                /* Use the minimum wait across all bottom
+                 * halves */
+                if (*timeout == -1 || *timeout > wait) {
+                    *timeout = wait;
+                }
             } else {
-                /* non-idle bottom halves will be executed
-                 * immediately */
+                /* non-idle bottom halves or timed bottom
+                 * halves which are ready to run will be
+                 * executed immediately */
+                if (bh->clock) {
+                    /* if there is an expired timer, make sure
+                     * aio_notify is called as it won't have
+                     * been called when scheduled
+                     */
+                    aio_notify(bh->ctx);
+                }
                 *timeout = 0;
                 return true;
             }
@@ -149,7 +209,7 @@ aio_ctx_check(GSource *source)
     QEMUBH *bh;
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
-        if (!bh->deleted && bh->scheduled) {
+        if (!bh->deleted && bh->scheduled && qemu_bh_ready_in(bh) <= 0) {
             return true;
 	}
     }
diff --git a/include/block/aio.h b/include/block/aio.h
index 1836793..afa685d 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -20,6 +20,7 @@
 
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+typedef struct QEMUClock QEMUClock;
 
 typedef struct AIOCBInfo {
     void (*cancel)(BlockDriverAIOCB *acb);
@@ -145,6 +146,38 @@ int aio_bh_poll(AioContext *ctx);
 void qemu_bh_schedule(QEMUBH *bh);
 
 /**
+ * qemu_bh_schedule_at: Schedule a bottom half at a future time
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ * @clock: The clock to be used
+ * @time: The time in nanoseconds at which the bh is scheduled
+ */
+void qemu_bh_schedule_at(QEMUBH *bh, QEMUClock *clock, int64_t time);
+
+/**
+ * qemu_bh_schedule_in: Schedule a bottom half after an interval
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked.  This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ * @clock: The clock to be used
+ * @time: The interval in nanoseconds after which the bh is scheduled
+ */
+void qemu_bh_schedule_in(QEMUBH *bh, QEMUClock *clock, int64_t time);
+
+/**
  * qemu_bh_cancel: Cancel execution of a bottom half.
  *
  * Canceling execution of a bottom half undoes the effect of calls to
@@ -190,6 +223,16 @@ bool aio_pending(AioContext *ctx);
  */
 bool aio_poll(AioContext *ctx, bool blocking);
 
+/* This is used internally to calculate the appropriate timeout for aio_poll.
+ * It returns true if a bh can run right now (either a scheduled bh or
+ * a timed bh that has already passed its time), or false otherwise.
+ * On entry, timeout should be set to the proposed timeout value to use
+ * (or -1 for infinite); if a lower timeout is needed because a timed bh
+ * is scheduled in the future, timeout will be adjusted. Timeout
+ * is in milliseconds (yuck).
+ */
+bool aio_bh_get_timeout(AioContext *ctx, int *timeout);
+
 #ifdef CONFIG_POSIX
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
 typedef int (AioFlushHandler)(void *opaque);
diff --git a/tests/test-aio.c b/tests/test-aio.c
index c173870..29daf81 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -12,6 +12,7 @@
 
 #include <glib.h>
 #include "block/aio.h"
+#include "qemu/timer.h"
 
 AioContext *ctx;
 
@@ -124,6 +125,27 @@ static void test_bh_schedule10(void)
     qemu_bh_delete(data.bh);
 }
 
+static void test_bh_schedule_in(void)
+{
+    BHTestData data = { .n = 0 };
+    data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+    qemu_bh_schedule_in(data.bh, rt_clock, 1000000000LL);
+    g_assert_cmpint(data.n, ==, 0);
+
+    g_assert(!aio_poll(ctx, false));
+    g_assert_cmpint(data.n, ==, 0);
+
+    sleep(2);
+
+    g_assert(aio_poll(ctx, true));
+    g_assert_cmpint(data.n, ==, 1);
+
+    g_assert(!aio_poll(ctx, false));
+    g_assert_cmpint(data.n, ==, 1);
+    qemu_bh_delete(data.bh);
+}
+
 static void test_bh_cancel(void)
 {
     BHTestData data = { .n = 0 };
@@ -407,6 +429,27 @@ static void test_source_bh_schedule10(void)
     qemu_bh_delete(data.bh);
 }
 
+static void test_source_bh_schedule_in(void)
+{
+    BHTestData data = { .n = 0 };
+    data.bh = aio_bh_new(ctx, bh_test_cb, &data);
+
+    qemu_bh_schedule_in(data.bh, rt_clock, 1000000000LL);
+    g_assert_cmpint(data.n, ==, 0);
+
+    g_assert(!g_main_context_iteration(NULL, false));
+    g_assert_cmpint(data.n, ==, 0);
+
+    sleep(2);
+
+    g_assert(g_main_context_iteration(NULL, true));
+    g_assert_cmpint(data.n, ==, 1);
+
+    g_assert(!g_main_context_iteration(NULL, false));
+    g_assert_cmpint(data.n, ==, 1);
+    qemu_bh_delete(data.bh);
+}
+
 static void test_source_bh_cancel(void)
 {
     BHTestData data = { .n = 0 };
@@ -628,6 +671,8 @@ int main(int argc, char **argv)
 {
     GSource *src;
 
+    init_clocks();
+
     ctx = aio_context_new();
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
@@ -639,6 +684,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio/notify",                  test_notify);
     g_test_add_func("/aio/bh/schedule",             test_bh_schedule);
     g_test_add_func("/aio/bh/schedule10",           test_bh_schedule10);
+    g_test_add_func("/aio/bh/schedule-in",          test_bh_schedule_in);
     g_test_add_func("/aio/bh/cancel",               test_bh_cancel);
     g_test_add_func("/aio/bh/delete",               test_bh_delete);
     g_test_add_func("/aio/bh/callback-delete/one",  test_bh_delete_from_cb);
@@ -653,6 +699,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);
     g_test_add_func("/aio-gsource/bh/schedule",             test_source_bh_schedule);
     g_test_add_func("/aio-gsource/bh/schedule10",           test_source_bh_schedule10);
+    g_test_add_func("/aio-gsource/bh/schedule-in",          test_source_bh_schedule_in);
     g_test_add_func("/aio-gsource/bh/cancel",               test_source_bh_cancel);
     g_test_add_func("/aio-gsource/bh/delete",               test_source_bh_delete);
     g_test_add_func("/aio-gsource/bh/callback-delete/one",  test_source_bh_delete_from_cb);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 16:55                     ` Paolo Bonzini
  2013-07-16 21:22                       ` [Qemu-devel] [PATCHv3] " Alex Bligh
@ 2013-07-16 21:24                       ` Alex Bligh
  2013-07-17  3:02                         ` Stefan Hajnoczi
  2013-07-17  7:50                       ` [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves Kevin Wolf
  2 siblings, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-16 21:24 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Paolo,

--On 16 July 2013 18:55:15 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:

>> What do you think? In the end I thought the schedule_bh_at stuff
>> was simpler.
>
> It is simpler, but I'm not sure it is the right API.  Of course, if
> Kevin and Stefan says it is, I have no problem with that.

For the sake of having something to comment on, I just sent v3 of this
patch to the list. This is basically a 'minimal change' version that
fixes the issue with aio_poll (I think). It passes make check.

Stefan? Kevin?

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 21:24                       ` [Qemu-devel] [PATCH] " Alex Bligh
@ 2013-07-17  3:02                         ` Stefan Hajnoczi
  2013-07-17  8:07                           ` Alex Bligh
  2013-07-18 18:48                           ` Alex Bligh
  0 siblings, 2 replies; 29+ messages in thread
From: Stefan Hajnoczi @ 2013-07-17  3:02 UTC (permalink / raw)
  To: Alex Bligh
  Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi,
	Paolo Bonzini, rth

On Tue, Jul 16, 2013 at 10:24:38PM +0100, Alex Bligh wrote:
> --On 16 July 2013 18:55:15 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> >>What do you think? In the end I thought the schedule_bh_at stuff
> >>was simpler.
> >
> >It is simpler, but I'm not sure it is the right API.  Of course, if
> >Kevin and Stefan says it is, I have no problem with that.
> 
> For the sake of having something to comment on, I just sent v3 of this
> patch to the list. This is basically a 'minimal change' version that
> fixes the issue with aio_poll (I think). It passes make check.

I would prefer to stick with QEMUTimer instead of introducing an
AioContext-specific concept that does something very similar.

This can be done by introducing a per-AioContext QEMUClock.  Legacy
QEMUTimers will not run during aio_poll() because they are associated
with vm_clock, host_clock, or rt_clock.  Only QEMUTimers associated with
this AioContext's aio_ctx_clock will run.

In other words, the main loop will run vm_clock, host_clock, and
rt_clock timers.  The AioContext will run its aio_ctx_clock timers.

A few notes about QEMUTimer and QEMUClock:

 * QEMUClock can be enabled/disabled.  Disabled clocks suppress timer
   expiration until re-enabled.

 * QEMUClock can use an arbitrary time source, which is used to present
   a virtual time based on the instruction counter when icount mode is
   active.

 * QEMUTimer is associated with a QEMUClock.  This allows timers that
   only expire when the vm_clock is enabled, for example.

 * Modifying a QEMUTimer uses qemu_notify_event() since it may be called
   from a vcpu thread while the iothread is blocked.

The steps to achieving this:

1. Drop alarm timers from qemu-timer.c and calculate g_poll() timeout
   instead for the main loop.

2. Introduce a per-AioContext aio_ctx_clock that can be used with
   qemu_new_timer() to create a QEMUTimer that expires during
   aio_poll().

3. Calculate g_poll() timeout for aio_ctx_clock in aio_poll().
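
As a purely illustrative sketch (none of these names come from an actual
patch; it just assumes a block driver can get hold of the AioContext's
aio_ctx_clock somehow), usage would look like any other QEMUTimer:

    #include "block/aio.h"
    #include "qemu/timer.h"

    static void reconnect_cb(void *opaque)
    {
        /* e.g. retry an NBD connection; runs from within aio_poll() */
    }

    /* arm a one-shot timer, 1s from now, on the AioContext's clock */
    static QEMUTimer *arm_reconnect_timer(QEMUClock *aio_ctx_clock,
                                          void *opaque)
    {
        QEMUTimer *t = qemu_new_timer(aio_ctx_clock, SCALE_NS,
                                      reconnect_cb, opaque);
        qemu_mod_timer_ns(t, qemu_get_clock_ns(aio_ctx_clock) + 1000000000LL);
        return t;   /* caller frees with qemu_free_timer() when done */
    }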

Stefan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-16 16:55                     ` Paolo Bonzini
  2013-07-16 21:22                       ` [Qemu-devel] [PATCHv3] " Alex Bligh
  2013-07-16 21:24                       ` [Qemu-devel] [PATCH] " Alex Bligh
@ 2013-07-17  7:50                       ` Kevin Wolf
  2 siblings, 0 replies; 29+ messages in thread
From: Kevin Wolf @ 2013-07-17  7:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Anthony Liguori, Stefan Hajnoczi, qemu-devel, Alex Bligh, rth

Am 16.07.2013 um 18:55 hat Paolo Bonzini geschrieben:
> >> But I wouldn't introduce a
> >> new one-off concept (almost as much of a hack as idle BHs), I would
> >> rather reuse as much code as possible from QEMUTimer/QEMUClock.  I must
> >> admit I don't have a clear idea of how the API would look like.
> > 
> > So the reason I was trying to avoid using QEMUTimer stuff was that
> > bh's get called from aio_poll and it was not evident that all timers
> > would be safe to call from aio_poll.
> 
> It wouldn't.
> 
> > What do you think? In the end I thought the schedule_bh_at stuff
> > was simpler.
> 
> It is simpler, but I'm not sure it is the right API.  Of course, if
> Kevin and Stefan says it is, I have no problem with that.

Well, the one thing I'm pretty sure we need is an additional interface,
so that existing timers continue to run only from the main loop, and new
timers / timed BHs / whatever have the option to run in nested event
loops as well.

I don't really care what the thing is called as long as it does what is
needed. Timed BHs seemed to do that, so I agreed.

Kevin

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-17  3:02                         ` Stefan Hajnoczi
@ 2013-07-17  8:07                           ` Alex Bligh
  2013-07-17  8:11                             ` Paolo Bonzini
  2013-07-18 18:48                           ` Alex Bligh
  1 sibling, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-17  8:07 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, qemu-devel,
	Stefan Hajnoczi, Paolo Bonzini, rth

Stefan,

--On 17 July 2013 11:02:30 +0800 Stefan Hajnoczi <stefanha@gmail.com> wrote:

> The steps to achieving this:
>
> 1. Drop alarm timers from qemu-timer.c and calculate g_poll() timeout
>    instead for the main loop.
>
> 2. Introduce a per-AioContext aio_ctx_clock that can be used with
>    qemu_new_timer() to create a QEMUTimer that expires during
>    aio_poll().
>
> 3. Calculate g_poll() timeout for aio_ctx_clock in aio_poll().

A couple of questions:

1. How would this work where the user has no main loop, e.g. qemu-img? A
   block driver may well still need timers.

2. Are we guaranteed that no aio_poll user can repeatedly call aio_poll
   without exiting to the main loop?

3. Is it safe to do anything you can do in a bh in a timer? I.e. are users ever
   going to need to schedule a bh from a timer? If so, this seems a bit
   long-winded for users that want bh functionality.
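
(For concreteness, the pattern I have in mind for 3 is something like the
following -- a hypothetical sketch only, where bh is assumed to have been
created earlier with aio_bh_new():

    static void my_timer_cb(void *opaque)
    {
        QEMUBH *bh = opaque;
        /* the timer does nothing but defer the real work to the bh */
        qemu_bh_schedule(bh);
    }

i.e. a timer plus a bh, and two callbacks, where a single timed bh would do.)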

The timed bh solution (which I'm by no means set on) can use any QEMUClock
in the bh, so you get to choose the clock per BH, not per AioContext,
which may or may not have advantages.

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-17  8:07                           ` Alex Bligh
@ 2013-07-17  8:11                             ` Paolo Bonzini
  2013-07-17 16:09                               ` Alex Bligh
  0 siblings, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-17  8:11 UTC (permalink / raw)
  To: Alex Bligh
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Stefan Hajnoczi, rth

Il 17/07/2013 10:07, Alex Bligh ha scritto:
> Stefan,
> 
> --On 17 July 2013 11:02:30 +0800 Stefan Hajnoczi <stefanha@gmail.com>
> wrote:
> 
>> The steps to achieving this:
>>
>> 1. Drop alarm timers from qemu-timer.c and calculate g_poll() timeout
>>    instead for the main loop.
>>
>> 2. Introduce a per-AioContext aio_ctx_clock that can be used with
>>    qemu_new_timer() to create a QEMUTimer that expires during
>>    aio_poll().
>>
>> 3. Calculate g_poll() timeout for aio_ctx_clock in aio_poll().
> 
> A couple of questions:
> 
> 1. How would this work where the user has no main loop, e.g. qemu-img? A
>   block driver may well still need timers.

The block driver should only use aio_ctx_clock, and those _would_ be
handled in aio_poll().

> 3. Is it safe to do anything you can do in a bh in a timer? I.e. are users ever
>   going to need to schedule a bh from a timer? If so, this seems a bit
>   long-winded for users that want bh functionality.

It is safe.

Paolo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-17  8:11                             ` Paolo Bonzini
@ 2013-07-17 16:09                               ` Alex Bligh
  0 siblings, 0 replies; 29+ messages in thread
From: Alex Bligh @ 2013-07-17 16:09 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, Stefan Hajnoczi,
	qemu-devel, Stefan Hajnoczi, rth

Paolo,

--On 17 July 2013 10:11:07 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:

>>> The steps to achieving this:
>>>
>>> 1. Drop alarm timers from qemu-timer.c and calculate g_poll() timeout
>>>    instead for the main loop.
>>>
>>> 2. Introduce a per-AioContext aio_ctx_clock that can be used with
>>>    qemu_new_timer() to create a QEMUTimer that expires during
>>>    aio_poll().
>>>
>>> 3. Calculate g_poll() timeout for aio_ctx_clock in aio_poll().
>>
>> A couple of questions:
>>
>> 1. How would this work where the user has no main loop, e.g. qemu-img? A
>>   block driver may well still need timers.
>
> The block driver should only use aio_ctx_clock, and those _would_ be
> handled in aio_poll().

OK, so modify aio_poll not to run qemu_run_all_timers but to run
qemu_run_timers on that particular clock.

We'd still need to modify some of the executables to call init_timers()
or whatever it is, but that's easy enough.

I might have a go at that.
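
Roughly, I'd expect the setup in a main-loop-less tool to be no more than
this (a sketch under the proposal above, not tested):

    #include "block/aio.h"
    #include "qemu/timer.h"

    int main(int argc, char **argv)
    {
        AioContext *ctx;

        init_clocks();              /* clock infrastructure, no alarm timers */
        ctx = aio_context_new();    /* would also create the per-context clock */

        /* create timers against the context's clock here, then drive
         * everything -- fds and timers alike -- with aio_poll(ctx, true) */

        return 0;
    }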

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-17  3:02                         ` Stefan Hajnoczi
  2013-07-17  8:07                           ` Alex Bligh
@ 2013-07-18 18:48                           ` Alex Bligh
  2013-07-19  1:58                             ` Stefan Hajnoczi
  1 sibling, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-18 18:48 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, qemu-devel,
	Stefan Hajnoczi, Paolo Bonzini, rth

Stefan,

--On 17 July 2013 11:02:30 +0800 Stefan Hajnoczi <stefanha@gmail.com> wrote:

> The steps to achieving this:
>
> 1. Drop alarm timers from qemu-timer.c and calculate g_poll() timeout
>    instead for the main loop.
>
> 2. Introduce a per-AioContext aio_ctx_clock that can be used with
>    qemu_new_timer() to create a QEMUTimer that expires during
>    aio_poll().
>
> 3. Calculate g_poll() timeout for aio_ctx_clock in aio_poll().

I've pretty much written this.

Two issues:

1. I very much enjoyed deleting all the alarm timers code. However, it was
   doing something vaguely useful, which was calling qemu_notify_event
   when the timer expired. Under the new regime above, if the AioContext
   clock's timers expires within aio_poll, life is good as the timeout
   to g_poll will expire. However, this won't apply if any of the other
   three static clock's timers expire. Also, it won't work within the
   mainloop poll. Also, we lose the ability to respond to timers in a sub
   millisecond way.

   Options:

   a) restore alarm timers (at least for the time being). Make all
      alarm timers do qemu_notify_event. However, only run the AioContext
      clock's timers within aio_poll. This is the least intrusive change.

   b) calculate the timeout in aio_poll with respect to the minimum
      deadline across all clocks, not just AioContext's clock. Use the
      same logic in mainloop.

   I'd go for (b), except for the millisecond accuracy thing. So my
   temptation (sadly) is (a).

2. If we do delete alarm timers, I'll need to delete the -clock option.

WDYT?

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-18 18:48                           ` Alex Bligh
@ 2013-07-19  1:58                             ` Stefan Hajnoczi
  2013-07-19  6:22                               ` Paolo Bonzini
  2013-07-19  6:38                               ` Alex Bligh
  0 siblings, 2 replies; 29+ messages in thread
From: Stefan Hajnoczi @ 2013-07-19  1:58 UTC (permalink / raw)
  To: Alex Bligh
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Paolo Bonzini, rth

On Thu, Jul 18, 2013 at 07:48:28PM +0100, Alex Bligh wrote:
> Stefan,
> 
> --On 17 July 2013 11:02:30 +0800 Stefan Hajnoczi <stefanha@gmail.com> wrote:
> 
> >The steps to achieving this:
> >
> >1. Drop alarm timers from qemu-timer.c and calculate g_poll() timeout
> >   instead for the main loop.
> >
> >2. Introduce a per-AioContext aio_ctx_clock that can be used with
> >   qemu_new_timer() to create a QEMUTimer that expires during
> >   aio_poll().
> >
> >3. Calculate g_poll() timeout for aio_ctx_clock in aio_poll().
> 
> I've pretty much written this.
> 
> Two issues:
> 
> 1. I very much enjoyed deleting all the alarm timers code. However, it was
>   doing something vaguely useful, which was calling qemu_notify_event
>   when the timer expired. Under the new regime above, if the AioContext
>   clock's timers expire within aio_poll, life is good as the timeout
>   to g_poll will expire. However, this won't apply if any of the other
>   three static clocks' timers expire. Also, it won't work within the
>   mainloop poll. Also, we lose the ability to respond to timers in a
>   sub-millisecond way.
> 
>   Options:
> 
>   a) restore alarm timers (at least for the time being). Make all
>      alarm timers do qemu_notify_event. However, only run the AioContext
>      clock's timers within aio_poll. This is the least intrusive change.
> 
>   b) calculate the timeout in aio_poll with respect to the minimum
>      deadline across all clocks, not just AioContext's clock. Use the
>      same logic in mainloop.
> 
>   I'd go for (b), except for the millisecond accuracy thing. So my
>   temptation (sadly) is (a).

I think this is a non-issue because host_alarm_handler() can only be
called from the main loop:

main-loop.c:qemu_signal_init() sets up signalfd to monitor SIGALRM.
Therefore we do not asynchronously invoke the SIGALRM signal handler.
It is only invoked from main-loop.c:sigfd_handler() when the main loop
runs.

That's how I read the code.  I haven't checked a running process to be
sure.

> 2. If we do delete alarm timers, I'll need to delete the -clock option.

I noticed this too. I think we should stub it out for
compatibility: accept existing options but ignore them, and update the
documentation to state that they are kept for compatibility.

Stefan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-19  1:58                             ` Stefan Hajnoczi
@ 2013-07-19  6:22                               ` Paolo Bonzini
  2013-07-19  6:38                               ` Alex Bligh
  1 sibling, 0 replies; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-19  6:22 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, rth

Il 19/07/2013 03:58, Stefan Hajnoczi ha scritto:
>>   Options:
>>
>>   a) restore alarm timers (at least for the time being). Make all
>>      alarm timers do qemu_notify_event. However, only run the AioContext
>>      clock's timers within aio_poll. This is the least intrusive change.
>>
>>   b) calculate the timeout in aio_poll with respect to the minimum
>>      deadline across all clocks, not just AioContext's clock. Use the
>>      same logic in mainloop.
>>
>>   I'd go for (b), except for the millisecond accuracy thing. So my
>>   temptation (sadly) is (a).
> 
> I think this is a non-issue because host_alarm_handler() can only be
> called from the main loop:
> 
> main-loop.c:qemu_signal_init() sets up signalfd to monitor SIGALRM.
> Therefore we do not asynchronously invoke the SIGALRM signal handler.
> It is only invoked from main-loop.c:sigfd_handler() when the main loop
> runs.
> 
> That's how I read the code.  I haven't checked a running process to be
> sure.

I think you're right.  It was not strictly needed even with alarm
timers, because host_alarm_handler() is always called before
qemu_run_all_timers.  But it made the code more robust.

With your change to delete alarm timers and move the deadline to poll,
the next poll call will have a timeout of 0 and all will be well.

As to millisecond accuracy, as discussed earlier we can use ppoll on
Linux.  This of course should be introduced before alarm timers are
deleted, to avoid breaking bisection.
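
Something along these lines would do (sketch only; the sigmask argument and
error handling are omitted):

    #define _GNU_SOURCE
    #include <poll.h>
    #include <time.h>
    #include <stdint.h>

    static int poll_ns(struct pollfd *fds, nfds_t nfds, int64_t timeout_ns)
    {
        if (timeout_ns < 0) {
            return ppoll(fds, nfds, NULL, NULL);    /* block indefinitely */
        } else {
            struct timespec ts;
            ts.tv_sec  = timeout_ns / 1000000000LL;
            ts.tv_nsec = timeout_ns % 1000000000LL;
            return ppoll(fds, nfds, &ts, NULL);     /* ns resolution */
        }
    }

so the nanosecond deadline can be passed straight through rather than being
rounded up to milliseconds for g_poll().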

Paolo

>> 2. If we do delete alarm timers, I'll need to delete the -clock option.
> 
> I noticed this too because I think we should stub it out for
> compatibility.  Accept existing options but ignore them, update
> documentation to state that they are kept for compatibility.
> 
> Stefan
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-19  1:58                             ` Stefan Hajnoczi
  2013-07-19  6:22                               ` Paolo Bonzini
@ 2013-07-19  6:38                               ` Alex Bligh
  2013-07-19  6:51                                 ` Paolo Bonzini
  1 sibling, 1 reply; 29+ messages in thread
From: Alex Bligh @ 2013-07-19  6:38 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Alex Bligh, Paolo Bonzini, rth

Stefan,

--On 19 July 2013 09:58:50 +0800 Stefan Hajnoczi <stefanha@redhat.com> 
wrote:

>>   Options:
>>
>>   a) restore alarm timers (at least for the time being). Make all
>>      alarm timers do qemu_notify_event. However, only run the AioContext
>>      clock's timers within aio_poll. This is the least intrusive change.
>>
>>   b) calculate the timeout in aio_poll with respect to the minimum
>>      deadline across all clocks, not just AioContext's clock. Use the
>>      same logic in mainloop.
>>
>>   I'd go for (b), except for the millisecond accuracy thing. So my
>>   temptation (sadly) is (a).
>
> I think this is a non-issue because host_alarm_handler() can only be
> called from the main loop:
>
> main-loop.c:qemu_signal_init() sets up signalfd to monitor SIGALRM.
> Therefore we do not asynchronously invoke the SIGALRM signal handler.
> It is only invoked from main-loop.c:sigfd_handler() when the main loop
> runs.
>
> That's how I read the code.  I haven't checked a running process to be
> sure.

Not sure that's the case on win32, but OK let's go with that.

However, I still don't quite see how the poll in the mainloop is
meant to exit when a timer expires. There's now no qemu_notify_event,
and no SIGALRM, and the timeout will still be infinite (unless I
calculate the timeout as the minimum across all clocks, in which
case I might as well do (b) above).

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves
  2013-07-19  6:38                               ` Alex Bligh
@ 2013-07-19  6:51                                 ` Paolo Bonzini
  2013-07-19 17:26                                   ` [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll Alex Bligh
  0 siblings, 1 reply; 29+ messages in thread
From: Paolo Bonzini @ 2013-07-19  6:51 UTC (permalink / raw)
  To: Alex Bligh
  Cc: Kevin Wolf, Anthony Liguori, Stefan Hajnoczi, qemu-devel,
	Stefan Hajnoczi, rth

Il 19/07/2013 08:38, Alex Bligh ha scritto:
> 
> However, I still don't quite see how the poll in the mainloop is
> meant to exit when a timer expires. There's now no qemu_notify_event,
> and no SIGALRM, and the timeout will still be infinite (unless I
> calculate the timeout as the minimum across all clocks, in which
> case I might as well do (b) above).

Yes, that was the idea.

Paolo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll
  2013-07-19  6:51                                 ` Paolo Bonzini
@ 2013-07-19 17:26                                   ` Alex Bligh
  2013-07-25  9:00                                     ` Stefan Hajnoczi
  2013-07-25  9:02                                     ` Stefan Hajnoczi
  0 siblings, 2 replies; 29+ messages in thread
From: Alex Bligh @ 2013-07-19 17:26 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Anthony Liguori, Alex Bligh, Stefan Hajnoczi,
	Paolo Bonzini, rth

[ This is a patch for RFC purposes only. It is compile tested on Linux x86_64 only
and passes make check (or rather did before make check started dying in the
boot order test - different bug). I'd like to know whether I'm going in
the right direction ]

We no longer need alarm timers to trigger QEMUTimer as we'll be polling
them in aio_poll.

Remove static declaration from qemu_new_clock and introduce qemu_free_clock.

Maintain a list of QEMUClocks.

Introduce qemu_clock_deadline_ns and qemu_clock_deadline_all_ns which calculate how
long aio_poll etc. should wait, plus (for the time being) a conversion to milliseconds.

Make qemu_run_timers return a bool to indicate progress.

Add QEMUClock to AioContext.

Run timers attached to the clock in aio_poll.

Signed-off-by: Alex Bligh <alex@alex.org.uk>
---
 aio-posix.c          |   16 +-
 aio-win32.c          |   20 +-
 async.c              |    2 +
 include/block/aio.h  |    5 +
 include/qemu/timer.h |   15 +-
 main-loop.c          |    9 +-
 qemu-timer.c         |  599 ++++++++------------------------------------------
 vl.c                 |    5 +-
 8 files changed, 150 insertions(+), 521 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index b68eccd..6401259 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -173,6 +173,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandler *node;
     int ret;
+    int timeout;
     bool busy, progress;
 
     progress = false;
@@ -195,6 +196,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
         return true;
     }
 
+    /* Run our timers */
+    progress |= qemu_run_timers(ctx->clock);
+
     ctx->walking_handlers++;
 
     g_array_set_size(ctx->pollfds, 0);
@@ -232,9 +236,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     /* wait until next event */
+    timeout = qemu_timeout_ns_to_ms(qemu_clock_deadline_all_ns());
     ret = g_poll((GPollFD *)ctx->pollfds->data,
                  ctx->pollfds->len,
-                 blocking ? -1 : 0);
+                 blocking ? timeout : 0);
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
@@ -250,6 +255,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
         }
     }
 
+    if (blocking) {
+        /* Run the timers a second time. We do this because otherwise aio_wait
+         * will not note progress - and will stop a drain early - if we have
+         * a timer that was not ready to run entering g_poll but is ready
+         * after g_poll. This will only do anything if a timer has expired.
+         */
+        progress |= qemu_run_timers(ctx->clock);
+    }
+
     assert(progress || busy);
     return true;
 }
diff --git a/aio-win32.c b/aio-win32.c
index 38723bf..68343ba 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -98,6 +98,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
     bool busy, progress;
     int count;
+    int timeout;
 
     progress = false;
 
@@ -111,6 +112,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress = true;
     }
 
+    /* Run timers */
+    progress |= qemu_run_timers(ctx->clock);
+
     /*
      * Then dispatch any pending callbacks from the GSource.
      *
@@ -174,8 +178,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* wait until next event */
     while (count > 0) {
-        int timeout = blocking ? INFINITE : 0;
-        int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+        int ret;
+
+        timeout = blocking ?
+            qemu_timeout_ns_to_ms(qemu_clock_deadline_all_ns()) : 0;
+        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
 
         /* if we have any signaled events, dispatch event */
         if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
@@ -214,6 +221,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
         events[ret - WAIT_OBJECT_0] = events[--count];
     }
 
+    if (blocking) {
+        /* Run the timers a second time. We do this because otherwise aio_wait
+         * will not note progress - and will stop a drain early - if we have
+         * a timer that was not ready to run entering g_poll but is ready
+         * after g_poll. This will only do anything if a timer has expired.
+         */
+        progress |= qemu_run_timers(ctx->clock);
+    }
+
     assert(progress || busy);
     return true;
 }
diff --git a/async.c b/async.c
index 90fe906..0d41431 100644
--- a/async.c
+++ b/async.c
@@ -177,6 +177,7 @@ aio_ctx_finalize(GSource     *source)
     aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
     event_notifier_cleanup(&ctx->notifier);
     g_array_free(ctx->pollfds, TRUE);
+    qemu_free_clock(ctx->clock);
 }
 
 static GSourceFuncs aio_source_funcs = {
@@ -215,6 +216,7 @@ AioContext *aio_context_new(void)
     aio_set_event_notifier(ctx, &ctx->notifier, 
                            (EventNotifierHandler *)
                            event_notifier_test_and_clear, NULL);
+    ctx->clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
 
     return ctx;
 }
diff --git a/include/block/aio.h b/include/block/aio.h
index 1836793..0835a4d 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -41,6 +41,8 @@ typedef struct AioHandler AioHandler;
 typedef void QEMUBHFunc(void *opaque);
 typedef void IOHandler(void *opaque);
 
+typedef struct QEMUClock QEMUClock;
+
 typedef struct AioContext {
     GSource source;
 
@@ -69,6 +71,9 @@ typedef struct AioContext {
 
     /* Thread pool for performing work and receiving completion callbacks */
     struct ThreadPool *thread_pool;
+
+    /* Clock for calling timers */
+    QEMUClock *clock;
 } AioContext;
 
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9dd206c..0649064 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -11,6 +11,10 @@
 #define SCALE_US 1000
 #define SCALE_NS 1
 
+#define QEMU_CLOCK_REALTIME 0
+#define QEMU_CLOCK_VIRTUAL  1
+#define QEMU_CLOCK_HOST     2
+
 typedef struct QEMUClock QEMUClock;
 typedef void QEMUTimerCB(void *opaque);
 
@@ -32,10 +36,15 @@ extern QEMUClock *vm_clock;
    the virtual clock. */
 extern QEMUClock *host_clock;
 
+QEMUClock *qemu_new_clock(int type);
+void qemu_free_clock(QEMUClock *clock);
 int64_t qemu_get_clock_ns(QEMUClock *clock);
 int64_t qemu_clock_has_timers(QEMUClock *clock);
 int64_t qemu_clock_expired(QEMUClock *clock);
 int64_t qemu_clock_deadline(QEMUClock *clock);
+int64_t qemu_clock_deadline_ns(QEMUClock *clock);
+int64_t qemu_clock_deadline_all_ns(void);
+int qemu_timeout_ns_to_ms(int64_t ns);
 void qemu_clock_enable(QEMUClock *clock, bool enabled);
 void qemu_clock_warp(QEMUClock *clock);
 
@@ -53,11 +62,9 @@ bool qemu_timer_pending(QEMUTimer *ts);
 bool qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time);
 uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts);
 
-void qemu_run_timers(QEMUClock *clock);
-void qemu_run_all_timers(void);
-void configure_alarms(char const *opt);
+bool qemu_run_timers(QEMUClock *clock);
+bool qemu_run_all_timers(void);
 void init_clocks(void);
-int init_timer_alarm(void);
 
 int64_t cpu_get_ticks(void);
 void cpu_enable_ticks(void);
diff --git a/main-loop.c b/main-loop.c
index a44fff6..626eda2 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -131,10 +131,6 @@ int qemu_init_main_loop(void)
     GSource *src;
 
     init_clocks();
-    if (init_timer_alarm() < 0) {
-        fprintf(stderr, "could not initialize alarm timer\n");
-        exit(1);
-    }
 
     ret = qemu_signal_init();
     if (ret) {
@@ -449,6 +445,11 @@ int main_loop_wait(int nonblocking)
 {
     int ret;
     uint32_t timeout = UINT32_MAX;
+    int32_t timer_timeout = qemu_timeout_ns_to_ms(qemu_clock_deadline_all_ns());
+
+    if (timer_timeout >= 0) {
+        timeout = timer_timeout;
+    }
 
     if (nonblocking) {
         timeout = 0;
diff --git a/qemu-timer.c b/qemu-timer.c
index b2d95e2..e643d94 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -33,17 +33,9 @@
 #include <pthread.h>
 #endif
 
-#ifdef _WIN32
-#include <mmsystem.h>
-#endif
-
 /***********************************************************/
 /* timers */
 
-#define QEMU_CLOCK_REALTIME 0
-#define QEMU_CLOCK_VIRTUAL  1
-#define QEMU_CLOCK_HOST     2
-
 struct QEMUClock {
     QEMUTimer *active_timers;
 
@@ -52,6 +44,8 @@ struct QEMUClock {
 
     int type;
     bool enabled;
+
+    QLIST_ENTRY(QEMUClock) list;
 };
 
 struct QEMUTimer {
@@ -63,175 +57,19 @@ struct QEMUTimer {
     int scale;
 };
 
-struct qemu_alarm_timer {
-    char const *name;
-    int (*start)(struct qemu_alarm_timer *t);
-    void (*stop)(struct qemu_alarm_timer *t);
-    void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
-#if defined(__linux__)
-    timer_t timer;
-    int fd;
-#elif defined(_WIN32)
-    HANDLE timer;
-#endif
-    bool expired;
-    bool pending;
-};
-
-static struct qemu_alarm_timer *alarm_timer;
+static QLIST_HEAD(, QEMUClock) qemu_clocks =
+    QLIST_HEAD_INITIALIZER(qemu_clocks);
 
 static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
 {
     return timer_head && (timer_head->expire_time <= current_time);
 }
 
-static int64_t qemu_next_alarm_deadline(void)
-{
-    int64_t delta = INT64_MAX;
-    int64_t rtdelta;
-
-    if (!use_icount && vm_clock->enabled && vm_clock->active_timers) {
-        delta = vm_clock->active_timers->expire_time -
-                     qemu_get_clock_ns(vm_clock);
-    }
-    if (host_clock->enabled && host_clock->active_timers) {
-        int64_t hdelta = host_clock->active_timers->expire_time -
-                 qemu_get_clock_ns(host_clock);
-        if (hdelta < delta) {
-            delta = hdelta;
-        }
-    }
-    if (rt_clock->enabled && rt_clock->active_timers) {
-        rtdelta = (rt_clock->active_timers->expire_time -
-                 qemu_get_clock_ns(rt_clock));
-        if (rtdelta < delta) {
-            delta = rtdelta;
-        }
-    }
-
-    return delta;
-}
-
-static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t)
-{
-    int64_t nearest_delta_ns = qemu_next_alarm_deadline();
-    if (nearest_delta_ns < INT64_MAX) {
-        t->rearm(t, nearest_delta_ns);
-    }
-}
-
-/* TODO: MIN_TIMER_REARM_NS should be optimized */
-#define MIN_TIMER_REARM_NS 250000
-
-#ifdef _WIN32
-
-static int mm_start_timer(struct qemu_alarm_timer *t);
-static void mm_stop_timer(struct qemu_alarm_timer *t);
-static void mm_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
-
-static int win32_start_timer(struct qemu_alarm_timer *t);
-static void win32_stop_timer(struct qemu_alarm_timer *t);
-static void win32_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
-
-#else
-
-static int unix_start_timer(struct qemu_alarm_timer *t);
-static void unix_stop_timer(struct qemu_alarm_timer *t);
-static void unix_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
-
-#ifdef __linux__
-
-static int dynticks_start_timer(struct qemu_alarm_timer *t);
-static void dynticks_stop_timer(struct qemu_alarm_timer *t);
-static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
-
-#endif /* __linux__ */
-
-#endif /* _WIN32 */
-
-static struct qemu_alarm_timer alarm_timers[] = {
-#ifndef _WIN32
-#ifdef __linux__
-    {"dynticks", dynticks_start_timer,
-     dynticks_stop_timer, dynticks_rearm_timer},
-#endif
-    {"unix", unix_start_timer, unix_stop_timer, unix_rearm_timer},
-#else
-    {"mmtimer", mm_start_timer, mm_stop_timer, mm_rearm_timer},
-    {"dynticks", win32_start_timer, win32_stop_timer, win32_rearm_timer},
-#endif
-    {NULL, }
-};
-
-static void show_available_alarms(void)
-{
-    int i;
-
-    printf("Available alarm timers, in order of precedence:\n");
-    for (i = 0; alarm_timers[i].name; i++)
-        printf("%s\n", alarm_timers[i].name);
-}
-
-void configure_alarms(char const *opt)
-{
-    int i;
-    int cur = 0;
-    int count = ARRAY_SIZE(alarm_timers) - 1;
-    char *arg;
-    char *name;
-    struct qemu_alarm_timer tmp;
-
-    if (is_help_option(opt)) {
-        show_available_alarms();
-        exit(0);
-    }
-
-    arg = g_strdup(opt);
-
-    /* Reorder the array */
-    name = strtok(arg, ",");
-    while (name) {
-        for (i = 0; i < count && alarm_timers[i].name; i++) {
-            if (!strcmp(alarm_timers[i].name, name))
-                break;
-        }
-
-        if (i == count) {
-            fprintf(stderr, "Unknown clock %s\n", name);
-            goto next;
-        }
-
-        if (i < cur)
-            /* Ignore */
-            goto next;
-
-	/* Swap */
-        tmp = alarm_timers[i];
-        alarm_timers[i] = alarm_timers[cur];
-        alarm_timers[cur] = tmp;
-
-        cur++;
-next:
-        name = strtok(NULL, ",");
-    }
-
-    g_free(arg);
-
-    if (cur) {
-        /* Disable remaining timers */
-        for (i = cur; i < count; i++)
-            alarm_timers[i].name = NULL;
-    } else {
-        show_available_alarms();
-        exit(1);
-    }
-}
-
 QEMUClock *rt_clock;
 QEMUClock *vm_clock;
 QEMUClock *host_clock;
 
-static QEMUClock *qemu_new_clock(int type)
+QEMUClock *qemu_new_clock(int type)
 {
     QEMUClock *clock;
 
@@ -240,16 +78,19 @@ static QEMUClock *qemu_new_clock(int type)
     clock->enabled = true;
     clock->last = INT64_MIN;
     notifier_list_init(&clock->reset_notifiers);
+    QLIST_INSERT_HEAD(&qemu_clocks, clock, list);
     return clock;
 }
 
+void qemu_free_clock(QEMUClock *clock)
+{
+    QLIST_REMOVE(clock, list);
+    g_free(clock);
+}
+
 void qemu_clock_enable(QEMUClock *clock, bool enabled)
 {
-    bool old = clock->enabled;
     clock->enabled = enabled;
-    if (enabled && !old) {
-        qemu_rearm_alarm_timer(alarm_timer);
-    }
 }
 
 int64_t qemu_clock_has_timers(QEMUClock *clock)
@@ -268,7 +109,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock)
     /* To avoid problems with overflow limit this to 2^32.  */
     int64_t delta = INT32_MAX;
 
-    if (clock->active_timers) {
+    if (clock->enabled && clock->active_timers) {
         delta = clock->active_timers->expire_time - qemu_get_clock_ns(clock);
     }
     if (delta < 0) {
@@ -277,6 +118,71 @@ int64_t qemu_clock_deadline(QEMUClock *clock)
     return delta;
 }
 
+/*
+ * As above, but return -1 for no deadline, and do not cap to 2^32
+ * as we know the result is always positive.
+ */
+
+int64_t qemu_clock_deadline_ns(QEMUClock *clock)
+{
+    int64_t delta;
+
+    if (!clock->enabled || !clock->active_timers) {
+        return -1;
+    }
+
+    delta = clock->active_timers->expire_time - qemu_get_clock_ns(clock);
+
+    if (delta <= 0) {
+        return 0;
+    }
+
+    return delta;
+}
+
+/* Return the minimum deadline across all clocks or -1 for no
+ * deadline
+ */
+int64_t qemu_clock_deadline_all_ns(void)
+{
+    QEMUClock *clock;
+    int64_t ret = -1;
+    QLIST_FOREACH(clock, &qemu_clocks, list) {
+        int64_t ns = qemu_clock_deadline_ns(clock);
+        if ((ns >= 0) && ((ret == -1) || (ns < ret))) {
+            ret = ns;
+        }
+    }
+    return ret;
+}
+
+/* Transition function to convert a nanosecond timeout to ms.
+ * This will be deleted when we switch to ppoll.
+ */
+int qemu_timeout_ns_to_ms(int64_t ns)
+{
+    int64_t ms;
+    if (ns < 0) {
+        return -1;
+    }
+
+    if (!ns) {
+        return 0;
+    }
+
+    /* Always round up, because it's better to wait too long than to wait too
+     * little and effectively busy-wait
+     */
+    ms = (ns + SCALE_MS - 1) / SCALE_MS;
+
+    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
+    if (ms > (int64_t) INT32_MAX) {
+        ms = INT32_MAX;
+    }
+
+    return (int) ms;
+}
+
 QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
                           QEMUTimerCB *cb, void *opaque)
 {
@@ -340,13 +246,10 @@ void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time)
 
     /* Rearm if necessary  */
     if (pt == &ts->clock->active_timers) {
-        if (!alarm_timer->pending) {
-            qemu_rearm_alarm_timer(alarm_timer);
-        }
         /* Interrupt execution to force deadline recalculation.  */
         qemu_clock_warp(ts->clock);
         if (use_icount) {
-            qemu_notify_event();
+            qemu_notify_event(); /* FIXME: do we need this now? */
         }
     }
 }
@@ -372,13 +275,14 @@ bool qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time)
     return qemu_timer_expired_ns(timer_head, current_time * timer_head->scale);
 }
 
-void qemu_run_timers(QEMUClock *clock)
+bool qemu_run_timers(QEMUClock *clock)
 {
     QEMUTimer *ts;
     int64_t current_time;
+    bool progress = false;
    
     if (!clock->enabled)
-        return;
+        return progress;
 
     current_time = qemu_get_clock_ns(clock);
     for(;;) {
@@ -392,7 +296,9 @@ void qemu_run_timers(QEMUClock *clock)
 
         /* run the callback (the timer list can be modified) */
         ts->cb(ts->opaque);
+        progress = true;
     }
+    return progress;
 }
 
 int64_t qemu_get_clock_ns(QEMUClock *clock)
@@ -444,337 +350,12 @@ uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts)
     return qemu_timer_pending(ts) ? ts->expire_time : -1;
 }
 
-void qemu_run_all_timers(void)
+bool qemu_run_all_timers(void)
 {
-    alarm_timer->pending = false;
-
     /* vm time timers */
-    qemu_run_timers(vm_clock);
-    qemu_run_timers(rt_clock);
-    qemu_run_timers(host_clock);
-
-    /* rearm timer, if not periodic */
-    if (alarm_timer->expired) {
-        alarm_timer->expired = false;
-        qemu_rearm_alarm_timer(alarm_timer);
-    }
-}
-
-#ifdef _WIN32
-static void CALLBACK host_alarm_handler(PVOID lpParam, BOOLEAN unused)
-#else
-static void host_alarm_handler(int host_signum)
-#endif
-{
-    struct qemu_alarm_timer *t = alarm_timer;
-    if (!t)
-	return;
-
-    t->expired = true;
-    t->pending = true;
-    qemu_notify_event();
-}
-
-#if defined(__linux__)
-
-#include "qemu/compatfd.h"
-
-static int dynticks_start_timer(struct qemu_alarm_timer *t)
-{
-    struct sigevent ev;
-    timer_t host_timer;
-    struct sigaction act;
-
-    sigfillset(&act.sa_mask);
-    act.sa_flags = 0;
-    act.sa_handler = host_alarm_handler;
-
-    sigaction(SIGALRM, &act, NULL);
-
-    /* 
-     * Initialize ev struct to 0 to avoid valgrind complaining
-     * about uninitialized data in timer_create call
-     */
-    memset(&ev, 0, sizeof(ev));
-    ev.sigev_value.sival_int = 0;
-    ev.sigev_notify = SIGEV_SIGNAL;
-#ifdef CONFIG_SIGEV_THREAD_ID
-    if (qemu_signalfd_available()) {
-        ev.sigev_notify = SIGEV_THREAD_ID;
-        ev._sigev_un._tid = qemu_get_thread_id();
-    }
-#endif /* CONFIG_SIGEV_THREAD_ID */
-    ev.sigev_signo = SIGALRM;
-
-    if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) {
-        perror("timer_create");
-        return -1;
-    }
-
-    t->timer = host_timer;
-
-    return 0;
-}
-
-static void dynticks_stop_timer(struct qemu_alarm_timer *t)
-{
-    timer_t host_timer = t->timer;
-
-    timer_delete(host_timer);
-}
-
-static void dynticks_rearm_timer(struct qemu_alarm_timer *t,
-                                 int64_t nearest_delta_ns)
-{
-    timer_t host_timer = t->timer;
-    struct itimerspec timeout;
-    int64_t current_ns;
-
-    if (nearest_delta_ns < MIN_TIMER_REARM_NS)
-        nearest_delta_ns = MIN_TIMER_REARM_NS;
-
-    /* check whether a timer is already running */
-    if (timer_gettime(host_timer, &timeout)) {
-        perror("gettime");
-        fprintf(stderr, "Internal timer error: aborting\n");
-        exit(1);
-    }
-    current_ns = timeout.it_value.tv_sec * 1000000000LL + timeout.it_value.tv_nsec;
-    if (current_ns && current_ns <= nearest_delta_ns)
-        return;
-
-    timeout.it_interval.tv_sec = 0;
-    timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
-    timeout.it_value.tv_sec =  nearest_delta_ns / 1000000000;
-    timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000;
-    if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) {
-        perror("settime");
-        fprintf(stderr, "Internal timer error: aborting\n");
-        exit(1);
-    }
-}
-
-#endif /* defined(__linux__) */
-
-#if !defined(_WIN32)
-
-static int unix_start_timer(struct qemu_alarm_timer *t)
-{
-    struct sigaction act;
-
-    /* timer signal */
-    sigfillset(&act.sa_mask);
-    act.sa_flags = 0;
-    act.sa_handler = host_alarm_handler;
-
-    sigaction(SIGALRM, &act, NULL);
-    return 0;
-}
-
-static void unix_rearm_timer(struct qemu_alarm_timer *t,
-                             int64_t nearest_delta_ns)
-{
-    struct itimerval itv;
-    int err;
-
-    if (nearest_delta_ns < MIN_TIMER_REARM_NS)
-        nearest_delta_ns = MIN_TIMER_REARM_NS;
-
-    itv.it_interval.tv_sec = 0;
-    itv.it_interval.tv_usec = 0; /* 0 for one-shot timer */
-    itv.it_value.tv_sec =  nearest_delta_ns / 1000000000;
-    itv.it_value.tv_usec = (nearest_delta_ns % 1000000000) / 1000;
-    err = setitimer(ITIMER_REAL, &itv, NULL);
-    if (err) {
-        perror("setitimer");
-        fprintf(stderr, "Internal timer error: aborting\n");
-        exit(1);
-    }
-}
-
-static void unix_stop_timer(struct qemu_alarm_timer *t)
-{
-    struct itimerval itv;
-
-    memset(&itv, 0, sizeof(itv));
-    setitimer(ITIMER_REAL, &itv, NULL);
-}
-
-#endif /* !defined(_WIN32) */
-
-
-#ifdef _WIN32
-
-static MMRESULT mm_timer;
-static TIMECAPS mm_tc;
-
-static void CALLBACK mm_alarm_handler(UINT uTimerID, UINT uMsg,
-                                      DWORD_PTR dwUser, DWORD_PTR dw1,
-                                      DWORD_PTR dw2)
-{
-    struct qemu_alarm_timer *t = alarm_timer;
-    if (!t) {
-        return;
-    }
-    t->expired = true;
-    t->pending = true;
-    qemu_notify_event();
-}
-
-static int mm_start_timer(struct qemu_alarm_timer *t)
-{
-    timeGetDevCaps(&mm_tc, sizeof(mm_tc));
-    return 0;
-}
-
-static void mm_stop_timer(struct qemu_alarm_timer *t)
-{
-    if (mm_timer) {
-        timeKillEvent(mm_timer);
-    }
+    bool progress = false;
+    progress |= qemu_run_timers(vm_clock);
+    progress |= qemu_run_timers(rt_clock);
+    progress |= qemu_run_timers(host_clock);
+    return progress;
 }
-
-static void mm_rearm_timer(struct qemu_alarm_timer *t, int64_t delta)
-{
-    int64_t nearest_delta_ms = delta / 1000000;
-    if (nearest_delta_ms < mm_tc.wPeriodMin) {
-        nearest_delta_ms = mm_tc.wPeriodMin;
-    } else if (nearest_delta_ms > mm_tc.wPeriodMax) {
-        nearest_delta_ms = mm_tc.wPeriodMax;
-    }
-
-    if (mm_timer) {
-        timeKillEvent(mm_timer);
-    }
-    mm_timer = timeSetEvent((UINT)nearest_delta_ms,
-                            mm_tc.wPeriodMin,
-                            mm_alarm_handler,
-                            (DWORD_PTR)t,
-                            TIME_ONESHOT | TIME_CALLBACK_FUNCTION);
-
-    if (!mm_timer) {
-        fprintf(stderr, "Failed to re-arm win32 alarm timer\n");
-        timeEndPeriod(mm_tc.wPeriodMin);
-        exit(1);
-    }
-}
-
-static int win32_start_timer(struct qemu_alarm_timer *t)
-{
-    HANDLE hTimer;
-    BOOLEAN success;
-
-    /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
-       is zero) that has already expired, the timer is not updated.  Since
-       creating a new timer is relatively expensive, set a bogus one-hour
-       interval in the dynticks case.  */
-    success = CreateTimerQueueTimer(&hTimer,
-                          NULL,
-                          host_alarm_handler,
-                          t,
-                          1,
-                          3600000,
-                          WT_EXECUTEINTIMERTHREAD);
-
-    if (!success) {
-        fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
-                GetLastError());
-        return -1;
-    }
-
-    t->timer = hTimer;
-    return 0;
-}
-
-static void win32_stop_timer(struct qemu_alarm_timer *t)
-{
-    HANDLE hTimer = t->timer;
-
-    if (hTimer) {
-        DeleteTimerQueueTimer(NULL, hTimer, NULL);
-    }
-}
-
-static void win32_rearm_timer(struct qemu_alarm_timer *t,
-                              int64_t nearest_delta_ns)
-{
-    HANDLE hTimer = t->timer;
-    int64_t nearest_delta_ms;
-    BOOLEAN success;
-
-    nearest_delta_ms = nearest_delta_ns / 1000000;
-    if (nearest_delta_ms < 1) {
-        nearest_delta_ms = 1;
-    }
-    /* ULONG_MAX can be 32 bit */
-    if (nearest_delta_ms > ULONG_MAX) {
-        nearest_delta_ms = ULONG_MAX;
-    }
-    success = ChangeTimerQueueTimer(NULL,
-                                    hTimer,
-                                    (unsigned long) nearest_delta_ms,
-                                    3600000);
-
-    if (!success) {
-        fprintf(stderr, "Failed to rearm win32 alarm timer: %ld\n",
-                GetLastError());
-        exit(-1);
-    }
-
-}
-
-#endif /* _WIN32 */
-
-static void quit_timers(void)
-{
-    struct qemu_alarm_timer *t = alarm_timer;
-    alarm_timer = NULL;
-    t->stop(t);
-}
-
-#ifdef CONFIG_POSIX
-static void reinit_timers(void)
-{
-    struct qemu_alarm_timer *t = alarm_timer;
-    t->stop(t);
-    if (t->start(t)) {
-        fprintf(stderr, "Internal timer error: aborting\n");
-        exit(1);
-    }
-    qemu_rearm_alarm_timer(t);
-}
-#endif /* CONFIG_POSIX */
-
-int init_timer_alarm(void)
-{
-    struct qemu_alarm_timer *t = NULL;
-    int i, err = -1;
-
-    if (alarm_timer) {
-        return 0;
-    }
-
-    for (i = 0; alarm_timers[i].name; i++) {
-        t = &alarm_timers[i];
-
-        err = t->start(t);
-        if (!err)
-            break;
-    }
-
-    if (err) {
-        err = -ENOENT;
-        goto fail;
-    }
-
-    atexit(quit_timers);
-#ifdef CONFIG_POSIX
-    pthread_atfork(NULL, NULL, reinit_timers);
-#endif
-    alarm_timer = t;
-    return 0;
-
-fail:
-    return err;
-}
-
diff --git a/vl.c b/vl.c
index 25b8f2f..612c609 100644
--- a/vl.c
+++ b/vl.c
@@ -3714,7 +3714,10 @@ int main(int argc, char **argv, char **envp)
                 old_param = 1;
                 break;
             case QEMU_OPTION_clock:
-                configure_alarms(optarg);
+                /* Once upon a time we did:
+                 *   configure_alarms(optarg);
+                 * here. This is stubbed out for compatibility.
+                 */
                 break;
             case QEMU_OPTION_startdate:
                 configure_rtc_date_offset(optarg, 1);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll
  2013-07-19 17:26                                   ` [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll Alex Bligh
@ 2013-07-25  9:00                                     ` Stefan Hajnoczi
  2013-07-25  9:02                                     ` Stefan Hajnoczi
  1 sibling, 0 replies; 29+ messages in thread
From: Stefan Hajnoczi @ 2013-07-25  9:00 UTC (permalink / raw)
  To: Alex Bligh
  Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi,
	Paolo Bonzini, rth

On Fri, Jul 19, 2013 at 06:26:23PM +0100, Alex Bligh wrote:
> [ This is a patch for RFC purposes only. It is compile tested on Linux x86_64 only
> and passes make check (or rather did before make check started dying in the
> boot order test - different bug). I'd like to know whether I'm going in
> the right direction ]

Looks promising.

> We no longer need alarm timers to trigger QEMUTimer as we'll be polling
> them in aio_poll.
> 
> Remove static declaration from qemu_new_clock and introduce qemu_free_clock.
> 
> Maintain a list of QEMUClocks.
> 
> Introduce qemu_clock_deadline_ns and qemu_clock_deadline_all_ns, which calculate how
> long aio_poll etc. should wait, plus (for the time being) a conversion to milliseconds.
> 
> Make qemu_run_timers return a bool to indicate progress.
> 
> Add QEMUClock to AioContext.
> 
> Run timers attached to clock in aio_poll

Too many logical changes for a single patch :).  Please split this into
a series.

> @@ -52,6 +44,8 @@ struct QEMUClock {
>  
>      int type;
>      bool enabled;
> +
> +    QLIST_ENTRY(QEMUClock) list;

Please avoid global state.  AioContext should be usable from multiple
threads, and this list would require synchronization.

main-loop.c should call functions to run timers and calculate the
nearest deadline on vm_clock/host_clock/rt_clock.  It knows about them;
they are defined in <qemu/timer.h>.

That way no list is needed.
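
As a rough sketch of that approach (assuming the per-clock
qemu_clock_deadline_ns() from the RFC returns -1 when a clock has no
pending timer; the helper names below are only illustrative), the
timeout calculation could look like:

#include <limits.h>
#include "qemu/timer.h"

/* Sketch only: derive the poll timeout from the three global clocks
 * directly, so no global list of QEMUClocks is needed.  Assumes
 * qemu_clock_deadline_ns(clock) returns nanoseconds until the nearest
 * timer on that clock, or -1 if none is pending.
 */
static int64_t deadline_min_ns(int64_t a, int64_t b)
{
    if (a == -1) {
        return b;
    }
    if (b == -1) {
        return a;
    }
    return a < b ? a : b;
}

static int main_loop_timeout_ms(void)
{
    int64_t deadline = qemu_clock_deadline_ns(vm_clock);
    int64_t ms;

    deadline = deadline_min_ns(deadline, qemu_clock_deadline_ns(host_clock));
    deadline = deadline_min_ns(deadline, qemu_clock_deadline_ns(rt_clock));

    if (deadline == -1) {
        return -1;              /* no timer pending: block indefinitely */
    }
    /* Round up to the next millisecond so we never wake early. */
    ms = (deadline + 999999) / 1000000;
    return ms > INT_MAX ? INT_MAX : (int)ms;
}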

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll
  2013-07-19 17:26                                   ` [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll Alex Bligh
  2013-07-25  9:00                                     ` Stefan Hajnoczi
@ 2013-07-25  9:02                                     ` Stefan Hajnoczi
  1 sibling, 0 replies; 29+ messages in thread
From: Stefan Hajnoczi @ 2013-07-25  9:02 UTC (permalink / raw)
  To: Alex Bligh
  Cc: Kevin Wolf, Anthony Liguori, qemu-devel, Stefan Hajnoczi,
	Paolo Bonzini, rth

On Fri, Jul 19, 2013 at 06:26:23PM +0100, Alex Bligh wrote:
> [ This is a patch for RFC purposes only. It is compile tested on Linux x86_64 only
> and passes make check (or rather did before make check started dying in the
> boot order test - different bug). I'd like to know whether I'm going in
> the right direction ]
> 
> We no longer need alarm timers to trigger QEMUTimer as we'll be polling
> them in aio_poll.
> 
> Remove static declaration from qemu_new_clock and introduce qemu_free_clock.
> 
> Maintain a list of QEMUClocks.
> 
> Introduce qemu_clock_deadline_ns and qemu_clock_deadline_all_ns, which calculate how
> long aio_poll etc. should wait, plus (for the time being) a conversion to milliseconds.
> 
> Make qemu_run_timers return a bool to indicate progress.
> 
> Add QEMUClock to AioContext.
> 
> Run timers attached to clock in aio_poll
> 
> Signed-off-by: Alex Bligh <alex@alex.org.uk>
> ---
>  aio-posix.c          |   16 +-
>  aio-win32.c          |   20 +-
>  async.c              |    2 +
>  include/block/aio.h  |    5 +
>  include/qemu/timer.h |   15 +-
>  main-loop.c          |    9 +-
>  qemu-timer.c         |  599 ++++++++------------------------------------------
>  vl.c                 |    5 +-
>  8 files changed, 150 insertions(+), 521 deletions(-)

I think you've already split this into a series.  Will review that, just
catching up on your emails :).

Stefan

^ permalink raw reply	[flat|nested] 29+ messages in thread


Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-06 16:24 [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves Alex Bligh
2013-07-06 16:31 ` Alex Bligh
2013-07-06 18:04   ` [Qemu-devel] [PATCHv2] " Alex Bligh
2013-07-15 14:25 ` [Qemu-devel] [PATCH] " Paolo Bonzini
2013-07-15 20:15   ` Alex Bligh
2013-07-15 20:53     ` Paolo Bonzini
2013-07-15 23:04       ` Alex Bligh
2013-07-16  6:16         ` Paolo Bonzini
2013-07-16  7:30           ` Alex Bligh
2013-07-16  7:34             ` Paolo Bonzini
2013-07-16 15:29               ` Alex Bligh
2013-07-16 15:43                 ` Paolo Bonzini
2013-07-16 16:14                   ` Alex Bligh
2013-07-16 16:55                     ` Paolo Bonzini
2013-07-16 21:22                       ` [Qemu-devel] [PATCHv3] " Alex Bligh
2013-07-16 21:24                       ` [Qemu-devel] [PATCH] " Alex Bligh
2013-07-17  3:02                         ` Stefan Hajnoczi
2013-07-17  8:07                           ` Alex Bligh
2013-07-17  8:11                             ` Paolo Bonzini
2013-07-17 16:09                               ` Alex Bligh
2013-07-18 18:48                           ` Alex Bligh
2013-07-19  1:58                             ` Stefan Hajnoczi
2013-07-19  6:22                               ` Paolo Bonzini
2013-07-19  6:38                               ` Alex Bligh
2013-07-19  6:51                                 ` Paolo Bonzini
2013-07-19 17:26                                   ` [Qemu-devel] [PATCH] [RFC] aio/timers: Drop alarm timers; introduce QEMUClock to AioContext; run timers in aio_poll Alex Bligh
2013-07-25  9:00                                     ` Stefan Hajnoczi
2013-07-25  9:02                                     ` Stefan Hajnoczi
2013-07-17  7:50                       ` [Qemu-devel] [PATCH] [RFC] aio/async: Add timed bottom-halves Kevin Wolf
