From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Kevin Wolf" <kwolf@redhat.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Thomas Huth" <thuth@redhat.com>, "Sergio Lopez" <slp@redhat.com>,
	"Eduardo Habkost" <ehabkost@redhat.com>,
	qemu-block@nongnu.org, "Michael S. Tsirkin" <mst@redhat.com>,
	"Laurent Vivier" <lvivier@redhat.com>,
	"Max Reitz" <mreitz@redhat.com>,
	"Alexander Bulekov" <alxndr@bu.edu>,
	"Bandan Das" <bsd@redhat.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Marc-André Lureau" <marcandre.lureau@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Fam Zheng" <fam@euphon.net>,
	"Richard Henderson" <rth@twiddle.net>
Subject: [PULL 09/31] aio-posix: make AioHandler dispatch O(1) with epoll
Date: Sat, 22 Feb 2020 08:50:08 +0000
Message-ID: <20200222085030.1760640-10-stefanha@redhat.com>
In-Reply-To: <20200222085030.1760640-1-stefanha@redhat.com>

File descriptor monitoring is O(1) with epoll(7), but
aio_dispatch_handlers() still scans all AioHandlers instead of
dispatching just those that are ready.  This makes aio_poll() O(n) with
respect to the total number of registered handlers.
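
As background, a minimal standalone sketch (not part of this patch; the
Handler type and callback are made up for illustration) of why
dispatching straight from epoll_wait()'s return value costs O(ready
fds) rather than O(registered handlers):

    #include <sys/epoll.h>

    typedef struct Handler {
        void (*cb)(void *opaque);   /* invoked when the fd is ready */
        void *opaque;
    } Handler;

    #define MAX_EVENTS 16

    static void dispatch_ready_only(int epollfd)
    {
        struct epoll_event events[MAX_EVENTS];
        int n = epoll_wait(epollfd, events, MAX_EVENTS, 0);

        /* Only fds that actually fired are visited here */
        for (int i = 0; i < n; i++) {
            Handler *h = events[i].data.ptr; /* set at EPOLL_CTL_ADD time */
            h->cb(h->opaque);
        }
    }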

Add a local ready_list to aio_poll() so that each nested aio_poll()
builds a list of handlers ready to be dispatched.  Since file descriptor
polling is level-triggered, nested aio_poll() calls also see fds that
were ready in the parent but not yet dispatched.  This guarantees that
nested aio_poll() invocations will dispatch all fds, even those that
became ready before the nested invocation.
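
The level-triggered behaviour relied on here can be seen in a tiny
self-contained example (illustrative only, not part of this patch): an
fd that was reported ready but never drained is reported again by the
next epoll_wait() call, which is what lets a nested aio_poll() pick up
fds that the parent has not dispatched yet.

    #include <assert.h>
    #include <unistd.h>
    #include <sys/epoll.h>

    int main(void)
    {
        int fds[2];
        int epollfd = epoll_create1(0);
        struct epoll_event ev = { .events = EPOLLIN }, out;

        assert(pipe(fds) == 0);
        ev.data.fd = fds[0];
        epoll_ctl(epollfd, EPOLL_CTL_ADD, fds[0], &ev);
        assert(write(fds[1], "x", 1) == 1);

        /* The first wait reports the readable fd... */
        assert(epoll_wait(epollfd, &out, 1, 0) == 1);
        /* ...and so does a second ("nested") wait, because the byte was
         * never read and epoll is level-triggered by default. */
        assert(epoll_wait(epollfd, &out, 1, 0) == 1);
        return 0;
    }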

Since only handlers ready to be dispatched are placed onto the
ready_list, the new aio_dispatch_ready_handlers() function provides O(1)
dispatch.
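
The mechanism is an intrusive list: each AioHandler gains a second link
field so it can sit on the long-lived handler list and, transiently, on
the per-aio_poll() ready list at the same time.  A rough sketch of that
idea using the BSD <sys/queue.h> macros that QEMU's QLIST mirrors (the
Node type and fd field are illustrative only):

    #include <stdio.h>
    #include <sys/queue.h>

    typedef struct Node {
        int fd;
        LIST_ENTRY(Node) node;        /* long-lived registration list */
        LIST_ENTRY(Node) node_ready;  /* transient per-poll ready list */
    } Node;

    LIST_HEAD(NodeList, Node);

    int main(void)
    {
        struct NodeList all = LIST_HEAD_INITIALIZER(all);
        struct NodeList ready = LIST_HEAD_INITIALIZER(ready);
        Node a = { .fd = 3 }, b = { .fd = 4 };
        Node *n;

        LIST_INSERT_HEAD(&all, &a, node);
        LIST_INSERT_HEAD(&all, &b, node);

        /* Pretend only fd 3 fired: only that node goes on the ready list */
        LIST_INSERT_HEAD(&ready, &a, node_ready);

        /* Dispatch visits only ready nodes, no matter how many are registered */
        while ((n = LIST_FIRST(&ready))) {
            LIST_REMOVE(n, node_ready);
            printf("dispatch fd %d\n", n->fd);
        }
        return 0;
    }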

Note that AioContext polling is still O(n) and currently cannot be
fully disabled.  That remaining O(n) cost still needs to be addressed
before aio_poll() is fully O(1).

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Message-id: 20200214171712.541358-6-stefanha@redhat.com
[Fix compilation error on macOS where there is no epoll(7).  The
aio_epoll() prototype was out of date and add_ready_handler() needed to
be moved outside the ifdef.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.c | 110 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 78 insertions(+), 32 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index b5cfdbd2f6..9e1befc0c0 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -35,9 +35,20 @@ struct AioHandler
     void *opaque;
     bool is_external;
     QLIST_ENTRY(AioHandler) node;
+    QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
     QLIST_ENTRY(AioHandler) node_deleted;
 };
 
+/* Add a handler to a ready list */
+static void add_ready_handler(AioHandlerList *ready_list,
+                              AioHandler *node,
+                              int revents)
+{
+    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+    node->pfd.revents = revents;
+    QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
 #ifdef CONFIG_EPOLL_CREATE1
 
 /* The fd number threshold to switch to epoll */
@@ -105,7 +116,8 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
     }
 }
 
-static int aio_epoll(AioContext *ctx, int64_t timeout)
+static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
+                     int64_t timeout)
 {
     GPollFD pfd = {
         .fd = ctx->epollfd,
@@ -130,11 +142,13 @@ static int aio_epoll(AioContext *ctx, int64_t timeout)
         }
         for (i = 0; i < ret; i++) {
             int ev = events[i].events;
+            int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+                          (ev & EPOLLOUT ? G_IO_OUT : 0) |
+                          (ev & EPOLLHUP ? G_IO_HUP : 0) |
+                          (ev & EPOLLERR ? G_IO_ERR : 0);
+
             node = events[i].data.ptr;
-            node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
-                (ev & EPOLLOUT ? G_IO_OUT : 0) |
-                (ev & EPOLLHUP ? G_IO_HUP : 0) |
-                (ev & EPOLLERR ? G_IO_ERR : 0);
+            add_ready_handler(ready_list, node, revents);
         }
     }
 out:
@@ -172,8 +186,8 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
 {
 }
 
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
-                     unsigned npfd, int64_t timeout)
+static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
+                     int64_t timeout)
 {
     assert(false);
 }
@@ -438,36 +452,63 @@ static void aio_free_deleted_handlers(AioContext *ctx)
     qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
 }
 
-static bool aio_dispatch_handlers(AioContext *ctx)
+static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
 {
-    AioHandler *node, *tmp;
     bool progress = false;
+    int revents;
 
-    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
-        int revents;
+    revents = node->pfd.revents & node->pfd.events;
+    node->pfd.revents = 0;
 
-        revents = node->pfd.revents & node->pfd.events;
-        node->pfd.revents = 0;
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+        aio_node_check(ctx, node->is_external) &&
+        node->io_read) {
+        node->io_read(node->opaque);
 
-        if (!QLIST_IS_INSERTED(node, node_deleted) &&
-            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
-            aio_node_check(ctx, node->is_external) &&
-            node->io_read) {
-            node->io_read(node->opaque);
-
-            /* aio_notify() does not count as progress */
-            if (node->opaque != &ctx->notifier) {
-                progress = true;
-            }
-        }
-        if (!QLIST_IS_INSERTED(node, node_deleted) &&
-            (revents & (G_IO_OUT | G_IO_ERR)) &&
-            aio_node_check(ctx, node->is_external) &&
-            node->io_write) {
-            node->io_write(node->opaque);
+        /* aio_notify() does not count as progress */
+        if (node->opaque != &ctx->notifier) {
             progress = true;
         }
     }
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        (revents & (G_IO_OUT | G_IO_ERR)) &&
+        aio_node_check(ctx, node->is_external) &&
+        node->io_write) {
+        node->io_write(node->opaque);
+        progress = true;
+    }
+
+    return progress;
+}
+
+/*
+ * If we have a list of ready handlers then this is more efficient than
+ * scanning all handlers with aio_dispatch_handlers().
+ */
+static bool aio_dispatch_ready_handlers(AioContext *ctx,
+                                        AioHandlerList *ready_list)
+{
+    bool progress = false;
+    AioHandler *node;
+
+    while ((node = QLIST_FIRST(ready_list))) {
+        QLIST_SAFE_REMOVE(node, node_ready);
+        progress = aio_dispatch_handler(ctx, node) || progress;
+    }
+
+    return progress;
+}
+
+/* Slower than aio_dispatch_ready_handlers() but only used via glib */
+static bool aio_dispatch_handlers(AioContext *ctx)
+{
+    AioHandler *node, *tmp;
+    bool progress = false;
+
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+        progress = aio_dispatch_handler(ctx, node) || progress;
+    }
 
     return progress;
 }
@@ -639,6 +680,7 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 
 bool aio_poll(AioContext *ctx, bool blocking)
 {
+    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
     AioHandler *node;
     int i;
     int ret = 0;
@@ -689,7 +731,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
         /* wait until next event */
         if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
             npfd = 0; /* pollfds[] is not being used */
-            ret = aio_epoll(ctx, timeout);
+            ret = aio_epoll(ctx, &ready_list, timeout);
         } else  {
             ret = qemu_poll_ns(pollfds, npfd, timeout);
         }
@@ -744,7 +786,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
         for (i = 0; i < npfd; i++) {
-            nodes[i]->pfd.revents = pollfds[i].revents;
+            int revents = pollfds[i].revents;
+
+            if (revents) {
+                add_ready_handler(&ready_list, nodes[i], revents);
+            }
         }
     }
 
@@ -753,7 +799,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     progress |= aio_bh_poll(ctx);
 
     if (ret > 0) {
-        progress |= aio_dispatch_handlers(ctx);
+        progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
     }
 
     aio_free_deleted_handlers(ctx);
-- 
2.24.1


Thread overview: 39+ messages
2020-02-22  8:49 [PULL 00/31] Block patches Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 01/31] virtio: increase virtqueue size for virtio-scsi and virtio-blk Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 02/31] aio-posix: avoid reacquiring rcu_read_lock() when polling Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 03/31] rcu_queue: add QSLIST functions Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 04/31] util/async: make bh_aio_poll() O(1) Stefan Hajnoczi
2020-03-16 16:42   ` Marc-André Lureau
2020-02-22  8:50 ` [PULL 05/31] aio-posix: fix use after leaving scope in aio_poll() Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 06/31] aio-posix: don't pass ns timeout to epoll_wait() Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 07/31] qemu/queue.h: add QLIST_SAFE_REMOVE() Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 08/31] aio-posix: make AioHandler deletion O(1) Stefan Hajnoczi
2020-02-22  8:50 ` Stefan Hajnoczi [this message]
2020-02-22  8:50 ` [PULL 10/31] softmmu: move vl.c to softmmu/ Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 11/31] softmmu: split off vl.c:main() into main.c Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 12/31] module: check module wasn't already initialized Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 13/31] fuzz: add FUZZ_TARGET module type Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 14/31] qtest: add qtest_server_send abstraction Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 15/31] libqtest: add a layer of abstraction to send/recv Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 16/31] libqtest: make bufwrite rely on the TransportOps Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 17/31] qtest: add in-process incoming command handler Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 18/31] libqos: rename i2c_send and i2c_recv Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 19/31] libqos: split qos-test and libqos makefile vars Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 20/31] libqos: move useful qos-test funcs to qos_external Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 21/31] fuzz: add fuzzer skeleton Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 22/31] exec: keep ram block across fork when using qtest Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 23/31] main: keep rcu_atfork callback enabled for qtest Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 24/31] fuzz: support for fork-based fuzzing Stefan Hajnoczi
2020-02-22 11:34   ` Eric Blake
2020-02-24 11:35     ` Stefan Hajnoczi
2020-02-27  2:50       ` Alexander Bulekov
2020-02-22  8:50 ` [PULL 25/31] fuzz: add support for qos-assisted fuzz targets Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 26/31] fuzz: add target/fuzz makefile rules Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 27/31] fuzz: add configure flag --enable-fuzzing Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 28/31] fuzz: add i440fx fuzz targets Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 29/31] fuzz: add virtio-net fuzz target Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 30/31] fuzz: add virtio-scsi " Stefan Hajnoczi
2020-02-22  8:50 ` [PULL 31/31] fuzz: add documentation to docs/devel/ Stefan Hajnoczi
2020-02-22  9:13 ` [PULL 00/31] Block patches no-reply
2020-02-24 11:33   ` Stefan Hajnoczi
2020-02-24 12:47 ` Peter Maydell
