All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: Fam Zheng <fam@euphon.net>,
	Peter Maydell <peter.maydell@linaro.org>,
	qemu-block@nongnu.org, Max Reitz <mreitz@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Kevin Wolf <kwolf@redhat.com>
Subject: [PULL 8/9] aio-posix: support userspace polling of fd monitoring
Date: Wed, 11 Mar 2020 12:40:44 +0000	[thread overview]
Message-ID: <20200311124045.277969-9-stefanha@redhat.com> (raw)
In-Reply-To: <20200311124045.277969-1-stefanha@redhat.com>

Unlike ppoll(2) and epoll(7), Linux io_uring completions can be polled
from userspace.  Previously userspace polling was only allowed when all
AioHandler's had an ->io_poll() callback.  This prevented starvation of
fds by userspace pollable handlers.

Add the FDMonOps->need_wait() callback that enables userspace polling
even when some AioHandlers lack ->io_poll().

For example, it's now possible to do userspace polling when a TCP/IP
socket is monitored thanks to Linux io_uring.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Link: https://lore.kernel.org/r/20200305170806.1313245-7-stefanha@redhat.com
Message-Id: <20200305170806.1313245-7-stefanha@redhat.com>
---
 include/block/aio.h   | 19 +++++++++++++++++++
 util/aio-posix.c      | 11 ++++++++---
 util/fdmon-epoll.c    |  1 +
 util/fdmon-io_uring.c |  6 ++++++
 util/fdmon-poll.c     |  1 +
 5 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index 83fc9b844d..f07ebb76b8 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -55,6 +55,9 @@ struct ThreadPool;
 struct LinuxAioState;
 struct LuringState;
 
+/* Is polling disabled? */
+bool aio_poll_disabled(AioContext *ctx);
+
 /* Callbacks for file descriptor monitoring implementations */
 typedef struct {
     /*
@@ -84,6 +87,22 @@ typedef struct {
      * Returns: number of ready file descriptors.
      */
     int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);
+
+    /*
+     * need_wait:
+     * @ctx: the AioContext
+     *
+     * Tell aio_poll() when to stop userspace polling early because ->wait()
+     * has fds ready.
+     *
+     * File descriptor monitoring implementations that cannot poll fd readiness
+     * from userspace should use aio_poll_disabled() here.  This ensures that
+     * file descriptors are not starved by handlers that frequently make
+     * progress via userspace polling.
+     *
+     * Returns: true if ->wait() should be called, false otherwise.
+     */
+    bool (*need_wait)(AioContext *ctx);
 } FDMonOps;
 
 /*
diff --git a/util/aio-posix.c b/util/aio-posix.c
index ffd9cc381b..759989b45b 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -22,6 +22,11 @@
 #include "trace.h"
 #include "aio-posix.h"
 
+bool aio_poll_disabled(AioContext *ctx)
+{
+    return atomic_read(&ctx->poll_disable_cnt);
+}
+
 void aio_add_ready_handler(AioHandlerList *ready_list,
                            AioHandler *node,
                            int revents)
@@ -423,7 +428,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
         elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
         max_ns = qemu_soonest_timeout(*timeout, max_ns);
         assert(!(max_ns && progress));
-    } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt));
+    } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));
 
     /* If time has passed with no successful polling, adjust *timeout to
      * keep the same ending time.
@@ -451,7 +456,7 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 {
     int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
 
-    if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
+    if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         poll_set_started(ctx, true);
 
         if (run_poll_handlers(ctx, max_ns, timeout)) {
@@ -501,7 +506,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* If polling is allowed, non-blocking aio_poll does not need the
      * system call---a single round of run_poll_handlers_once suffices.
      */
-    if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
+    if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
         ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
     }
 
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index d56b69468b..fcd989d47d 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -100,6 +100,7 @@ out:
 static const FDMonOps fdmon_epoll_ops = {
     .update = fdmon_epoll_update,
     .wait = fdmon_epoll_wait,
+    .need_wait = aio_poll_disabled,
 };
 
 static bool fdmon_epoll_try_enable(AioContext *ctx)
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index fb99b4b61e..893b79b622 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -288,9 +288,15 @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
     return process_cq_ring(ctx, ready_list);
 }
 
+static bool fdmon_io_uring_need_wait(AioContext *ctx)
+{
+    return io_uring_cq_ready(&ctx->fdmon_io_uring);
+}
+
 static const FDMonOps fdmon_io_uring_ops = {
     .update = fdmon_io_uring_update,
     .wait = fdmon_io_uring_wait,
+    .need_wait = fdmon_io_uring_need_wait,
 };
 
 bool fdmon_io_uring_setup(AioContext *ctx)
diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c
index 28114a0f39..488067b679 100644
--- a/util/fdmon-poll.c
+++ b/util/fdmon-poll.c
@@ -103,4 +103,5 @@ static void fdmon_poll_update(AioContext *ctx,
 const FDMonOps fdmon_poll_ops = {
     .update = fdmon_poll_update,
     .wait = fdmon_poll_wait,
+    .need_wait = aio_poll_disabled,
 };
-- 
2.24.1


  parent reply	other threads:[~2020-03-11 12:46 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-11 12:40 [PULL 0/9] Block patches Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 1/9] qemu/queue.h: clear linked list pointers on remove Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 2/9] aio-posix: remove confusing QLIST_SAFE_REMOVE() Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 3/9] aio-posix: completely stop polling when disabled Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 4/9] aio-posix: move RCU_READ_LOCK() into run_poll_handlers() Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 5/9] aio-posix: extract ppoll(2) and epoll(7) fd monitoring Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 6/9] aio-posix: simplify FDMonOps->update() prototype Stefan Hajnoczi
2020-03-11 12:40 ` [PULL 7/9] aio-posix: add io_uring fd monitoring implementation Stefan Hajnoczi
2020-03-11 12:40 ` Stefan Hajnoczi [this message]
2020-03-11 12:40 ` [PULL 9/9] aio-posix: remove idle poll handlers to improve scalability Stefan Hajnoczi
2020-03-11 13:50 ` [PULL 0/9] Block patches no-reply
2020-03-11 16:54   ` Stefan Hajnoczi
2020-03-11 13:51 ` no-reply
2020-03-11 16:55   ` Stefan Hajnoczi
2020-03-11 17:06 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200311124045.277969-9-stefanha@redhat.com \
    --to=stefanha@redhat.com \
    --cc=fam@euphon.net \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.