All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [PULL 16/27] virtio: use defer_call() in virtio_irqfd_notify()
Date: Tue, 31 Oct 2023 19:59:07 +0100	[thread overview]
Message-ID: <20231031185918.346940-17-kwolf@redhat.com> (raw)
In-Reply-To: <20231031185918.346940-1-kwolf@redhat.com>

From: Stefan Hajnoczi <stefanha@redhat.com>

virtio-blk and virtio-scsi invoke virtio_notify_irqfd() to send Used
Buffer Notifications from an IOThread. This involves an eventfd
write(2) syscall. Calling this repeatedly when completing multiple I/O
requests in a row is wasteful.

Use the defer_call() API to batch together virtio_notify_irqfd() calls
made during thread pool (aio=threads), Linux AIO (aio=native), and
io_uring (aio=io_uring) completion processing.

Behavior is unchanged for emulated devices that do not use
defer_call_begin()/defer_call_end() since defer_call() immediately
invokes the callback when called outside a
defer_call_begin()/defer_call_end() region.

fio rw=randread bs=4k iodepth=64 numjobs=8 IOPS increases by ~9% with a
single IOThread and 8 vCPUs. iodepth=1 decreases by ~1% but this could
be noise. Detailed performance data and configuration specifics are
available here:
https://gitlab.com/stefanha/virt-playbooks/-/tree/blk_io_plug-irqfd

This duplicates the BH that virtio-blk uses for batching. The next
commit will remove that virtio-blk BH.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20230913200045.1024233-4-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io_uring.c       |  6 ++++++
 block/linux-aio.c      |  4 ++++
 hw/virtio/virtio.c     | 13 ++++++++++++-
 util/thread-pool.c     |  5 +++++
 hw/virtio/trace-events |  1 +
 5 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/block/io_uring.c b/block/io_uring.c
index 3a1e1f45b3..7cdd00e9f1 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -125,6 +125,9 @@ static void luring_process_completions(LuringState *s)
 {
     struct io_uring_cqe *cqes;
     int total_bytes;
+
+    defer_call_begin();
+
     /*
      * Request completion callbacks can run the nested event loop.
      * Schedule ourselves so the nested event loop will "see" remaining
@@ -217,7 +220,10 @@ end:
             aio_co_wake(luringcb->co);
         }
     }
+
     qemu_bh_cancel(s->completion_bh);
+
+    defer_call_end();
 }
 
 static int ioq_submit(LuringState *s)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index a2670b3e46..ec05d946f3 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -205,6 +205,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 {
     struct io_event *events;
 
+    defer_call_begin();
+
     /* Reschedule so nested event loops see currently pending completions */
     qemu_bh_schedule(s->completion_bh);
 
@@ -231,6 +233,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
      * own `for` loop.  If we are the last all counters dropped to zero. */
     s->event_max = 0;
     s->event_idx = 0;
+
+    defer_call_end();
 }
 
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fb24bc927b..e5105571cf 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -15,6 +15,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-commands-virtio.h"
 #include "trace.h"
+#include "qemu/defer-call.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
@@ -2445,6 +2446,16 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
     }
 }
 
+/* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
+static void virtio_notify_irqfd_deferred_fn(void *opaque)
+{
+    EventNotifier *notifier = opaque;
+    VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);
+
+    trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
+    event_notifier_set(notifier);
+}
+
 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
 {
     WITH_RCU_READ_LOCK_GUARD() {
@@ -2471,7 +2482,7 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
      * to an atomic operation.
      */
     virtio_set_isr(vq->vdev, 0x1);
-    event_notifier_set(&vq->guest_notifier);
+    defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
 }
 
 static void virtio_irq(VirtQueue *vq)
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 22f9ba3286..27eb777e85 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -15,6 +15,7 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include "qemu/osdep.h"
+#include "qemu/defer-call.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/coroutine.h"
@@ -175,6 +176,8 @@ static void thread_pool_completion_bh(void *opaque)
     ThreadPool *pool = opaque;
     ThreadPoolElement *elem, *next;
 
+    defer_call_begin(); /* cb() may use defer_call() to coalesce work */
+
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
         if (elem->state != THREAD_DONE) {
@@ -208,6 +211,8 @@ restart:
             qemu_aio_unref(elem);
         }
     }
+
+    defer_call_end();
 }
 
 static void thread_pool_cancel(BlockAIOCB *acb)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 1cb9027d1e..0af7a2886c 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -73,6 +73,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "
 virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
 virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
 virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
+virtio_notify_irqfd_deferred_fn(void *vdev, void *vq) "vdev %p vq %p"
 virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
 virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
 virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
-- 
2.41.0



  parent reply	other threads:[~2023-10-31 19:03 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-31 18:58 [PULL 00/27] Block layer patches Kevin Wolf
2023-10-31 18:58 ` [PULL 01/27] qemu-img: rebase: stop when reaching EOF of old backing file Kevin Wolf
2023-10-31 18:58 ` [PULL 02/27] qemu-iotests: 024: add rebasing test case for overlay_size > backing_size Kevin Wolf
2023-10-31 18:58 ` [PULL 03/27] qemu-img: rebase: use backing files' BlockBackend for buffer alignment Kevin Wolf
2023-10-31 18:58 ` [PULL 04/27] qemu-img: add chunk size parameter to compare_buffers() Kevin Wolf
2023-10-31 18:58 ` [PULL 05/27] qemu-img: rebase: avoid unnecessary COW operations Kevin Wolf
2023-10-31 18:58 ` [PULL 06/27] iotests/{024, 271}: add testcases for qemu-img rebase Kevin Wolf
2023-10-31 18:58 ` [PULL 07/27] qemu-img: add compression option to rebase subcommand Kevin Wolf
2023-10-31 18:58 ` [PULL 08/27] iotests: add tests for "qemu-img rebase" with compression Kevin Wolf
2023-10-31 18:59 ` [PULL 09/27] block: Fix locking in media change monitor commands Kevin Wolf
2023-10-31 18:59 ` [PULL 10/27] iotests: Test media change with iothreads Kevin Wolf
2023-10-31 18:59 ` [PULL 11/27] blockjob: drop AioContext lock before calling bdrv_graph_wrlock() Kevin Wolf
2023-10-31 18:59 ` [PULL 12/27] block: avoid potential deadlock during bdrv_graph_wrlock() in bdrv_close() Kevin Wolf
2023-10-31 18:59 ` [PULL 13/27] blockdev: mirror: avoid potential deadlock when using iothread Kevin Wolf
2023-10-31 18:59 ` [PULL 14/27] block: rename blk_io_plug_call() API to defer_call() Kevin Wolf
2023-10-31 18:59 ` [PULL 15/27] util/defer-call: move defer_call() to util/ Kevin Wolf
2023-10-31 18:59 ` Kevin Wolf [this message]
2023-10-31 18:59 ` [PULL 17/27] virtio-blk: remove batch notification BH Kevin Wolf
2023-10-31 18:59 ` [PULL 18/27] blockjob: introduce block-job-change QMP command Kevin Wolf
2023-10-31 18:59 ` [PULL 19/27] block/mirror: set actively_synced even after the job is ready Kevin Wolf
2023-10-31 18:59 ` [PULL 20/27] block/mirror: move dirty bitmap to filter Kevin Wolf
2023-10-31 18:59 ` [PULL 21/27] block/mirror: determine copy_to_target only once Kevin Wolf
2023-10-31 18:59 ` [PULL 22/27] mirror: implement mirror_change method Kevin Wolf
2023-10-31 18:59 ` [PULL 23/27] qapi/block-core: use JobType for BlockJobInfo's type Kevin Wolf
2023-10-31 18:59 ` [PULL 24/27] qapi/block-core: turn BlockJobInfo into a union Kevin Wolf
2023-10-31 18:59 ` [PULL 25/27] blockjob: query driver-specific info via a new 'query' driver method Kevin Wolf
2023-10-31 18:59 ` [PULL 26/27] mirror: return mirror-specific information upon query Kevin Wolf
2023-10-31 18:59 ` [PULL 27/27] iotests: add test for changing mirror's copy_mode Kevin Wolf
2023-10-31 23:31 ` [PULL 00/27] Block layer patches Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231031185918.346940-17-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.