All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: Stefano Garzarella <sgarzare@redhat.com>
Cc: qemu-devel@nongnu.org, "Aarushi Mehta" <mehta.aaru20@gmail.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Julia Suvorova" <jusual@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Stefano Stabellini" <sstabellini@kernel.org>,
	"Paul Durrant" <paul@xen.org>, "Hanna Reitz" <hreitz@redhat.com>,
	"Kevin Wolf" <kwolf@redhat.com>, "Fam Zheng" <fam@euphon.net>,
	xen-devel@lists.xenproject.org, eblake@redhat.com,
	"Anthony Perard" <anthony.perard@citrix.com>,
	qemu-block@nongnu.org
Subject: Re: [PATCH v2 5/6] block/linux-aio: convert to blk_io_plug_call() API
Date: Wed, 24 May 2023 15:36:34 -0400	[thread overview]
Message-ID: <20230524193634.GB17357@fedora> (raw)
In-Reply-To: <n6hik7dbl26lomhxvfal2kjrq6jhdiknjepb372dvxavuwiw6q@3l3mo4eywoxq>

[-- Attachment #1: Type: text/plain, Size: 7677 bytes --]

On Wed, May 24, 2023 at 10:52:03AM +0200, Stefano Garzarella wrote:
> On Tue, May 23, 2023 at 01:12:59PM -0400, Stefan Hajnoczi wrote:
> > Stop using the .bdrv_co_io_plug() API because it is not multi-queue
> > block layer friendly. Use the new blk_io_plug_call() API to batch I/O
> > submission instead.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Reviewed-by: Eric Blake <eblake@redhat.com>
> > ---
> > include/block/raw-aio.h |  7 -------
> > block/file-posix.c      | 28 ----------------------------
> > block/linux-aio.c       | 41 +++++++++++------------------------------
> > 3 files changed, 11 insertions(+), 65 deletions(-)
> > 
> > diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
> > index da60ca13ef..0f63c2800c 100644
> > --- a/include/block/raw-aio.h
> > +++ b/include/block/raw-aio.h
> > @@ -62,13 +62,6 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> > 
> > void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
> > void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
> > -
> > -/*
> > - * laio_io_plug/unplug work in the thread's current AioContext, therefore the
> > - * caller must ensure that they are paired in the same IOThread.
> > - */
> > -void laio_io_plug(void);
> > -void laio_io_unplug(uint64_t dev_max_batch);
> > #endif
> > /* io_uring.c - Linux io_uring implementation */
> > #ifdef CONFIG_LINUX_IO_URING
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 7baa8491dd..ac1ed54811 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -2550,26 +2550,6 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
> >     return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
> > }
> > 
> > -static void coroutine_fn raw_co_io_plug(BlockDriverState *bs)
> > -{
> > -    BDRVRawState __attribute__((unused)) *s = bs->opaque;
> > -#ifdef CONFIG_LINUX_AIO
> > -    if (s->use_linux_aio) {
> > -        laio_io_plug();
> > -    }
> > -#endif
> > -}
> > -
> > -static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs)
> > -{
> > -    BDRVRawState __attribute__((unused)) *s = bs->opaque;
> > -#ifdef CONFIG_LINUX_AIO
> > -    if (s->use_linux_aio) {
> > -        laio_io_unplug(s->aio_max_batch);
> > -    }
> > -#endif
> > -}
> > -
> > static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
> > {
> >     BDRVRawState *s = bs->opaque;
> > @@ -3914,8 +3894,6 @@ BlockDriver bdrv_file = {
> >     .bdrv_co_copy_range_from = raw_co_copy_range_from,
> >     .bdrv_co_copy_range_to  = raw_co_copy_range_to,
> >     .bdrv_refresh_limits = raw_refresh_limits,
> > -    .bdrv_co_io_plug        = raw_co_io_plug,
> > -    .bdrv_co_io_unplug      = raw_co_io_unplug,
> >     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
> > 
> >     .bdrv_co_truncate                   = raw_co_truncate,
> > @@ -4286,8 +4264,6 @@ static BlockDriver bdrv_host_device = {
> >     .bdrv_co_copy_range_from = raw_co_copy_range_from,
> >     .bdrv_co_copy_range_to  = raw_co_copy_range_to,
> >     .bdrv_refresh_limits = raw_refresh_limits,
> > -    .bdrv_co_io_plug        = raw_co_io_plug,
> > -    .bdrv_co_io_unplug      = raw_co_io_unplug,
> >     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
> > 
> >     .bdrv_co_truncate                   = raw_co_truncate,
> > @@ -4424,8 +4400,6 @@ static BlockDriver bdrv_host_cdrom = {
> >     .bdrv_co_pwritev        = raw_co_pwritev,
> >     .bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
> >     .bdrv_refresh_limits    = cdrom_refresh_limits,
> > -    .bdrv_co_io_plug        = raw_co_io_plug,
> > -    .bdrv_co_io_unplug      = raw_co_io_unplug,
> >     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
> > 
> >     .bdrv_co_truncate                   = raw_co_truncate,
> > @@ -4552,8 +4526,6 @@ static BlockDriver bdrv_host_cdrom = {
> >     .bdrv_co_pwritev        = raw_co_pwritev,
> >     .bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
> >     .bdrv_refresh_limits    = cdrom_refresh_limits,
> > -    .bdrv_co_io_plug        = raw_co_io_plug,
> > -    .bdrv_co_io_unplug      = raw_co_io_unplug,
> >     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
> > 
> >     .bdrv_co_truncate                   = raw_co_truncate,
> > diff --git a/block/linux-aio.c b/block/linux-aio.c
> > index 442c86209b..5021aed68f 100644
> > --- a/block/linux-aio.c
> > +++ b/block/linux-aio.c
> > @@ -15,6 +15,7 @@
> > #include "qemu/event_notifier.h"
> > #include "qemu/coroutine.h"
> > #include "qapi/error.h"
> > +#include "sysemu/block-backend.h"
> > 
> > /* Only used for assertions.  */
> > #include "qemu/coroutine_int.h"
> > @@ -46,7 +47,6 @@ struct qemu_laiocb {
> > };
> > 
> > typedef struct {
> > -    int plugged;
> >     unsigned int in_queue;
> >     unsigned int in_flight;
> >     bool blocked;
> > @@ -236,7 +236,7 @@ static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
> > {
> >     qemu_laio_process_completions(s);
> > 
> > -    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
> > +    if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) {
> >         ioq_submit(s);
> >     }
> > }
> > @@ -277,7 +277,6 @@ static void qemu_laio_poll_ready(EventNotifier *opaque)
> > static void ioq_init(LaioQueue *io_q)
> > {
> >     QSIMPLEQ_INIT(&io_q->pending);
> > -    io_q->plugged = 0;
> >     io_q->in_queue = 0;
> >     io_q->in_flight = 0;
> >     io_q->blocked = false;
> > @@ -354,31 +353,11 @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
> >     return max_batch;
> > }
> > 
> > -void laio_io_plug(void)
> > +static void laio_unplug_fn(void *opaque)
> > {
> > -    AioContext *ctx = qemu_get_current_aio_context();
> > -    LinuxAioState *s = aio_get_linux_aio(ctx);
> > +    LinuxAioState *s = opaque;
> > 
> > -    s->io_q.plugged++;
> > -}
> > -
> > -void laio_io_unplug(uint64_t dev_max_batch)
> > -{
> > -    AioContext *ctx = qemu_get_current_aio_context();
> > -    LinuxAioState *s = aio_get_linux_aio(ctx);
> > -
> > -    assert(s->io_q.plugged);
> > -    s->io_q.plugged--;
> > -
> > -    /*
> > -     * Why max batch checking is performed here:
> > -     * Another BDS may have queued requests with a higher dev_max_batch and
> > -     * therefore in_queue could now exceed our dev_max_batch. Re-check the max
> > -     * batch so we can honor our device's dev_max_batch.
> > -     */
> > -    if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
> 
> Why are we removing this condition?
> Could the same situation occur with the new API?

The semantics of unplug_fn() are different from .bdrv_co_unplug():
1. unplug_fn() is only called when the last blk_io_unplug() call occurs,
   not every time blk_io_unplug() is called.
2. unplug_fn() is per-thread, not per-BlockDriverState, so there is no
   way to get per-BlockDriverState fields like dev_max_batch.

Therefore this condition cannot be moved to laio_unplug_fn().

How important is this condition? I believe that dropping it does not
have much of an effect but maybe I missed something.

Also, does it make sense to define per-BlockDriverState batching limits
when the AIO engine (Linux AIO or io_uring) is thread-local and shared
between all BlockDriverStates? I believe the fundamental reason (that we
discovered later) why dev_max_batch is effective is because the Linux
kernel processes 32 I/O request submissions at a time. Anything above 32
adds latency without a batching benefit.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2023-05-25 17:14 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-23 17:12 [PATCH v2 0/6] block: add blk_io_plug_call() API Stefan Hajnoczi
2023-05-23 17:12 ` [PATCH v2 1/6] " Stefan Hajnoczi
2023-05-24  8:08   ` Stefano Garzarella
2023-05-23 17:12 ` [PATCH v2 2/6] block/nvme: convert to " Stefan Hajnoczi
2023-05-24  8:11   ` Stefano Garzarella
2023-05-23 17:12 ` [PATCH v2 3/6] block/blkio: " Stefan Hajnoczi
2023-05-24  8:13   ` Stefano Garzarella
2023-05-23 17:12 ` [PATCH v2 4/6] block/io_uring: " Stefan Hajnoczi
2023-05-24  8:47   ` Stefano Garzarella
2023-05-23 17:12 ` [PATCH v2 5/6] block/linux-aio: " Stefan Hajnoczi
2023-05-24  8:52   ` Stefano Garzarella
2023-05-24 19:36     ` Stefan Hajnoczi [this message]
2023-05-29  8:50       ` Stefano Garzarella
2023-05-30 17:11         ` Stefan Hajnoczi
2023-05-23 17:13 ` [PATCH v2 6/6] block: remove bdrv_co_io_plug() API Stefan Hajnoczi
2023-05-24  8:52   ` Stefano Garzarella

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230524193634.GB17357@fedora \
    --to=stefanha@redhat.com \
    --cc=anthony.perard@citrix.com \
    --cc=eblake@redhat.com \
    --cc=fam@euphon.net \
    --cc=hreitz@redhat.com \
    --cc=jusual@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mehta.aaru20@gmail.com \
    --cc=mst@redhat.com \
    --cc=paul@xen.org \
    --cc=pbonzini@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=sgarzare@redhat.com \
    --cc=sstabellini@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.