All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: Stefano Garzarella <sgarzare@redhat.com>
Cc: "Laurent Vivier" <lvivier@redhat.com>,
	"Kevin Wolf" <kwolf@redhat.com>, "Thomas Huth" <thuth@redhat.com>,
	"Vladimir Sementsov-Ogievskiy" <v.sementsov-og@mail.ru>,
	qemu-block@nongnu.org, "Michael S. Tsirkin" <mst@redhat.com>,
	"John Snow" <jsnow@redhat.com>,
	qemu-devel@nongnu.org, "Alberto Faria" <afaria@redhat.com>,
	"Philippe Mathieu-Daudé" <f4bug@amsat.org>,
	"Yanan Wang" <wangyanan55@huawei.com>,
	"Eduardo Habkost" <eduardo@habkost.net>,
	"Hanna Reitz" <hreitz@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Fam Zheng" <fam@euphon.net>, "Eric Blake" <eblake@redhat.com>,
	"Markus Armbruster" <armbru@redhat.com>
Subject: Re: [RFC 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization
Date: Wed, 30 Mar 2022 11:45:12 +0100	[thread overview]
Message-ID: <YkQ0uPFfXB3NvjQu@stefanha-x1.localdomain> (raw)
In-Reply-To: <20220329152430.xhmhiq2l2cibzoio@sgarzare-redhat>

[-- Attachment #1: Type: text/plain, Size: 8421 bytes --]

On Tue, Mar 29, 2022 at 05:24:30PM +0200, Stefano Garzarella wrote:
> On Wed, Mar 23, 2022 at 11:17:26AM +0000, Stefan Hajnoczi wrote:
> > Avoid bounce buffers when QEMUIOVector elements are within previously
> > registered bdrv_register_buf() buffers.
> > 
> > The idea is that emulated storage controllers will register guest RAM
> > using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O
> > requests. Therefore no blkio_add_mem_region() calls are necessary in the
> > performance-critical I/O code path.
> > 
> > This optimization doesn't apply if the I/O buffer is internally
> > allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
> > path because BDRV_REQ_REGISTERED_BUF is not set.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > block/blkio.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 104 insertions(+), 4 deletions(-)
> > 
> > diff --git a/block/blkio.c b/block/blkio.c
> > index dd2308b967..78f4ca5f49 100644
> > --- a/block/blkio.c
> > +++ b/block/blkio.c
> > @@ -1,7 +1,9 @@
> > #include "qemu/osdep.h"
> > #include <blkio.h>
> > #include "block/block_int.h"
> > +#include "exec/memory.h"
> > #include "qapi/error.h"
> > +#include "qemu/error-report.h"
> > #include "qapi/qmp/qdict.h"
> > #include "qemu/module.h"
> > 
> > @@ -26,6 +28,9 @@ typedef struct {
> >     /* Can we skip adding/deleting blkio_mem_regions? */
> >     bool needs_mem_regions;
> > 
> > +    /* Are file descriptors necessary for blkio_mem_regions? */
> > +    bool needs_mem_region_fd;
> > +
> >     /*
> >      * blkio_completion_fd_poll() stashes the next completion for
> >      * blkio_completion_fd_poll_ready().
> > @@ -170,6 +175,8 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, int64_t offset,
> >         BlockCompletionFunc *cb, void *opaque)
> > {
> >     BDRVBlkioState *s = bs->opaque;
> > +    bool needs_mem_regions =
> > +        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
> >     struct iovec *iov = qiov->iov;
> >     int iovcnt = qiov->niov;
> >     BlkioAIOCB *acb;
> > @@ -179,7 +186,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, int64_t offset,
> > 
> >     acb = blkio_aiocb_get(bs, cb, opaque);
> > 
> > -    if (s->needs_mem_regions) {
> > +    if (needs_mem_regions) {
> >         if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
> >             qemu_aio_unref(&acb->common);
> >             return NULL;
> > @@ -194,7 +201,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, int64_t offset,
> > 
> >     ret = blkioq_readv(s->blkioq, offset, iov, iovcnt, acb, 0);
> >     if (ret < 0) {
> > -        if (s->needs_mem_regions) {
> > +        if (needs_mem_regions) {
> >             blkio_free_mem_region(s->blkio, &acb->mem_region);
> >             qemu_iovec_destroy(&acb->qiov);
> >         }
> > @@ -215,6 +222,8 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, int64_t offset,
> > {
> >     uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
> >     BDRVBlkioState *s = bs->opaque;
> > +    bool needs_mem_regions =
> > +        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
> >     struct iovec *iov = qiov->iov;
> >     int iovcnt = qiov->niov;
> >     BlkioAIOCB *acb;
> > @@ -224,7 +233,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, int64_t offset,
> > 
> >     acb = blkio_aiocb_get(bs, cb, opaque);
> > 
> > -    if (s->needs_mem_regions) {
> > +    if (needs_mem_regions) {
> >         if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
> >             qemu_aio_unref(&acb->common);
> >             return NULL;
> > @@ -238,7 +247,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, int64_t offset,
> > 
> >     ret = blkioq_writev(s->blkioq, offset, iov, iovcnt, acb, blkio_flags);
> >     if (ret < 0) {
> > -        if (s->needs_mem_regions) {
> > +        if (needs_mem_regions) {
> >             blkio_free_mem_region(s->blkio, &acb->mem_region);
> >         }
> >         qemu_aio_unref(&acb->common);
> > @@ -286,6 +295,83 @@ static void blkio_io_unplug(BlockDriverState *bs)
> >     }
> > }
> > 
> > +static void blkio_register_buf(BlockDriverState *bs, void *host, size_t size)
> > +{
> > +    BDRVBlkioState *s = bs->opaque;
> > +    char *errmsg;
> > +    int ret;
> > +    struct blkio_mem_region region = (struct blkio_mem_region){
> > +        .addr = host,
> > +        .len = size,
> > +        .fd = -1,
> > +    };
> > +
> > +    if (((uintptr_t)host | size) % s->mem_region_alignment) {
> > +        error_report_once("%s: skipping unaligned buf %p with size %zu",
> > +                          __func__, host, size);
> > +        return; /* skip unaligned */
> > +    }
> > +
> > +    /* Attempt to find the fd for a MemoryRegion */
> > +    if (s->needs_mem_region_fd) {
> > +        int fd = -1;
> > +        ram_addr_t offset;
> > +        MemoryRegion *mr;
> > +
> > +        /*
> > +         * bdrv_register_buf() is called with the BQL held so mr lives at least
> > +         * until this function returns.
> > +         */
> > +        mr = memory_region_from_host(host, &offset);
> > +        if (mr) {
> > +            fd = memory_region_get_fd(mr);
> > +        }
> > +        if (fd == -1) {
> > +            error_report_once("%s: skipping fd-less buf %p with size %zu",
> > +                              __func__, host, size);
> > +            return; /* skip if there is no fd */
> > +        }
> > +
> > +        region.fd = fd;
> > +        region.fd_offset = offset;
> > +    }
> > +
> > +    WITH_QEMU_LOCK_GUARD(&s->lock) {
> > +        ret = blkio_add_mem_region(s->blkio, &region, &errmsg);
> > +    }
> > +
> > +    if (ret < 0) {
> > +        error_report_once("Failed to add blkio mem region %p with size %zu: %s",
> > +                          host, size, errmsg);
> > +        free(errmsg);
> > +    }
> > +}
> > +
> > +static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
> > +{
> > +    BDRVBlkioState *s = bs->opaque;
> > +    char *errmsg;
> > +    int ret;
> > +    struct blkio_mem_region region = (struct blkio_mem_region){
> > +        .addr = host,
> > +        .len = size,
> > +    };
> > +
> > +    if (((uintptr_t)host | size) % s->mem_region_alignment) {
> > +        return; /* skip unaligned */
> > +    }
> > +
> > +    WITH_QEMU_LOCK_GUARD(&s->lock) {
> > +        ret = blkio_del_mem_region(s->blkio, &region, &errmsg);
> > +    }
> > +
> > +    if (ret < 0) {
> > +        error_report_once("Failed to delete blkio mem region %p with size %zu: %s",
> > +                          host, size, errmsg);
> > +        free(errmsg);
> > +    }
> > +}
> > +
> > static void blkio_parse_filename_io_uring(const char *filename, QDict *options,
> >                                           Error **errp)
> > {
> > @@ -356,6 +442,18 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
> >         return ret;
> >     }
> > 
> > +    ret = blkio_get_bool(s->blkio,
> > +                         "needs-mem-region-fd",
> > +                         &s->needs_mem_region_fd,
> > +                         &errmsg);
> > +    if (ret < 0) {
> > +        error_setg_errno(errp, -ret,
> > +                         "failed to get needs-mem-region-fd: %s", errmsg);
> > +        free(errmsg);
> > +        blkio_destroy(&s->blkio);
> > +        return ret;
> > +    }
> > +
> >     ret = blkio_get_uint64(s->blkio,
> >                            "mem-region-alignment",
> >                            &s->mem_region_alignment,
> 
> I already mentioned on IRC while testing the series, but I'm writing it here
> so we don't forget ;-)
> 
> To prevent bdrv_driver_pwritev() from removing the BDRV_REQ_REGISTERED_BUF
> flag from requests, we should add this:
> 
> @@ -474,7 +474,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
>          return ret;
>      }
> 
> -    bs->supported_write_flags = BDRV_REQ_FUA;
> +    bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
> 
>      qemu_mutex_init(&s->lock);
>      s->blkioq = blkio_get_queue(s->blkio, 0);

Thanks, will fix.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2022-03-30 10:58 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-23 11:17 [RFC 0/8] blkio: add libblkio BlockDriver Stefan Hajnoczi
2022-03-23 11:17 ` [RFC 1/8] blkio: add io_uring block driver using libblkio Stefan Hajnoczi
2022-03-23 11:17 ` [RFC 2/8] numa: call ->ram_block_removed() in ram_block_notifer_remove() Stefan Hajnoczi
2022-03-30  8:42   ` David Hildenbrand
2022-03-23 11:17 ` [RFC 3/8] block: pass size to bdrv_unregister_buf() Stefan Hajnoczi
2022-03-23 11:17 ` [RFC 4/8] block: add BDRV_REQ_REGISTERED_BUF request flag Stefan Hajnoczi
2022-03-23 11:17 ` [RFC 5/8] block: add BlockRAMRegistrar Stefan Hajnoczi
2022-03-23 11:17 ` [RFC 6/8] stubs: add memory_region_from_host() and memory_region_get_fd() Stefan Hajnoczi
2022-03-23 11:17 ` [RFC 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization Stefan Hajnoczi
2022-03-29 15:24   ` Stefano Garzarella
2022-03-30 10:45     ` Stefan Hajnoczi [this message]
2022-03-23 11:17 ` [RFC 8/8] virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint Stefan Hajnoczi
2022-03-29 15:27 ` [RFC 0/8] blkio: add libblkio BlockDriver Stefano Garzarella

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YkQ0uPFfXB3NvjQu@stefanha-x1.localdomain \
    --to=stefanha@redhat.com \
    --cc=afaria@redhat.com \
    --cc=armbru@redhat.com \
    --cc=eblake@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=f4bug@amsat.org \
    --cc=fam@euphon.net \
    --cc=hreitz@redhat.com \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=sgarzare@redhat.com \
    --cc=thuth@redhat.com \
    --cc=v.sementsov-og@mail.ru \
    --cc=wangyanan55@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.