On Tue, Jul 12, 2022 at 04:28:02PM +0200, Stefano Garzarella wrote: > On Fri, Jul 08, 2022 at 05:17:36AM +0100, Stefan Hajnoczi wrote: > > Avoid bounce buffers when QEMUIOVector elements are within previously > > registered bdrv_register_buf() buffers. > > > > The idea is that emulated storage controllers will register guest RAM > > using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O > > requests. Therefore no blkio_map_mem_region() calls are necessary in the > > performance-critical I/O code path. > > > > This optimization doesn't apply if the I/O buffer is internally > > allocated by QEMU (e.g. qcow2 metadata). There we still take the slow > > path because BDRV_REQ_REGISTERED_BUF is not set. > > > > Signed-off-by: Stefan Hajnoczi > > --- > > block/blkio.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 101 insertions(+), 3 deletions(-) > > > > diff --git a/block/blkio.c b/block/blkio.c > > index 7fbdbd7fae..37d593a20c 100644 > > --- a/block/blkio.c > > +++ b/block/blkio.c > > @@ -1,7 +1,9 @@ > > #include "qemu/osdep.h" > > #include > > #include "block/block_int.h" > > +#include "exec/memory.h" > > #include "qapi/error.h" > > +#include "qemu/error-report.h" > > #include "qapi/qmp/qdict.h" > > #include "qemu/module.h" > > > > @@ -28,6 +30,9 @@ typedef struct { > > > > /* Can we skip adding/deleting blkio_mem_regions? */ > > bool needs_mem_regions; > > + > > + /* Are file descriptors necessary for blkio_mem_regions? */ > > + bool needs_mem_region_fd; > > } BDRVBlkioState; > > > > static void blkio_aiocb_complete(BlkioAIOCB *acb, int ret) > > @@ -198,6 +203,8 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, int64_t offset, > > BlockCompletionFunc *cb, void *opaque) > > { > > BDRVBlkioState *s = bs->opaque; > > + bool needs_mem_regions = > > + s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF); > > struct iovec *iov = qiov->iov; > > int iovcnt = qiov->niov; > > BlkioAIOCB *acb; > > @@ -206,7 +213,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, int64_t offset, > > > > acb = blkio_aiocb_get(bs, cb, opaque); > > > > - if (s->needs_mem_regions) { > > + if (needs_mem_regions) { > > if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) { > > qemu_aio_unref(&acb->common); > > return NULL; > > @@ -230,6 +237,8 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, int64_t offset, > > { > > uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0; > > BDRVBlkioState *s = bs->opaque; > > + bool needs_mem_regions = > > + s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF); > > struct iovec *iov = qiov->iov; > > int iovcnt = qiov->niov; > > BlkioAIOCB *acb; > > @@ -238,7 +247,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, int64_t offset, > > > > acb = blkio_aiocb_get(bs, cb, opaque); > > > > - if (s->needs_mem_regions) { > > + if (needs_mem_regions) { > > if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) { > > qemu_aio_unref(&acb->common); > > return NULL; > > @@ -324,6 +333,80 @@ static void blkio_io_unplug(BlockDriverState *bs) > > } > > } > > > > +static void blkio_register_buf(BlockDriverState *bs, void *host, size_t size) > > +{ > > + BDRVBlkioState *s = bs->opaque; > > + int ret; > > + struct blkio_mem_region region = (struct blkio_mem_region){ > > + .addr = host, > > + .len = size, > > + .fd = -1, > > + }; > > + > > + if (((uintptr_t)host | size) % s->mem_region_alignment) { > > + error_report_once("%s: skipping unaligned buf %p with size %zu", > > + __func__, host, size); > > + return; /* skip unaligned */ > > + } > > + > > + /* Attempt to find the fd for a MemoryRegion */ > > + if (s->needs_mem_region_fd) { > > + int fd = -1; > > + ram_addr_t offset; > > + MemoryRegion *mr; > > + > > + /* > > + * bdrv_register_buf() is called with the BQL held so mr lives at least > > + * until this function returns. > > + */ > > + mr = memory_region_from_host(host, &offset); > > + if (mr) { > > + fd = memory_region_get_fd(mr); > > If s->needs_mem_region_fd is true, memory_region_get_fd() crashes I think > because mr->ram_block is not yet set, indeed from the stack trace > blkio_register_buf() is called inside qemu_ram_alloc_resizeable(), and its > result is used to set mr->ram_block in memory_region_init_resizeable_ram(): > > Program terminated with signal SIGSEGV, Segmentation fault. > #0 0x000056235bf1f7a3 in memory_region_get_fd (mr=) at ../softmmu/memory.c:2309 > #1 0x000056235c07e54d in blkio_register_buf (bs=, host=0x7f824e200000, size=2097152) > at ../block/blkio.c:364 > #2 0x000056235c0246c6 in bdrv_register_buf (bs=0x56235d606b40, host=0x7f824e200000, size=2097152) > at ../block/io.c:3362 > #3 0x000056235bea44e6 in ram_block_notify_add (host=0x7f824e200000, size=131072, max_size=2097152) > at ../hw/core/numa.c:863 > #4 0x000056235bf22c00 in ram_block_add (new_block=, errp=) > at ../softmmu/physmem.c:2057 > #5 0x000056235bf232e4 in qemu_ram_alloc_internal (size=size@entry=131072, > max_size=max_size@entry=2097152, resized=resized@entry=0x56235bc0f920 > , host=host@entry=0x0, ram_flags=ram_flags@entry=4, > mr=mr@entry=0x56235dc3fe00, errp=0x7ffcb21f1be0) at > ../softmmu/physmem.c:2180 > #6 0x000056235bf26426 in qemu_ram_alloc_resizeable (size=size@entry=131072, > maxsz=maxsz@entry=2097152, resized=resized@entry=0x56235bc0f920 > , mr=mr@entry=0x56235dc3fe00, > errp=errp@entry=0x7ffcb21f1be0) at ../softmmu/physmem.c:2209 > #7 0x000056235bf1cc99 in memory_region_init_resizeable_ram > (mr=0x56235dc3fe00, owner=owner@entry=0x56235d93ffc0, > name=name@entry=0x7ffcb21f1ca0 "/rom@etc/acpi/tables", size=131072, > max_size=2097152, resized=resized@entry=0x56235bc0f920 , > errp=0x56235c996490 ) at ../softmmu/memory.c:1586 > #8 0x000056235bc0f99c in rom_set_mr (rom=rom@entry=0x56235ddd0200, > owner=0x56235d93ffc0, name=name@entry=0x7ffcb21f1ca0 > "/rom@etc/acpi/tables", ro=ro@entry=true) > at ../hw/core/loader.c:961 > #9 0x000056235bc12a65 in rom_add_blob (name=name@entry=0x56235c1a2a09 > "etc/acpi/tables", blob=0x56235df4f4b0, len=, > max_len=max_len@entry=2097152, addr=addr@entry=18446744073709551615, > fw_file_name=fw_file_name@entry=0x56235c1a2a09 "etc/acpi/tables", > fw_callback=0x56235be47f90 , > callback_opaque=0x56235d817830, as=0x0, read_only=true) at > ../hw/core/loader.c:1102 > #10 0x000056235bbe0990 in acpi_add_rom_blob ( > update=update@entry=0x56235be47f90 , > opaque=opaque@entry=0x56235d817830, blob=0x56235d3ab750, > name=name@entry=0x56235c1a2a09 "etc/acpi/tables") at ../hw/acpi/utils.c:46 > #11 0x000056235be481e6 in acpi_setup () at ../hw/i386/acpi-build.c:2805 > #12 0x000056235be3e209 in pc_machine_done (notifier=0x56235d5efce8, data=) > at ../hw/i386/pc.c:758 > #13 0x000056235c12e4a7 in notifier_list_notify ( > list=list@entry=0x56235c963790 , data=data@entry=0x0) > at ../util/notify.c:39 Hi Stefano, I have fixed this by using RAMBlock instead of MemoryRegion. The next revision will call qemu_ram_block_from_host() to fetch the RAMBlock's fd. Stefan