On Wed, Dec 9, 2020 at 8:20 PM Jag Raman wrote: > > > > On Dec 8, 2020, at 8:57 AM, Marc-André Lureau < > marcandre.lureau@gmail.com> wrote: > > > > Hi > > > > On Wed, Dec 2, 2020 at 12:23 AM Jagannathan Raman > wrote: > > Add memory-listener object which is used to keep the view of the RAM > > in sync between QEMU and remote process. > > A MemoryListener is registered for system-memory AddressSpace. The > > listener sends SYNC_SYSMEM message to the remote process when memory > > listener commits the changes to memory, the remote process receives > > the message and processes it in the handler for SYNC_SYSMEM message. > > > > Signed-off-by: Jagannathan Raman > > Signed-off-by: John G Johnson > > Signed-off-by: Elena Ufimtseva > > Reviewed-by: Stefan Hajnoczi > > --- > > include/hw/remote/memory-sync.h | 27 ++++++ > > include/hw/remote/proxy.h | 2 + > > hw/remote/memory-sync.c | 210 > ++++++++++++++++++++++++++++++++++++++++ > > hw/remote/message.c | 5 + > > hw/remote/proxy.c | 6 ++ > > MAINTAINERS | 2 + > > hw/remote/meson.build | 1 + > > 7 files changed, 253 insertions(+) > > create mode 100644 include/hw/remote/memory-sync.h > > create mode 100644 hw/remote/memory-sync.c > > > > diff --git a/include/hw/remote/memory-sync.h > b/include/hw/remote/memory-sync.h > > new file mode 100644 > > index 0000000..785f76a > > --- /dev/null > > +++ b/include/hw/remote/memory-sync.h > > @@ -0,0 +1,27 @@ > > +/* > > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > later. > > + * See the COPYING file in the top-level directory. 
> > + * > > + */ > > + > > +#ifndef MEMORY_SYNC_H > > +#define MEMORY_SYNC_H > > + > > +#include "exec/memory.h" > > +#include "io/channel.h" > > + > > +typedef struct RemoteMemSync { > > + MemoryListener listener; > > + > > + int n_mr_sections; > > + MemoryRegionSection *mr_sections; > > + > > + QIOChannel *ioc; > > +} RemoteMemSync; > > + > > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc); > > +void deconfigure_memory_sync(RemoteMemSync *sync); > > > > RemoteMemSync vs MemorySync, and function with _memory_sync suffixes... > > Naming things is hard, but trying to be consistent generally helps. > > > > My understanding is that this is a proxy-dev helper to handle memory > listening and sending SYNC_SYSMEM. > > > > I would thus suggest naming it ProxyMemoryListener. It could eventually > be folded in proxy.c > > > > Please try to be consistent with header naming, structure naming, type, > functions and enum prefixes etc. > > > > proxy_memory_listener isn't that long imho. > > > > + > > +#endif > > diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h > > index e29c61b..a687b7d 100644 > > --- a/include/hw/remote/proxy.h > > +++ b/include/hw/remote/proxy.h > > @@ -11,6 +11,7 @@ > > > > #include "hw/pci/pci.h" > > #include "io/channel.h" > > +#include "hw/remote/memory-sync.h" > > > > #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev" > > > > @@ -40,6 +41,7 @@ struct PCIProxyDev { > > QemuMutex io_mutex; > > QIOChannel *ioc; > > Error *migration_blocker; > > + RemoteMemSync sync; > > ProxyMemoryRegion region[PCI_NUM_REGIONS]; > > }; > > > > diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c > > new file mode 100644 > > index 0000000..2365e69 > > --- /dev/null > > +++ b/hw/remote/memory-sync.c > > @@ -0,0 +1,210 @@ > > +/* > > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > later. > > + * See the COPYING file in the top-level directory. 
> > + * > > + */ > > + > > +#include "qemu/osdep.h" > > +#include "qemu-common.h" > > + > > +#include "qemu/compiler.h" > > +#include "qemu/int128.h" > > +#include "qemu/range.h" > > +#include "exec/memory.h" > > +#include "exec/cpu-common.h" > > +#include "cpu.h" > > +#include "exec/ram_addr.h" > > +#include "exec/address-spaces.h" > > +#include "hw/remote/mpqemu-link.h" > > +#include "hw/remote/memory-sync.h" > > + > > +static void proxy_ml_begin(MemoryListener *listener) > > > > I suggest to rename begin -> reset > > > > +{ > > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, > listener); > > + int mrs; > > + > > + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) { > > + memory_region_unref(sync->mr_sections[mrs].mr); > > + } > > + > > + g_free(sync->mr_sections); > > + sync->mr_sections = NULL; > > + sync->n_mr_sections = 0; > > +} > > + > > +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset) > > > > This function is very similar to vhost_user_get_mr_data(). That suggests > we could factor the code. > > > > Perhaps a new memory_region_from_host_full(), or extend > memory_region_from_host() with an extra optional "int *fd" argument. > > > > +{ > > + MemoryRegion *mr; > > + ram_addr_t off; > > + > > + /** > > + * Assumes that the host address is a valid address as it's > > + * coming from the MemoryListener system. In the case host > > + * address is not valid, the following call would return > > + * the default subregion of "system_memory" region, and > > + * not NULL. So it's not possible to check for NULL here. > > + */ > > + mr = memory_region_from_host((void *)(uintptr_t)host, &off); > > + > > + if (offset) { > > + *offset = off; > > + } > > + > > + return memory_region_get_fd(mr); > > +} > > + > > +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, > size_t size) > > +{ > > > > This seems similar to vhost_user_can_merge(). 
> > > > + bool merge; > > + int fd1, fd2; > > + > > + fd1 = get_fd_from_hostaddr(host, NULL); > > + > > + fd2 = get_fd_from_hostaddr(prev_host, NULL); > > + > > + merge = (fd1 == fd2); > > > > This could be written in a simpler manner, ex: > > > > if (get_fd_from_hostaddr(host, NULL) != get_fd_from_hostaddr(prev_host, > NULL)) > > return false > > > > + > > + merge &= ((prev_host + size) == host); > > > > That check could be done early on before doing the more expensive > memory_region_from_host() calls > > > > + > > + return merge; > > +} > > + > > +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section) > > +{ > > + uint64_t mrs_size, mrs_gpa, mrs_page; > > + MemoryRegionSection *prev_sec; > > + bool merged = false; > > + uintptr_t mrs_host; > > + RAMBlock *mrs_rb; > > + > > + if (!sync->n_mr_sections) { > > + return false; > > + } > > + > > + mrs_rb = section->mr->ram_block; > > + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); > > + mrs_size = int128_get64(section->size); > > + mrs_gpa = section->offset_within_address_space; > > + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + > > + section->offset_within_region; > > + > > + if (get_fd_from_hostaddr(mrs_host, NULL) < 0) { > > + return true; > > + } > > + > > + mrs_host = mrs_host & ~(mrs_page - 1); > > + mrs_gpa = mrs_gpa & ~(mrs_page - 1); > > + mrs_size = ROUND_UP(mrs_size, mrs_page); > > + > > + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); > > + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; > > + uint64_t prev_size = int128_get64(prev_sec->size); > > + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); > > + uint64_t prev_host_start = > > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + > > + prev_sec->offset_within_region; > > + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); > > + > > + if (mrs_gpa <= (prev_gpa_end + 1)) { > > + g_assert(mrs_gpa > prev_gpa_start); > > + > > + if ((section->mr == 
prev_sec->mr) && > > + proxy_mrs_can_merge(mrs_host, prev_host_start, > > + (mrs_gpa - prev_gpa_start))) { > > + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); > > + merged = true; > > + prev_sec->offset_within_address_space = > > + MIN(prev_gpa_start, mrs_gpa); > > + prev_sec->offset_within_region = > > + MIN(prev_host_start, mrs_host) - > > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); > > + prev_sec->size = int128_make64(max_end - > MIN(prev_host_start, > > + mrs_host)); > > + } > > + } > > + > > + return merged; > > +} > > + > > +static void proxy_ml_region_addnop(MemoryListener *listener, > > + MemoryRegionSection *section) > > +{ > > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, > listener); > > + > > + if (!(memory_region_is_ram(section->mr) && > > + !memory_region_is_rom(section->mr))) { > > + return; > > > > A bit clearer in vhost.c: > > if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) { > > > > + } > > + > > + if (try_merge(sync, section)) { > > + return; > > + } > > + > > + ++sync->n_mr_sections; > > + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections, > > + sync->n_mr_sections); > > + sync->mr_sections[sync->n_mr_sections - 1] = *section; > > + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL; > > + memory_region_ref(section->mr); > > +} > > + > > +static void proxy_ml_commit(MemoryListener *listener) > > +{ > > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, > listener); > > + MPQemuMsg msg; > > + MemoryRegionSection *section; > > + ram_addr_t offset; > > + uintptr_t host_addr; > > + int region; > > + Error *local_err = NULL; > > + > > + memset(&msg, 0, sizeof(MPQemuMsg)); > > + > > + msg.cmd = SYNC_SYSMEM; > > + msg.num_fds = sync->n_mr_sections; > > + msg.size = sizeof(SyncSysmemMsg); > > + if (msg.num_fds > REMOTE_MAX_FDS) { > > + error_report("Number of fds is more than %d", REMOTE_MAX_FDS); > > + return; > > + } > > + > > + for (region = 0; region < 
sync->n_mr_sections; region++) { > > + section = &sync->mr_sections[region]; > > + msg.data.sync_sysmem.gpas[region] = > > + section->offset_within_address_space; > > + msg.data.sync_sysmem.sizes[region] = > int128_get64(section->size); > > + host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) + > > + section->offset_within_region; > > + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset); > > + msg.data.sync_sysmem.offsets[region] = offset; > > + } > > + mpqemu_msg_send(&msg, sync->ioc, &local_err); > > + if (local_err) { > > + error_report("Error in sending command %d", msg.cmd); > > + } > > +} > > > > That whole complex code above duplicates much of the logic in vhost.c. > Can we try to factorize it instead? > > Hi Marc-Andre, > > Thank you for sharing your feedback! > > Would it be alright if we addressed this item alone in a separate patch in > the future? Since > this refactoring affects vhost code, we’re wondering whether it would be better to > address it in a > future patch to help with any regression analysis in the future. > That's fine with me, but please leave a TODO note in the code then. thanks > Thank you! 
> — > Jag > > > > > + > > +void deconfigure_memory_sync(RemoteMemSync *sync) > > +{ > > + memory_listener_unregister(&sync->listener); > > + > > + proxy_ml_begin(&sync->listener); > > +} > > + > > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc) > > +{ > > + sync->n_mr_sections = 0; > > + sync->mr_sections = NULL; > > + > > + sync->ioc = ioc; > > + > > + sync->listener.begin = proxy_ml_begin; > > + sync->listener.commit = proxy_ml_commit; > > + sync->listener.region_add = proxy_ml_region_addnop; > > + sync->listener.region_nop = proxy_ml_region_addnop; > > + sync->listener.priority = 10; > > + > > + memory_listener_register(&sync->listener, &address_space_memory); > > +} > > diff --git a/hw/remote/message.c b/hw/remote/message.c > > index 0f3e38a..454fd2d 100644 > > --- a/hw/remote/message.c > > +++ b/hw/remote/message.c > > @@ -17,6 +17,7 @@ > > #include "sysemu/runstate.h" > > #include "hw/pci/pci.h" > > #include "exec/memattrs.h" > > +#include "hw/remote/memory.h" > > > > static void process_config_write(QIOChannel *ioc, PCIDevice *dev, > > MPQemuMsg *msg); > > @@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void > *data) > > case BAR_READ: > > process_bar_read(com->ioc, &msg, &local_err); > > break; > > + case SYNC_SYSMEM: > > + remote_sysmem_reconfig(&msg, &local_err); > > + break; > > + > > default: > > error_setg(&local_err, > > "Unknown command (%d) received for device %s > (pid=%d)", > > diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c > > index 039347d..0f2d1aa 100644 > > --- a/hw/remote/proxy.c > > +++ b/hw/remote/proxy.c > > @@ -18,6 +18,8 @@ > > #include "migration/blocker.h" > > #include "hw/remote/mpqemu-link.h" > > #include "qemu/error-report.h" > > +#include "hw/remote/memory-sync.h" > > +#include "qom/object.h" > > > > static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp) > > { > > @@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, > Error **errp) > > > > 
qemu_mutex_init(&dev->io_mutex); > > qio_channel_set_blocking(dev->ioc, true, NULL); > > + > > + configure_memory_sync(&dev->sync, dev->ioc); > > } > > > > static void pci_proxy_dev_exit(PCIDevice *pdev) > > @@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev) > > migrate_del_blocker(dev->migration_blocker); > > > > error_free(dev->migration_blocker); > > + > > + deconfigure_memory_sync(&dev->sync); > > } > > > > static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t > *val, > > diff --git a/MAINTAINERS b/MAINTAINERS > > index ebd1d1d..5d78b78 100644 > > --- a/MAINTAINERS > > +++ b/MAINTAINERS > > @@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h > > F: hw/remote/memory.c > > F: hw/remote/proxy.c > > F: include/hw/remote/proxy.h > > +F: hw/remote/memory-sync.c > > +F: include/hw/remote/memory-sync.h > > > > Build and test automation > > ------------------------- > > diff --git a/hw/remote/meson.build b/hw/remote/meson.build > > index 569cd20..7d434a5 100644 > > --- a/hw/remote/meson.build > > +++ b/hw/remote/meson.build > > @@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: > files('remote-obj.c')) > > remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) > > > > specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c')) > > +specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: > files('memory-sync.c')) > > > > softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss) > > -- > > 1.8.3.1 > > > > > > > > -- > > Marc-André Lureau > > -- Marc-André Lureau