On Mon, Aug 16, 2021 at 09:42:45AM -0700, Elena Ufimtseva wrote: > From: John Johnson > > Signed-off-by: Elena Ufimtseva > Signed-off-by: John G Johnson > Signed-off-by: Jagannathan Raman > --- > include/hw/vfio/vfio-common.h | 3 ++ > hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++ > hw/vfio/pci.c | 22 +++++++++ > 3 files changed, 109 insertions(+) Alex: I'm not familiar enough with hw/vfio/ to review this in depth. You might have suggestions on how to unify the vfio-user and vfio kernel concepts of groups and containers. > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index bdd25a546c..688660c28d 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -91,6 +91,7 @@ typedef struct VFIOContainer { > uint64_t max_dirty_bitmap_size; > unsigned long pgsizes; > unsigned int dma_max_mappings; > + VFIOProxy *proxy; > QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; > QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; > QLIST_HEAD(, VFIOGroup) group_list; > @@ -217,6 +218,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); > void vfio_put_group(VFIOGroup *group); > int vfio_get_device(VFIOGroup *group, const char *name, > VFIODevice *vbasedev, Error **errp); > +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as); > +void vfio_disconnect_proxy(VFIOGroup *group); > > extern const MemoryRegionOps vfio_region_ops; > typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 9fe3e05dc6..57b9e111e6 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -2249,6 +2249,55 @@ put_space_exit: > return ret; > } > > +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as) > +{ > + VFIOAddressSpace *space; > + VFIOContainer *container; > + > + if (QLIST_EMPTY(&vfio_group_list)) { > + qemu_register_reset(vfio_reset_handler, NULL); > + } > + > + QLIST_INSERT_HEAD(&vfio_group_list, group, 
next); > + > + /* > + * try to mirror vfio_connect_container() > + * as much as possible > + */ > + > + space = vfio_get_address_space(as); > + > + container = g_malloc0(sizeof(*container)); > + container->space = space; > + container->fd = -1; > + QLIST_INIT(&container->giommu_list); > + QLIST_INIT(&container->hostwin_list); > + container->proxy = proxy; > + > + /* > + * The proxy uses a SW IOMMU in lieu of the HW one > + * used in the ioctl() version. Use TYPE1 with the > + * target's page size for maximum compatibility > + */ > + container->iommu_type = VFIO_TYPE1_IOMMU; > + vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE); > + container->pgsizes = TARGET_PAGE_SIZE; > + > + container->dirty_pages_supported = true; > + container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER; > + container->dirty_pgsizes = TARGET_PAGE_SIZE; > + > + QLIST_INIT(&container->group_list); > + QLIST_INSERT_HEAD(&space->containers, container, next); > + > + group->container = container; > + QLIST_INSERT_HEAD(&container->group_list, group, container_next); > + > + container->listener = vfio_memory_listener; > + memory_listener_register(&container->listener, container->space->as); > + container->initialized = true; > +} > + > static void vfio_disconnect_container(VFIOGroup *group) > { > VFIOContainer *container = group->container; > @@ -2291,6 +2340,41 @@ static void vfio_disconnect_container(VFIOGroup *group) > } > } > > +void vfio_disconnect_proxy(VFIOGroup *group) > +{ > + VFIOContainer *container = group->container; > + VFIOAddressSpace *space = container->space; > + VFIOGuestIOMMU *giommu, *tmp; > + > + /* > + * try to mirror vfio_disconnect_container() > + * as much as possible, knowing each device > + * is in one group and one container > + */ > + > + QLIST_REMOVE(group, container_next); > + group->container = NULL; > + > + /* > + * Explicitly release the listener first before unset container, > + * since unset may destroy the backend container if it's the last > + * 
group. > + */ > + memory_listener_unregister(&container->listener); > + > + QLIST_REMOVE(container, next); > + > + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { > + memory_region_unregister_iommu_notifier( > + MEMORY_REGION(giommu->iommu), &giommu->n); > + QLIST_REMOVE(giommu, giommu_next); > + g_free(giommu); > + } > + > + g_free(container); > + vfio_put_address_space(space); > +} > + > VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) > { > VFIOGroup *group; > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 282de6a30b..2c9fcb2fa9 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -3442,6 +3442,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > VFIODevice *vbasedev = &vdev->vbasedev; > SocketAddress addr; > VFIOProxy *proxy; > + VFIOGroup *group = NULL; > int ret; > Error *err = NULL; > > @@ -3484,6 +3485,19 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > vbasedev->no_mmap = false; > vbasedev->ops = &vfio_user_pci_ops; > > + /* > + * each device gets its own group and container > + * make them unrelated to any host IOMMU groupings > + */ > + group = g_malloc0(sizeof(*group)); > + group->fd = -1; > + group->groupid = -1; > + QLIST_INIT(&group->device_list); > + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); > + vbasedev->group = group; > + > + vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev)); > + > ret = vfio_user_get_info(&vdev->vbasedev); > if (ret) { > error_setg_errno(errp, -ret, "get info failure"); > @@ -3587,6 +3601,9 @@ out_teardown: > vfio_teardown_msi(vdev); > vfio_bars_exit(vdev); > error: > + if (group != NULL) { > + vfio_disconnect_proxy(group); > + } > vfio_user_disconnect(proxy); > error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); > } > @@ -3595,6 +3612,11 @@ static void vfio_user_instance_finalize(Object *obj) > { > VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); > VFIODevice *vbasedev = &vdev->vbasedev; > + VFIOGroup 
*group = vbasedev->group; > + > + vfio_disconnect_proxy(group); > + g_free(group); > + vbasedev->group = NULL; Can vfio_put_group() be used instead? I'm worried that the cleanup code will be duplicated or become inconsistent if it's not shared. Also, vfio_instance_finalize() calls vfio_put_group() after vfio_put_device(). Does this code intentionally take advantage of the if (!vbasedev->group) early return in vfio_put_base_device()? This is non-obvious. I recommend unifying the device and group cleanup instead of special-casing it here (this is fragile!). > > vfio_put_device(vdev); > > -- > 2.25.1 >