On Fri, May 20, 2016 at 06:04:42PM +1000, Alexey Kardashevskiy wrote: > The sPAPR TCE tables manage 2 copies when VFIO is using an IOMMU - > a guest view of the table and a hardware TCE table. If there is no VFIO > presense in the address space, then just the guest view is used, if Nit: s/presense/presence/ > this is the case, it is allocated in the KVM. However since there is no > support yet for VFIO in KVM TCE hypercalls, when we start using VFIO, > we need to move the guest view from KVM to the userspace; and we need > to do this for every IOMMU on a bus with VFIO devices. > > This adds notify_started/notify_stopped callbacks in MemoryRegionIOMMUOps > to notify IOMMU that listeners were set/removed. This allows IOMMU to > take necessary steps before actual notifications happen and do proper > cleanup when the last notifier is removed. > > This implements the callbacks for the sPAPR IOMMU - notify_started() > reallocated the guest view to the user space, notify_stopped() does > the opposite. > > This removes explicit spapr_tce_set_need_vfio() call from PCI hotplug > path as the new callbacks do this better - they notify IOMMU at > the exact moment when the configuration is changed, and this also > includes the case of PCI hot unplug. > > This adds MemoryRegion* to memory_region_unregister_iommu_notifier() > as we need iommu_ops to call notify_stopped() and Notifier* does not > store the owner. > > Signed-off-by: Alexey Kardashevskiy > --- > > > Is this any better? If so, I'll repost as a part of "v17". Thanks. I agree with Alex that this is a much better approach. I'm sad I didn't think of it earlier. Reviewed-by: David Gibson > --- > Changes: > v17: > * replaced IOMMU users counting with simple QLIST_EMPTY() > * renamed the callbacks > * removed requirement for region_del() to be called on memory_listener_unregister() > > v16: > * added a use counter in VFIOAddressSpace->VFIOIOMMUMR > > v15: > * s/need_vfio/vfio-Users/g > --- > hw/ppc/spapr_iommu.c | 12 ++++++++++++ > hw/ppc/spapr_pci.c | 6 ------ > hw/vfio/common.c | 5 +++-- > include/exec/memory.h | 8 +++++++- > memory.c | 10 +++++++++- > 5 files changed, 31 insertions(+), 10 deletions(-) > > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c > index 73bc26b..fd38006 100644 > --- a/hw/ppc/spapr_iommu.c > +++ b/hw/ppc/spapr_iommu.c > @@ -156,6 +156,16 @@ static uint64_t spapr_tce_get_page_sizes(MemoryRegion *iommu) > return 1ULL << tcet->page_shift; > } > > +static void spapr_tce_notify_started(MemoryRegion *iommu) > +{ > + spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true); > +} > + > +static void spapr_tce_notify_stopped(MemoryRegion *iommu) > +{ > + spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false); > +} > + At some point we probably want to use this cleanup to remove the "need_vfio" names inside the spapr code, but I don't think that's reasonably within the scope of this patch. > static void spapr_tce_table_do_enable(sPAPRTCETable *tcet); > static void spapr_tce_table_do_disable(sPAPRTCETable *tcet); > > @@ -240,6 +250,8 @@ static const VMStateDescription vmstate_spapr_tce_table = { > static MemoryRegionIOMMUOps spapr_iommu_ops = { > .translate = spapr_tce_translate_iommu, > .get_page_sizes = spapr_tce_get_page_sizes, > + .notify_started = spapr_tce_notify_started, > + .notify_stopped = spapr_tce_notify_stopped, > }; > > static int spapr_tce_table_realize(DeviceState *dev) > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index 68e77b0..7c2c622 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -1089,12 +1089,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, > void *fdt = NULL; > int fdt_start_offset = 0, fdt_size; > > - if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { > - sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn); > - > - spapr_tce_set_need_vfio(tcet, true); > - } > - > if (dev->hotplugged) { > fdt = create_device_tree(&fdt_size); > fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 2e4f703..d1fa9ab 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -523,7 +523,8 @@ static void vfio_listener_region_del(MemoryListener *listener, > > QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { > if (giommu->iommu == section->mr) { > - memory_region_unregister_iommu_notifier(&giommu->n); > + memory_region_unregister_iommu_notifier(giommu->iommu, > + &giommu->n); > QLIST_REMOVE(giommu, giommu_next); > g_free(giommu); > break; > @@ -1040,7 +1041,7 @@ static void vfio_disconnect_container(VFIOGroup *group) > QLIST_REMOVE(container, next); > > QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { > - memory_region_unregister_iommu_notifier(&giommu->n); > + memory_region_unregister_iommu_notifier(giommu->iommu, &giommu->n); > QLIST_REMOVE(giommu, giommu_next); > g_free(giommu); > } > diff --git a/include/exec/memory.h b/include/exec/memory.h > index bfb08d4..1c41eb6 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -151,6 +151,10 @@ struct MemoryRegionIOMMUOps { > IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); > /* Returns supported page sizes */ > uint64_t (*get_page_sizes)(MemoryRegion *iommu); > + /* Called when the first notifier is set */ > + void (*notify_started)(MemoryRegion *iommu); > + /* Called when the last notifier is removed */ > + void (*notify_stopped)(MemoryRegion *iommu); > }; > > typedef struct CoalescedMemoryRange CoalescedMemoryRange; > @@ -620,9 +624,11 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write); > * memory_region_unregister_iommu_notifier: unregister a notifier for > * changes to IOMMU translation entries. > * > + * @mr: the memory region which was observed and for which notity_stopped() > + * needs to be called > * @n: the notifier to be removed. > */ > -void memory_region_unregister_iommu_notifier(Notifier *n); > +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n); > > /** > * memory_region_name: get a memory region's name > diff --git a/memory.c b/memory.c > index d22cf5e..fcf978a 100644 > --- a/memory.c > +++ b/memory.c > @@ -1512,6 +1512,10 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) > > void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n) > { > + if (mr->iommu_ops->notify_started && > + QLIST_EMPTY(&mr->iommu_notify.notifiers)) { > + mr->iommu_ops->notify_started(mr); > + } > notifier_list_add(&mr->iommu_notify, n); > } > > @@ -1545,9 +1549,13 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write) > } > } > > -void memory_region_unregister_iommu_notifier(Notifier *n) > +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n) > { > notifier_remove(n); > + if (mr->iommu_ops->notify_stopped && > + QLIST_EMPTY(&mr->iommu_notify.notifiers)) { > + mr->iommu_ops->notify_stopped(mr); > + } > } > > void memory_region_notify_iommu(MemoryRegion *mr, -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson