All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Auger <eric.auger@redhat.com>
To: eric.auger.pro@gmail.com, eric.auger@redhat.com,
	qemu-devel@nongnu.org, qemu-arm@nongnu.org,
	peter.maydell@linaro.org
Cc: drjones@redhat.com, yi.l.liu@intel.com, mst@redhat.com,
	jean-philippe.brucker@arm.com, zhangfei.gao@foxmail.com,
	peterx@redhat.com, alex.williamson@redhat.com,
	vincent.stehle@arm.com
Subject: [Qemu-devel] [RFC v5 17/29] vfio: Set up nested stage mappings
Date: Thu, 11 Jul 2019 19:39:21 +0200	[thread overview]
Message-ID: <20190711173933.31203-18-eric.auger@redhat.com> (raw)
In-Reply-To: <20190711173933.31203-1-eric.auger@redhat.com>

In nested mode, legacy vfio_iommu_map_notify cannot be used as
there is no "caching" mode and we do not trap on map.

On Intel, vfio_iommu_map_notify was used to DMA map the RAM
through the host single stage.

With nested mode, we need to set up the stage 2 and the stage 1
separately. This patch introduces a prereg_listener to set up
the stage 2 mapping.

The stage 1 mapping, owned by the guest, is passed to the host
when the guest invalidates the stage 1 configuration, through
a dedicated PCIPASIDOps callback. Guest IOTLB invalidations
are cascaded down to the host through another IOMMU MR UNMAP
notifier.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v4 -> v5:
- use VFIO_IOMMU_SET_PASID_TABLE
- use PCIPASIDOps for config notification

v3 -> v4:
- use iommu_inv_pasid_info for ASID invalidation

v2 -> v3:
- use VFIO_IOMMU_ATTACH_PASID_TABLE
- new user API
- handle leaf

v1 -> v2:
- adapt to uapi changes
- pass the asid
- pass IOMMU_NOTIFIER_S1_CFG when initializing the config notifier
---
 hw/vfio/common.c     | 123 +++++++++++++++++++++++++++++++++++++++----
 hw/vfio/pci.c        |  21 ++++++++
 hw/vfio/trace-events |   2 +
 3 files changed, 136 insertions(+), 10 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ef8452a4bc..bd975c5b83 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -444,6 +444,52 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
     return true;
 }
 
+/* Propagate a guest IOTLB invalidation to the host (nested mode) */
+static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+    hwaddr start = iotlb->iova + giommu->iommu_offset;
+
+    VFIOContainer *container = giommu->container;
+    struct vfio_iommu_type1_cache_invalidate ustruct;
+    size_t size = iotlb->addr_mask + 1;
+    int ret;
+
+    assert(iotlb->perm == IOMMU_NONE);
+
+    ustruct.argsz = sizeof(ustruct);
+    ustruct.flags = 0;
+    ustruct.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1;
+
+    if (size <= 0x10000) {
+        ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB;
+        ustruct.info.granularity = IOMMU_INV_GRANU_ADDR;
+        ustruct.info.addr_info.flags = IOMMU_INV_ADDR_FLAGS_ARCHID;
+        if (iotlb->leaf) {
+            ustruct.info.addr_info.flags |= IOMMU_INV_ADDR_FLAGS_LEAF;
+        }
+        ustruct.info.addr_info.archid = iotlb->arch_id;
+        ustruct.info.addr_info.addr = start;
+        ustruct.info.addr_info.granule_size = size;
+        ustruct.info.addr_info.nb_granules = 1;
+        trace_vfio_iommu_addr_inv_iotlb(iotlb->arch_id, start, size, 1,
+                                        iotlb->leaf);
+    } else {
+        ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB;
+        ustruct.info.granularity = IOMMU_INV_GRANU_PASID;
+        ustruct.info.pasid_info.archid = iotlb->arch_id;
+        ustruct.info.pasid_info.flags = IOMMU_INV_PASID_FLAGS_ARCHID;
+        trace_vfio_iommu_asid_inv_iotlb(iotlb->arch_id);
+    }
+
+    ret = ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct);
+    if (ret) {
+        error_report("%p: failed to invalidate CACHE for 0x%"PRIx64
+                     " mask=0x%"PRIx64" (%d)",
+                     container, start, iotlb->addr_mask, ret);
+    }
+}
+
 static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 {
     VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
@@ -613,6 +659,32 @@ static void vfio_dma_unmap_ram_section(VFIOContainer *container,
     }
 }
 
+static void vfio_prereg_listener_region_add(MemoryListener *listener,
+                                            MemoryRegionSection *section)
+{
+    VFIOContainer *container =
+        container_of(listener, VFIOContainer, prereg_listener);
+
+    if (!memory_region_is_ram(section->mr)) {
+        return;
+    }
+
+    vfio_dma_map_ram_section(container, section);
+
+}
+static void vfio_prereg_listener_region_del(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container =
+        container_of(listener, VFIOContainer, prereg_listener);
+
+    if (!memory_region_is_ram(section->mr)) {
+        return;
+    }
+
+    vfio_dma_unmap_ram_section(container, section);
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
@@ -709,10 +781,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
     memory_region_ref(section->mr);
 
     if (memory_region_is_iommu(section->mr)) {
+        IOMMUNotify notify;
         VFIOGuestIOMMU *giommu;
         IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
         bool nested;
-        int iommu_idx;
+        int iommu_idx, flags;
 
         trace_vfio_listener_region_add_iommu(iova, end);
 
@@ -738,15 +811,26 @@ static void vfio_listener_region_add(MemoryListener *listener,
         llend = int128_sub(llend, int128_one());
         iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                        MEMTXATTRS_UNSPECIFIED);
-        iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
-                            IOMMU_NOTIFIER_ALL,
-                            section->offset_within_region,
-                            int128_get64(llend),
-                            iommu_idx);
-        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
+        if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+            /* IOTLB unmap notifier to propagate guest IOTLB invalidations */
+            flags = IOMMU_NOTIFIER_UNMAP;
+            notify = vfio_iommu_unmap_notify;
+        } else {
+            /* MAP/UNMAP IOTLB notifier */
+            flags = IOMMU_NOTIFIER_ALL;
+            notify = vfio_iommu_map_notify;
+        }
+
+        iommu_notifier_init(&giommu->n, notify, flags,
+                            section->offset_within_region,
+                            int128_get64(llend), iommu_idx);
+        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
         memory_region_register_iommu_notifier(section->mr, &giommu->n);
-        memory_region_iommu_replay(giommu->iommu, &giommu->n);
+
+        if (flags & IOMMU_NOTIFIER_MAP) {
+            memory_region_iommu_replay(giommu->iommu, &giommu->n);
+        }
 
         return;
     }
@@ -835,15 +919,21 @@ static void vfio_listener_region_del(MemoryListener *listener,
     }
 }
 
-static const MemoryListener vfio_memory_listener = {
+static MemoryListener vfio_memory_listener = {
     .region_add = vfio_listener_region_add,
     .region_del = vfio_listener_region_del,
 };
 
+static MemoryListener vfio_memory_prereg_listener = {
+    .region_add = vfio_prereg_listener_region_add,
+    .region_del = vfio_prereg_listener_region_del,
+};
+
 static void vfio_listener_release(VFIOContainer *container)
 {
     memory_listener_unregister(&container->listener);
-    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
+        container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
         memory_listener_unregister(&container->prereg_listener);
     }
 }
@@ -1340,6 +1430,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         }
         vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
         container->pgsizes = info.iova_pgsizes;
+
+        if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+            container->prereg_listener = vfio_memory_prereg_listener;
+            memory_listener_register(&container->prereg_listener,
+                                     &address_space_memory);
+            if (container->error) {
+                memory_listener_unregister(&container->prereg_listener);
+                ret = container->error;
+                error_setg(errp, "RAM memory listener initialization failed "
+                          " for container");
+                goto free_container_exit;
+            }
+        }
         break;
     }
     case VFIO_SPAPR_TCE_v2_IOMMU:
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d7a4e1875c..729f1f353e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2693,6 +2693,25 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn,
+                                      IOMMUConfig *config)
+{
+    PCIDevice *pdev = bus->devices[devfn];
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOContainer *container = vdev->vbasedev.group->container;
+    struct vfio_iommu_type1_set_pasid_table info;
+
+    info.argsz = sizeof(info);
+    info.flags = VFIO_PASID_TABLE_FLAG_SET;
+    memcpy(&info.config, &config->pasid_cfg, sizeof(config->pasid_cfg));
+
+    return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
+}
+
+static PCIPASIDOps vfio_pci_pasid_ops = {
+    .set_pasid_table = vfio_iommu_set_pasid_table,
+};
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
@@ -2994,6 +3013,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
     vfio_register_req_notifier(vdev);
     vfio_setup_resetfn_quirk(vdev);
 
+    pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops);
+
     return;
 
 out_teardown:
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 410801de6e..9f1868af2d 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -115,6 +115,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic
 vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
 vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
 vfio_dma_unmap_overflow_workaround(void) ""
+vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d"
+vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d"
 
 # platform.c
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
-- 
2.20.1



  parent reply	other threads:[~2019-07-11 17:43 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-11 17:39 [Qemu-devel] [RFC v5 00/29] vSMMUv3/pSMMUv3 2 stage VFIO integration Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 01/29] memory: Remove unused memory_region_iommu_replay_all() Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 02/29] memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region attribute Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 03/29] hw/vfio/common: Assert in case of nested mode Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 04/29] hw/arm/smmuv3: Log a guest error when decoding an invalid STE Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 05/29] hw/arm/smmuv3: Remove spurious error messages on IOVA invalidations Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 06/29] update-linux-headers: Import iommu.h Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 07/29] update-linux-headers: Add sve_context.h to asm-arm64 Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 08/29] header update against 5.3.0-rc0 and IOMMU/VFIO nested stage APIs Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 09/29] memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region attribute Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 10/29] memory: Introduce IOMMU Memory Region inject_faults API Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 11/29] memory: Add arch_id and leaf fields in IOTLBEntry Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 12/29] iommu: Introduce generic header Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 13/29] pci: introduce PCIPASIDOps to PCIDevice Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 14/29] vfio: Force nested if iommu requires it Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 15/29] vfio: Introduce hostwin_from_range helper Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 16/29] vfio: Introduce helpers to DMA map/unmap a RAM section Eric Auger
2019-07-11 17:39 ` Eric Auger [this message]
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 18/29] vfio: Pass stage 1 MSI bindings to the host Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 19/29] vfio: Helper to get IRQ info including capabilities Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 20/29] vfio/pci: Register handler for iommu fault Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 21/29] vfio/pci: Set up the DMA FAULT region Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 22/29] vfio/pci: Implement the DMA fault handler Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 23/29] hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 24/29] hw/arm/smmuv3: Store the PASID table GPA in the translation config Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 25/29] hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA invalidation Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 26/29] hw/arm/smmuv3: Fill the IOTLBEntry leaf field " Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 27/29] hw/arm/smmuv3: Pass stage 1 configurations to the host Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 28/29] hw/arm/smmuv3: Implement fault injection Eric Auger
2019-07-11 17:39 ` [Qemu-devel] [RFC v5 29/29] vfio: Remove VFIO/SMMUv3 assert Eric Auger
2019-07-11 22:26 ` [Qemu-devel] [RFC v5 00/29] vSMMUv3/pSMMUv3 2 stage VFIO integration no-reply
2019-07-12  7:23   ` Auger Eric

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190711173933.31203-18-eric.auger@redhat.com \
    --to=eric.auger@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=drjones@redhat.com \
    --cc=eric.auger.pro@gmail.com \
    --cc=jean-philippe.brucker@arm.com \
    --cc=mst@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=peterx@redhat.com \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=vincent.stehle@arm.com \
    --cc=yi.l.liu@intel.com \
    --cc=zhangfei.gao@foxmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.