* [PATCH V2 0/3] Passthru device support under emulated amd-iommu
From: Wei Huang @ 2020-10-02 14:59 UTC
  To: qemu-devel
  Cc: ehabkost, mst, wei.huang2, peterx, alex.williamson, pbonzini,
	Suravee.Suthikulpanit, rth

This patchset adds support for passthru devices running inside VMs under
the management of an emulated amd-iommu device (vIOMMU). This feature
brings a variety of benefits to guest VMs, including enhanced I/O
security and user-mode driver support.

This patchset has been tested with various 1G and 10G (SR-IOV) NICs on
AMD boxes.
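
For reference, a guest of this kind can be launched roughly as follows
(the machine options, host BDF, and disk image below are illustrative
placeholders, not the exact test configuration):

    qemu-system-x86_64 -machine q35 -enable-kvm -m 4G \
        -device amd-iommu \
        -device vfio-pci,host=0000:41:00.0 \
        -drive file=guest.img,if=virtio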

V1->V2:
 * Remove unnecessary VFIO_IOMMU_MAP_DMA errno check (Alex Williamson)

Thanks,
-Wei

Wei Huang (3):
  amd-iommu: Add address space notifier and replay support
  amd-iommu: Sync IOVA-to-GPA translation during page invalidation
  amd-iommu: Fix amdvi_mmio_trace() to differentiate MMIO R/W

 hw/i386/amd_iommu.c | 243 ++++++++++++++++++++++++++++++++++++++++++--
 hw/i386/amd_iommu.h |  13 +++
 2 files changed, 245 insertions(+), 11 deletions(-)

-- 
2.25.2

* [PATCH V2 1/3] amd-iommu: Add address space notifier and replay support
From: Wei Huang @ 2020-10-02 14:59 UTC
  To: qemu-devel
  Cc: ehabkost, mst, wei.huang2, peterx, alex.williamson, pbonzini,
	Suravee.Suthikulpanit, rth

Currently the emulated amd-iommu device supports neither memory address
space notifiers nor replay. Both are required for I/O devices to be
assigned to guest VMs as passthru devices. This patch adds the basic
as_notifier infrastructure and a replay function to amd_iommu.
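
For context, a consumer such as VFIO receives these events by registering
an IOMMUNotifier on the device's IOMMU memory region. A minimal sketch of
that registration against QEMU's memory API (the callback name and the
full-range bounds are illustrative, given an AMDVIAddressSpace *as):

    static void example_iommu_notify(IOMMUNotifier *n, IOMMUTLBEntry *entry)
    {
        if (entry->perm == IOMMU_NONE) {
            /* UNMAP of [entry->iova, entry->iova + entry->addr_mask] */
        } else {
            /* MAP of entry->iova onto entry->translated_addr */
        }
    }

    IOMMUNotifier n;

    /* ask for both MAP and UNMAP events over the whole address range */
    iommu_notifier_init(&n, example_iommu_notify, IOMMU_NOTIFIER_ALL,
                        0, HWADDR_MAX, 0 /* iommu_idx */);
    memory_region_register_iommu_notifier(MEMORY_REGION(&as->iommu), &n,
                                          &error_abort);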

Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---
 hw/i386/amd_iommu.c | 45 +++++++++++++++++++++++++++++++++++++++------
 hw/i386/amd_iommu.h |  3 +++
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 74a93a5d93f4..c7d24a05484d 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -63,6 +63,8 @@ struct AMDVIAddressSpace {
     IOMMUMemoryRegion iommu;    /* Device's address translation region  */
     MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
     AddressSpace as;            /* device's corresponding address space */
+    IOMMUNotifierFlag notifier_flags; /* notifier flags of address space */
+    QLIST_ENTRY(AMDVIAddressSpace) next; /* notifier linked list */
 };
 
 /* AMDVI cache entry */
@@ -425,6 +427,22 @@ static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
     trace_amdvi_all_inval();
 }
 
+static void amdvi_address_space_unmap(AMDVIAddressSpace *as, IOMMUNotifier *n)
+{
+    IOMMUTLBEntry entry;
+    hwaddr start = n->start;
+    hwaddr end = n->end;
+    hwaddr size = end - start + 1;
+
+    entry.target_as = &address_space_memory;
+    entry.iova = start;
+    entry.translated_addr = 0;
+    entry.perm = IOMMU_NONE;
+    entry.addr_mask = size - 1;
+
+    memory_region_notify_one(n, &entry);
+}
+
 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
                                             gpointer user_data)
 {
@@ -1473,14 +1491,17 @@ static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                            Error **errp)
 {
     AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+    AMDVIState *s = as->iommu_state;
 
-    if (new & IOMMU_NOTIFIER_MAP) {
-        error_setg(errp,
-                   "device %02x.%02x.%x requires iommu notifier which is not "
-                   "currently supported", as->bus_num, PCI_SLOT(as->devfn),
-                   PCI_FUNC(as->devfn));
-        return -EINVAL;
+    /* Update address space notifier flags */
+    as->notifier_flags = new;
+
+    if (old == IOMMU_NOTIFIER_NONE) {
+        QLIST_INSERT_HEAD(&s->amdvi_as_with_notifiers, as, next);
+    } else if (new == IOMMU_NOTIFIER_NONE) {
+        QLIST_REMOVE(as, next);
     }
+
     return 0;
 }
 
@@ -1573,6 +1594,8 @@ static void amdvi_realize(DeviceState *dev, Error **errp)
     /* Pseudo address space under root PCI bus. */
     x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
 
+    QLIST_INIT(&s->amdvi_as_with_notifiers);
+
     /* set up MMIO */
     memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                           AMDVI_MMIO_SIZE);
@@ -1631,12 +1654,22 @@ static const TypeInfo amdviPCI = {
     },
 };
 
+static void amdvi_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
+{
+    AMDVIAddressSpace *as = container_of(iommu_mr, AMDVIAddressSpace, iommu);
+
+    amdvi_address_space_unmap(as, n);
+
+    return;
+}
+
 static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
 {
     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
 
     imrc->translate = amdvi_translate;
     imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
+    imrc->replay = amdvi_iommu_replay;
 }
 
 static const TypeInfo amdvi_iommu_memory_region_info = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 79d38a3e4184..29b7a35a51a5 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -362,6 +362,9 @@ struct AMDVIState {
     /* for each served device */
     AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
 
+    /* list of registered notifiers */
+    QLIST_HEAD(, AMDVIAddressSpace) amdvi_as_with_notifiers;
+
     /* IOTLB */
     GHashTable *iotlb;
 
-- 
2.25.2

* [PATCH V2 2/3] amd-iommu: Sync IOVA-to-GPA translation during page invalidation
From: Wei Huang @ 2020-10-02 14:59 UTC
  To: qemu-devel
  Cc: ehabkost, mst, wei.huang2, peterx, alex.williamson, pbonzini,
	Suravee.Suthikulpanit, rth

Add support for syncing the IOVA-to-GPA translation at the time of IOMMU
page invalidation. The sync function is invoked when the two IOMMU
commands AMDVI_CMD_INVAL_AMDVI_PAGES and AMDVI_CMD_INVAL_AMDVI_ALL are
intercepted, and the registered address space notifiers are called
accordingly.
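
The range decoding follows the AMD IOMMU spec: when the S bit is set,
consecutive 1s in the address bits from bit 12 upward double the range,
and the first 0 bit terminates it. A standalone sketch of the decoding
this patch implements with cto64() (function name and example values are
illustrative; __builtin_ctzll(~x) is equivalent to QEMU's cto64(x)):

    #include <stdint.h>

    /* With S=1, count the trailing 1-bits of (addr | 0xFFF); the
     * invalidation range is then 2^(ones + 1) bytes. */
    static uint64_t inval_pages_size(uint64_t addr)
    {
        uint64_t ones = __builtin_ctzll(~(addr | 0xFFFULL));
        return 1ULL << (ones + 1);
    }

    /* e.g. addr = 0x107000: bits 12-14 set -> 15 trailing ones
     *      -> size = 1 << 16 = 64KiB, base = addr & ~0xFFFF = 0x100000.
     * The all-1s encoding (invalidate everything) needs special-casing,
     * which the patch handles via its size < 0x1000 fallback. */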

Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---
 hw/i386/amd_iommu.c | 177 ++++++++++++++++++++++++++++++++++++++++++++
 hw/i386/amd_iommu.h |  10 +++
 2 files changed, 187 insertions(+)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index c7d24a05484d..7ce68289d20c 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -76,6 +76,12 @@ typedef struct AMDVIIOTLBEntry {
     uint64_t page_mask;         /* physical page size  */
 } AMDVIIOTLBEntry;
 
+static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry);
+static void amdvi_sync_domain(AMDVIState *s, uint32_t domid,
+                              uint64_t addr, uint16_t flags);
+static void amdvi_walk_level(AMDVIAddressSpace *as, uint64_t pte,
+                             uint64_t iova, uint64_t partial);
+
 /* configure MMIO registers at startup/reset */
 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
                            uint64_t romask, uint64_t w1cmask)
@@ -443,6 +449,78 @@ static void amdvi_address_space_unmap(AMDVIAddressSpace *as, IOMMUNotifier *n)
     memory_region_notify_one(n, &entry);
 }
 
+/*
+ * Sync the IOVA-to-GPA translation at the time of IOMMU page invalidation.
+ * This function is called when IOMMU commands, AMDVI_CMD_INVAL_AMDVI_PAGES
+ * and AMDVI_CMD_INVAL_AMDVI_ALL, are triggered.
+ *
+ * The range of addr invalidation is determined by addr and flags, using
+ * the following rules:
+ *   - All pages
+ *     In this case, we unmap the whole address space and then re-walk the
+ *     I/O page table to sync the mapping relationship.
+ *   - Single page
+ *     Re-walk the page based on the specified iova, and only sync the
+ *     newly mapped page.
+ */
+static void amdvi_sync_domain(AMDVIState *s, uint32_t domid,
+                              uint64_t addr, uint16_t flags)
+{
+    AMDVIAddressSpace *as;
+    bool sync_all_domains = false;
+    uint64_t mask, size = 0x1000;
+
+    if (domid == AMDVI_DOMAIN_ALL) {
+        sync_all_domains = true;
+    }
+
+    /* S=1 means the invalidation size is from addr field; otherwise 4KB */
+    if (flags & AMDVI_CMD_INVAL_IOMMU_PAGES_S_BIT) {
+        uint32_t zbit = cto64(addr | 0xFFF) + 1;
+
+        size = 1ULL << zbit;
+
+        if (size < 0x1000) {
+            addr = 0;
+            size = AMDVI_PGSZ_ENTIRE;
+        } else {
+            mask = ~(size - 1);
+            addr &= mask;
+        }
+    }
+
+    QLIST_FOREACH(as, &s->amdvi_as_with_notifiers, next) {
+        uint64_t dte[4];
+        IOMMUNotifier *n;
+
+        if (!amdvi_get_dte(s, as->devfn, dte)) {
+            continue;
+        }
+
+        if (!sync_all_domains && (domid != (dte[1] & 0xFFFULL))) {
+            continue;
+        }
+
+        /*
+         * In case of syncing more than a page, we invalidate the entire
+         * address range and re-walk the whole page table.
+         */
+        if (size == AMDVI_PGSZ_ENTIRE) {
+            IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+                amdvi_address_space_unmap(as, n);
+            }
+        } else if (size > 0x1000) {
+            IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+                if (n->start <= addr && addr + size < n->end) {
+                    amdvi_address_space_unmap(as, n);
+                }
+            }
+        }
+
+        amdvi_walk_level(as, dte[0], addr, 0);
+    }
+}
+
 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
                                             gpointer user_data)
 {
@@ -455,6 +533,8 @@ static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
 static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
 {
     uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
+    uint64_t addr  = cpu_to_le64(extract64(cmd[1], 12, 52)) << 12;
+    uint16_t flags = cpu_to_le16((uint16_t)extract64(cmd[1], 0, 12));
 
     if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
         extract64(cmd[1], 3, 9)) {
@@ -465,6 +545,8 @@ static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
     g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                 &domid);
     trace_amdvi_pages_inval(domid);
+
+    amdvi_sync_domain(s, domid, addr, flags);
 }
 
 static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
@@ -910,6 +992,101 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
     return pte;
 }
 
+static inline uint64_t pte_get_page_size(uint64_t level)
+{
+    return 1UL << ((level * 9) + 3);
+}
+
+static void amdvi_sync_iova(AMDVIAddressSpace *as, uint64_t pte, uint64_t iova)
+{
+    IOMMUTLBEntry entry;
+    uint64_t addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+    uint32_t level = get_pte_translation_mode(pte);
+    uint64_t size = pte_get_page_size(level + 1);
+    uint64_t perm = amdvi_get_perms(pte);
+
+    assert(level == 0 || level == 7);
+
+    entry.target_as = &address_space_memory;
+    entry.iova = iova;
+    entry.perm = perm;
+    if (level == 0) {
+        entry.addr_mask = size - 1;
+        entry.translated_addr = addr;
+    } else if (level == 7) {
+        entry.addr_mask = (1 << (cto64(addr | 0xFFF) + 1)) - 1;
+        entry.translated_addr = addr & ~entry.addr_mask;
+    }
+
+    memory_region_notify_iommu(&as->iommu, 0, entry);
+}
+
+/*
+ * Walk the I/O page table and notify mapping changes. Note that iova
+ * determines this function's behavior:
+ *   - iova == 0: re-walk the whole page table
+ *   - iova != 0: re-walk the address defined in iova
+ */
+static void amdvi_walk_level(AMDVIAddressSpace *as, uint64_t pte,
+                             uint64_t iova, uint64_t partial)
+{
+    uint64_t index = 0;
+    uint8_t level = get_pte_translation_mode(pte);
+    uint64_t cur_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+    uint64_t end_addr = cur_addr + 4096;
+    uint64_t new_partial = 0;
+
+    if (!(pte & AMDVI_PTE_PRESENT)) {
+        return;
+    }
+
+    if (level == 7) {
+        amdvi_sync_iova(as, pte, iova);
+        return;
+    }
+
+    /* narrow the scope of table walk if iova != 0 */
+    if (iova) {
+        cur_addr += ((iova >> (3 + 9 * level)) & 0x1FF) << 3;
+        end_addr = cur_addr + 8;
+    }
+
+    while (cur_addr < end_addr) {
+        int cur_addr_inc = 8;
+        int index_inc = 1;
+
+        pte = amdvi_get_pte_entry(as->iommu_state, cur_addr, as->devfn);
+        /* validate the entry */
+        if (!(pte & AMDVI_PTE_PRESENT)) {
+            goto next;
+        }
+
+        if (level > 1) {
+            new_partial = (partial << 9) | index;
+            amdvi_walk_level(as, pte, iova, new_partial);
+        } else {
+            /* found a page, sync the mapping first */
+            if (iova) {
+                amdvi_sync_iova(as, pte, iova);
+            } else {
+                amdvi_sync_iova(as, pte, ((partial << 9) | index) << 12);
+            }
+
+            /* skip following entries when a large page is found */
+            if (get_pte_translation_mode(pte) == 7) {
+                int skipped = 1 << (cto64(pte >> 12) + 1);
+
+                cur_addr_inc = 8 * skipped;
+                index_inc = skipped;
+            }
+        }
+
+next:
+        cur_addr += cur_addr_inc;
+        index += index_inc;
+    }
+}
+
 static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                             IOMMUTLBEntry *ret, unsigned perms,
                             hwaddr addr)
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 29b7a35a51a5..6d80f4913984 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -123,6 +123,8 @@
 #define AMDVI_CMD_COMPLETE_PPR_REQUEST    0x07
 #define AMDVI_CMD_INVAL_AMDVI_ALL         0x08
 
+#define AMDVI_CMD_INVAL_IOMMU_PAGES_S_BIT (1ULL << 0)
+
 #define AMDVI_DEVTAB_ENTRY_SIZE           32
 
 /* Device table entry bits 0:63 */
@@ -148,6 +150,9 @@
 #define AMDVI_EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12)
 #define AMDVI_EVENT_COMMAND_HW_ERROR      (0x6U << 12)
 
+/* PTE bits */
+#define AMDVI_PTE_PRESENT                 (1ULL << 0)
+
 #define AMDVI_EVENT_LEN                  16
 #define AMDVI_PERM_READ             (1 << 0)
 #define AMDVI_PERM_WRITE            (1 << 1)
@@ -198,6 +203,11 @@
 #define AMDVI_MAX_PH_ADDR          (40UL << 8)
 #define AMDVI_MAX_GVA_ADDR         (48UL << 15)
 
+#define AMDVI_PGSZ_ENTIRE          (0x0007FFFFFFFFF000ULL)
+
+/* The domain id is 16-bit, so use 32-bit all 1's to represent all domains */
+#define AMDVI_DOMAIN_ALL           (UINT32_MAX)
+
 /* Completion Wait data size */
 #define AMDVI_COMPLETION_DATA_SIZE    8
 
-- 
2.25.2

* [PATCH V2 3/3] amd-iommu: Fix amdvi_mmio_trace() to differentiate MMIO R/W
From: Wei Huang @ 2020-10-02 14:59 UTC
  To: qemu-devel
  Cc: ehabkost, mst, wei.huang2, peterx, alex.williamson, pbonzini,
	Suravee.Suthikulpanit, rth

The amd-iommu MMIO trace function does not differentiate MMIO writes
from reads. Let us extend it to support both types.
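
As a usage note, both trace points can then be enabled together at run
time with a trace pattern, e.g. something like:

    qemu-system-x86_64 ... -trace "amdvi_mmio_*"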

Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---
 hw/i386/amd_iommu.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 7ce68289d20c..914368f0e4b7 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -662,17 +662,28 @@ static void amdvi_cmdbuf_run(AMDVIState *s)
     }
 }
 
-static void amdvi_mmio_trace(hwaddr addr, unsigned size)
+static void amdvi_mmio_trace(hwaddr addr, unsigned size, bool iswrite,
+                             uint64_t val)
 {
     uint8_t index = (addr & ~0x2000) / 8;
 
     if ((addr & 0x2000)) {
         /* high table */
         index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
-        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+        if (!iswrite)
+            trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size,
+                                  addr & ~0x07);
+        else
+            trace_amdvi_mmio_write(amdvi_mmio_high[index], addr, size, val,
+                                   addr & ~0x07);
     } else {
         index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
-        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
+        if (!iswrite)
+            trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size,
+                                  addr & ~0x07);
+        else
+            trace_amdvi_mmio_write(amdvi_mmio_low[index], addr, size, val,
+                                   addr & ~0x07);
     }
 }
 
@@ -693,7 +704,7 @@ static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
     } else if (size == 8) {
         val = amdvi_readq(s, addr);
     }
-    amdvi_mmio_trace(addr, size);
+    amdvi_mmio_trace(addr, size, false, val);
 
     return val;
 }
@@ -840,7 +851,7 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
         return;
     }
 
-    amdvi_mmio_trace(addr, size);
+    amdvi_mmio_trace(addr, size, true, val);
     switch (addr & ~0x07) {
     case AMDVI_MMIO_CONTROL:
         amdvi_mmio_reg_write(s, size, val, addr);
-- 
2.25.2

* Re: [PATCH V2 1/3] amd-iommu: Add address space notifier and replay support
From: Peter Xu @ 2020-10-02 18:53 UTC
  To: Wei Huang
  Cc: ehabkost, mst, qemu-devel, alex.williamson, pbonzini,
	Suravee.Suthikulpanit, rth

On Fri, Oct 02, 2020 at 09:59:05AM -0500, Wei Huang wrote:
> +static void amdvi_address_space_unmap(AMDVIAddressSpace *as, IOMMUNotifier *n)
> +{
> +    IOMMUTLBEntry entry;
> +    hwaddr start = n->start;
> +    hwaddr end = n->end;
> +    hwaddr size = end - start + 1;
> +
> +    entry.target_as = &address_space_memory;
> +    entry.iova = start;
> +    entry.translated_addr = 0;
> +    entry.perm = IOMMU_NONE;
> +    entry.addr_mask = size - 1;

This may race with Eugenio's series:

  https://mail.gnu.org/archive/html/qemu-ppc/2020-09/msg00131.html

IMHO that series should be acceptable for merging already.  Anyway, there's
probably a trivial conflict to solve.

> +
> +    memory_region_notify_one(n, &entry);
> +}
> +
>  static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
>                                              gpointer user_data)
>  {
> @@ -1473,14 +1491,17 @@ static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
>                                             Error **errp)
>  {
>      AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
> +    AMDVIState *s = as->iommu_state;
>  
> -    if (new & IOMMU_NOTIFIER_MAP) {
> -        error_setg(errp,
> -                   "device %02x.%02x.%x requires iommu notifier which is not "
> -                   "currently supported", as->bus_num, PCI_SLOT(as->devfn),
> -                   PCI_FUNC(as->devfn));
> -        return -EINVAL;

Ideally, we shouldn't remove this error message until the functionality is
fully implemented.  Otherwise a user could potentially boot such a VM with a
broken assigned device.

Thanks,

> +    /* Update address space notifier flags */
> +    as->notifier_flags = new;
> +
> +    if (old == IOMMU_NOTIFIER_NONE) {
> +        QLIST_INSERT_HEAD(&s->amdvi_as_with_notifiers, as, next);
> +    } else if (new == IOMMU_NOTIFIER_NONE) {
> +        QLIST_REMOVE(as, next);
>      }
> +
>      return 0;
>  }

-- 
Peter Xu

* Re: [PATCH V2 2/3] amd-iommu: Sync IOVA-to-GPA translation during page invalidation
From: Peter Xu @ 2020-10-02 19:11 UTC
  To: Wei Huang
  Cc: ehabkost, mst, qemu-devel, alex.williamson, pbonzini,
	Suravee.Suthikulpanit, rth

On Fri, Oct 02, 2020 at 09:59:06AM -0500, Wei Huang wrote:
> +static void amdvi_sync_domain(AMDVIState *s, uint32_t domid,
> +                              uint64_t addr, uint16_t flags)
> +{

[...]

> +        /*
> +         * In case of syncing more than a page, we invalidate the entire
> +         * address range and re-walk the whole page table.
> +         */
> +        if (size == AMDVI_PGSZ_ENTIRE) {
> +            IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
> +                amdvi_address_space_unmap(as, n);
> +            }
> +        } else if (size > 0x1000) {
> +            IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
> +                if (n->start <= addr && addr + size < n->end) {
> +                    amdvi_address_space_unmap(as, n);
> +                }
> +            }
> +        }

So this may not work... DMA could happen concurrently with the page
sync, so the DMA could fail if the mapping is suddenly gone (even if
only for a very short period).

We encountered this problem on x86 and those will be reported as host DMAR
errors (since those pages were missing), please feel free to have a look at:

  63b88968f1 ("intel-iommu: rework the page walk logic", 2018-05-23)

Majorly this paragraph:

  For each VTDAddressSpace, now we maintain what IOVA ranges we have
  mapped and what we have not.  With that information, now we only send
  MAP or UNMAP when necessary.  Say, we don't send MAP notifies if we
  know we have already mapped the range, meanwhile we don't send UNMAP
  notifies if we know we never mapped the range at all.

So the simple idea is: as long as a page is mapped and the guest has not
unmapped it, we can _never_ transiently unmap it ourselves, because the
device might be using it simultaneously.

What VT-d does right now is maintain a per-device iova tree, so that we
know exactly what has been mapped.  That's kind of overkill for sure
(especially because the vfio kernel module maintains a similar iova
tree, so the work is at least partly duplicated), however it should fix
this problem.
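
A minimal sketch of that bookkeeping, reusing QEMU's IOVATree helpers
(util/iova-tree.c) the way intel-iommu does -- the wrapper below is
hypothetical, not code from either series:

    static void sync_one_mapping(IOVATree *tree, IOMMUNotifier *n,
                                 IOMMUTLBEntry *entry)
    {
        DMAMap target = {
            .iova = entry->iova,
            .size = entry->addr_mask,  /* IOVATree sizes are length - 1 */
            .translated_addr = entry->translated_addr,
            .perm = entry->perm,
        };
        const DMAMap *mapped = iova_tree_find(tree, &target);

        if (entry->perm == IOMMU_NONE) {
            /* only UNMAP a range we know is currently mapped */
            if (mapped) {
                iova_tree_remove(tree, &target);
                memory_region_notify_one(n, entry);
            }
        } else if (!mapped) {
            /* only MAP a range that is not already mapped */
            iova_tree_insert(tree, &target);
            memory_region_notify_one(n, entry);
        }
    }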

-- 
Peter Xu