All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration
@ 2017-03-30 12:47 Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion Alexey Kardashevskiy
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-30 12:47 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Alex Williamson,
	Paolo Bonzini


This is my current working tree to support the kernel's
"powerpc/kvm/vfio: Enable in-kernel acceleration".

Changes:
v2:
* QOM'fied IOMMUMemoryRegion
* fix comments from v1 review

Please comment. Thanks.



Alexey Kardashevskiy (5):
  memory/iommu: QOM'fy IOMMU MemoryRegion
  spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION
  vfio-pci: Reorder group-to-container attaching
  vfio/spapr: Add a notifier for PPC64 HV/PR KVM about new group
    attached to LIOBN
  spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device

 include/exec/memory.h         |  50 ++++++++++++++++----
 include/hw/ppc/spapr.h        |  25 +++++++++-
 include/hw/vfio/vfio-common.h |   3 +-
 include/qemu/typedefs.h       |   1 +
 target/ppc/kvm_ppc.h          |   6 +++
 exec.c                        |  16 +++++--
 hw/ppc/spapr_iommu.c          |  47 +++++++++++++++----
 hw/vfio/common.c              |  41 ++++++++++------
 hw/vfio/spapr.c               |  35 +++++++++++++-
 memory.c                      | 106 ++++++++++++++++++++++++++++--------------
 target/ppc/kvm.c              |   7 ++-
 hw/vfio/trace-events          |   1 +
 12 files changed, 264 insertions(+), 74 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion
  2017-03-30 12:47 [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration Alexey Kardashevskiy
@ 2017-03-30 12:47 ` Alexey Kardashevskiy
  2017-03-30 13:00   ` Paolo Bonzini
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION Alexey Kardashevskiy
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-30 12:47 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Alex Williamson,
	Paolo Bonzini

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/exec/memory.h         |  50 ++++++++++++++++----
 include/hw/ppc/spapr.h        |   3 +-
 include/hw/vfio/vfio-common.h |   2 +-
 include/qemu/typedefs.h       |   1 +
 exec.c                        |  16 +++++--
 hw/ppc/spapr_iommu.c          |  20 ++++----
 hw/vfio/common.c              |  12 +++--
 hw/vfio/spapr.c               |   3 +-
 memory.c                      | 106 ++++++++++++++++++++++++++++--------------
 9 files changed, 148 insertions(+), 65 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index e39256ad03..479d8fbfe2 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -37,6 +37,10 @@
 #define MEMORY_REGION(obj) \
         OBJECT_CHECK(MemoryRegion, (obj), TYPE_MEMORY_REGION)
 
+#define TYPE_IOMMU_MEMORY_REGION "qemu:iommu-memory-region"
+#define IOMMU_MEMORY_REGION(obj) \
+        OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_IOMMU_MEMORY_REGION)
+
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegionMmio MemoryRegionMmio;
 
@@ -167,11 +171,12 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
 
 struct MemoryRegionIOMMUOps {
     /* Return a TLB entry that contains a given address. */
-    IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
+    IOMMUTLBEntry (*translate)(IOMMUMemoryRegion *iommu, hwaddr addr,
+                               bool is_write);
     /* Returns minimum supported page size */
-    uint64_t (*get_min_page_size)(MemoryRegion *iommu);
+    uint64_t (*get_min_page_size)(IOMMUMemoryRegion *iommu);
     /* Called when IOMMU Notifier flag changed */
-    void (*notify_flag_changed)(MemoryRegion *iommu,
+    void (*notify_flag_changed)(IOMMUMemoryRegion *iommu,
                                 IOMMUNotifierFlag old_flags,
                                 IOMMUNotifierFlag new_flags);
 };
@@ -195,7 +200,6 @@ struct MemoryRegion {
     uint8_t dirty_log_mask;
     RAMBlock *ram_block;
     Object *owner;
-    const MemoryRegionIOMMUOps *iommu_ops;
 
     const MemoryRegionOps *ops;
     void *opaque;
@@ -218,6 +222,12 @@ struct MemoryRegion {
     const char *name;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
+};
+
+struct IOMMUMemoryRegion {
+    MemoryRegion parent_obj;
+
+    const MemoryRegionIOMMUOps *iommu_ops;
     QLIST_HEAD(, IOMMUNotifier) iommu_notify;
     IOMMUNotifierFlag iommu_notify_flags;
 };
@@ -555,19 +565,39 @@ static inline void memory_region_init_reservation(MemoryRegion *mr,
 }
 
 /**
+ * memory_region_init_iommu_type: Initialize a memory region of a custom type
+ * that translates addresses
+ *
+ * An IOMMU region translates addresses and forwards accesses to a target
+ * memory region.
+ *
+ * @mrtypename: QOM class name
+ * @iommumr: the #IOMMUMemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @ops: a function that translates addresses into the @target region
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region.
+ */
+void memory_region_init_iommu_type(const char *mrtypename,
+                                   IOMMUMemoryRegion *iommumr,
+                                   Object *owner,
+                                   const MemoryRegionIOMMUOps *ops,
+                                   const char *name,
+                                   uint64_t size);
+/**
  * memory_region_init_iommu: Initialize a memory region that translates
  * addresses
  *
  * An IOMMU region translates addresses and forwards accesses to a target
  * memory region.
  *
- * @mr: the #MemoryRegion to be initialized
+ * @iommumr: the #IOMMUMemoryRegion to be initialized
  * @owner: the object that tracks the region's reference count
  * @ops: a function that translates addresses into the @target region
  * @name: used for debugging; not visible to the user or ABI
  * @size: size of the region.
  */
-void memory_region_init_iommu(MemoryRegion *mr,
+void memory_region_init_iommu(IOMMUMemoryRegion *iommumr,
                               struct Object *owner,
                               const MemoryRegionIOMMUOps *ops,
                               const char *name,
@@ -633,7 +663,7 @@ static inline bool memory_region_is_iommu(MemoryRegion *mr)
     if (mr->alias) {
         return memory_region_is_iommu(mr->alias);
     }
-    return mr->iommu_ops;
+    return object_dynamic_cast(OBJECT(mr), TYPE_IOMMU_MEMORY_REGION) != NULL;
 }
 
 
@@ -645,7 +675,7 @@ static inline bool memory_region_is_iommu(MemoryRegion *mr)
  *
  * @mr: the memory region being queried
  */
-uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
+uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *mr);
 
 /**
  * memory_region_notify_iommu: notify a change in an IOMMU translation entry.
@@ -664,7 +694,7 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
  *         replaces all old entries for the same virtual I/O address range.
  *         Deleted entries have .@perm == 0.
  */
-void memory_region_notify_iommu(MemoryRegion *mr,
+void memory_region_notify_iommu(IOMMUMemoryRegion *mr,
                                 IOMMUTLBEntry entry);
 
 /**
@@ -689,7 +719,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr,
  * @is_write: Whether to treat the replay as a translate "write"
  *     through the iommu
  */
-void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
+void memory_region_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n,
                                 bool is_write);
 
 /**
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e27de64d31..6997ed7e98 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -590,7 +590,8 @@ struct sPAPRTCETable {
     bool bypass;
     bool need_vfio;
     int fd;
-    MemoryRegion root, iommu;
+    MemoryRegion root;
+    IOMMUMemoryRegion iommu;
     struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
     QLIST_ENTRY(sPAPRTCETable) list;
 };
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c582de18c9..7a4135ae6f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -94,7 +94,7 @@ typedef struct VFIOContainer {
 
 typedef struct VFIOGuestIOMMU {
     VFIOContainer *container;
-    MemoryRegion *iommu;
+    IOMMUMemoryRegion *iommu;
     hwaddr iommu_offset;
     IOMMUNotifier n;
     QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index e95f28cfec..b45f71ec11 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -45,6 +45,7 @@ typedef struct MachineState MachineState;
 typedef struct MemoryListener MemoryListener;
 typedef struct MemoryMappingList MemoryMappingList;
 typedef struct MemoryRegion MemoryRegion;
+typedef struct IOMMUMemoryRegion IOMMUMemoryRegion;
 typedef struct MemoryRegionCache MemoryRegionCache;
 typedef struct MemoryRegionSection MemoryRegionSection;
 typedef struct MigrationIncomingState MigrationIncomingState;
diff --git a/exec.c b/exec.c
index e57a8a2178..3b47dad1f8 100644
--- a/exec.c
+++ b/exec.c
@@ -463,6 +463,7 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
     IOMMUTLBEntry iotlb = {0};
     MemoryRegionSection *section;
     MemoryRegion *mr;
+    IOMMUMemoryRegion *iommumr;
 
     for (;;) {
         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
@@ -471,11 +472,13 @@ IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
                + section->offset_within_region;
         mr = section->mr;
 
-        if (!mr->iommu_ops) {
+        if (!memory_region_is_iommu(mr)) {
             break;
         }
 
-        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+        iommumr = IOMMU_MEMORY_REGION(mr);
+
+        iotlb = iommumr->iommu_ops->translate(iommumr, addr, is_write);
         if (!(iotlb.perm & (1 << is_write))) {
             iotlb.target_as = NULL;
             break;
@@ -497,17 +500,20 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
     IOMMUTLBEntry iotlb;
     MemoryRegionSection *section;
     MemoryRegion *mr;
+    IOMMUMemoryRegion *iommumr;
 
     for (;;) {
         AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
         section = address_space_translate_internal(d, addr, &addr, plen, true);
         mr = section->mr;
 
-        if (!mr->iommu_ops) {
+        if (!memory_region_is_iommu(mr)) {
             break;
         }
 
-        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+        iommumr = IOMMU_MEMORY_REGION(mr);
+
+        iotlb = iommumr->iommu_ops->translate(iommumr, addr, is_write);
         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                 | (addr & iotlb.addr_mask));
         *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
@@ -538,7 +544,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 
     section = address_space_translate_internal(d, addr, xlat, plen, false);
 
-    assert(!section->mr->iommu_ops);
+    assert(!memory_region_is_iommu(section->mr));
     return section;
 }
 #endif
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 9e30e148d6..5051110b9d 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -110,7 +110,8 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
 }
 
 /* Called from RCU critical section */
-static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu,
+                                               hwaddr addr,
                                                bool is_write)
 {
     sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
@@ -150,14 +151,14 @@ static void spapr_tce_table_pre_save(void *opaque)
                                tcet->bus_offset, tcet->page_shift);
 }
 
-static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu)
+static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion *iommu)
 {
     sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
 
     return 1ULL << tcet->page_shift;
 }
 
-static void spapr_tce_notify_flag_changed(MemoryRegion *iommu,
+static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                           IOMMUNotifierFlag old,
                                           IOMMUNotifierFlag new)
 {
@@ -265,7 +266,9 @@ static int spapr_tce_table_realize(DeviceState *dev)
     memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX);
 
     snprintf(tmp, sizeof(tmp), "tce-iommu-%x", tcet->liobn);
-    memory_region_init_iommu(&tcet->iommu, tcetobj, &spapr_iommu_ops, tmp, 0);
+    memory_region_init_iommu_type(TYPE_IOMMU_MEMORY_REGION,
+                                  &tcet->iommu, tcetobj, &spapr_iommu_ops,
+                                  tmp, 0);
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
@@ -341,9 +344,10 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
                                         &tcet->fd,
                                         tcet->need_vfio);
 
-    memory_region_set_size(&tcet->iommu,
+    memory_region_set_size(MEMORY_REGION(&tcet->iommu),
                            (uint64_t)tcet->nb_table << tcet->page_shift);
-    memory_region_add_subregion(&tcet->root, tcet->bus_offset, &tcet->iommu);
+    memory_region_add_subregion(&tcet->root, tcet->bus_offset,
+                                MEMORY_REGION(&tcet->iommu));
 }
 
 void spapr_tce_table_disable(sPAPRTCETable *tcet)
@@ -352,8 +356,8 @@ void spapr_tce_table_disable(sPAPRTCETable *tcet)
         return;
     }
 
-    memory_region_del_subregion(&tcet->root, &tcet->iommu);
-    memory_region_set_size(&tcet->iommu, 0);
+    memory_region_del_subregion(&tcet->root, MEMORY_REGION(&tcet->iommu));
+    memory_region_set_size(MEMORY_REGION(&tcet->iommu), 0);
 
     spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table);
     tcet->fd = -1;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index f3ba9b9007..ab95db689c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -465,6 +465,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
 
     if (memory_region_is_iommu(section->mr)) {
         VFIOGuestIOMMU *giommu;
+        IOMMUMemoryRegion *iommumr = IOMMU_MEMORY_REGION(section->mr);
 
         trace_vfio_listener_region_add_iommu(iova, end);
         /*
@@ -474,7 +475,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
          * device emulation the VFIO iommu handles to use).
          */
         giommu = g_malloc0(sizeof(*giommu));
-        giommu->iommu = section->mr;
+        giommu->iommu = iommumr;
         giommu->iommu_offset = section->offset_within_address_space -
                                section->offset_within_region;
         giommu->container = container;
@@ -482,7 +483,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
         giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL;
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
-        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+        memory_region_register_iommu_notifier(section->mr, &giommu->n);
         memory_region_iommu_replay(giommu->iommu, &giommu->n, false);
 
         return;
@@ -550,8 +551,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
         VFIOGuestIOMMU *giommu;
 
         QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
-            if (giommu->iommu == section->mr) {
-                memory_region_unregister_iommu_notifier(giommu->iommu,
+            if (MEMORY_REGION(giommu->iommu) == section->mr) {
+                memory_region_unregister_iommu_notifier(section->mr,
                                                         &giommu->n);
                 QLIST_REMOVE(giommu, giommu_next);
                 g_free(giommu);
@@ -1141,7 +1142,8 @@ static void vfio_disconnect_container(VFIOGroup *group)
         QLIST_REMOVE(container, next);
 
         QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
-            memory_region_unregister_iommu_notifier(giommu->iommu, &giommu->n);
+            memory_region_unregister_iommu_notifier(
+                    MEMORY_REGION(giommu->iommu), &giommu->n);
             QLIST_REMOVE(giommu, giommu_next);
             g_free(giommu);
         }
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 4409bcc0d7..551870d46b 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -143,7 +143,8 @@ int vfio_spapr_create_window(VFIOContainer *container,
                              hwaddr *pgsize)
 {
     int ret;
-    unsigned pagesize = memory_region_iommu_get_min_page_size(section->mr);
+    IOMMUMemoryRegion *iommumr = IOMMU_MEMORY_REGION(section->mr);
+    unsigned pagesize = memory_region_iommu_get_min_page_size(iommumr);
     unsigned entries, pages;
     struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };
 
diff --git a/memory.c b/memory.c
index 4c95aaf39c..036abd78dc 100644
--- a/memory.c
+++ b/memory.c
@@ -975,12 +975,11 @@ static char *memory_region_escape_name(const char *name)
     return escaped;
 }
 
-void memory_region_init(MemoryRegion *mr,
-                        Object *owner,
-                        const char *name,
-                        uint64_t size)
+static void memory_region_do_init(MemoryRegion *mr,
+                                  Object *owner,
+                                  const char *name,
+                                  uint64_t size)
 {
-    object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION);
     mr->size = int128_make64(size);
     if (size == UINT64_MAX) {
         mr->size = int128_2_64();
@@ -1004,6 +1003,15 @@ void memory_region_init(MemoryRegion *mr,
     }
 }
 
+void memory_region_init(MemoryRegion *mr,
+                        Object *owner,
+                        const char *name,
+                        uint64_t size)
+{
+    object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION);
+    memory_region_do_init(mr, owner, name, size);
+}
+
 static void memory_region_get_addr(Object *obj, Visitor *v, const char *name,
                                    void *opaque, Error **errp)
 {
@@ -1473,17 +1481,33 @@ void memory_region_init_rom_device(MemoryRegion *mr,
     mr->ram_block = qemu_ram_alloc(size, mr, errp);
 }
 
-void memory_region_init_iommu(MemoryRegion *mr,
-                              Object *owner,
-                              const MemoryRegionIOMMUOps *ops,
-                              const char *name,
-                              uint64_t size)
+void memory_region_init_iommu_type(const char *mrtypename,
+                                   IOMMUMemoryRegion *iommumr,
+                                   Object *owner,
+                                   const MemoryRegionIOMMUOps *ops,
+                                   const char *name,
+                                   uint64_t size)
 {
-    memory_region_init(mr, owner, name, size);
-    mr->iommu_ops = ops,
+    struct MemoryRegion *mr;
+    size_t instance_size = object_type_get_instance_size(mrtypename);
+
+    object_initialize(iommumr, instance_size, mrtypename);
+    mr = MEMORY_REGION(iommumr);
+    memory_region_do_init(mr, owner, name, size);
+    iommumr->iommu_ops = ops,
     mr->terminates = true;  /* then re-forwards */
-    QLIST_INIT(&mr->iommu_notify);
-    mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE;
+    QLIST_INIT(&iommumr->iommu_notify);
+    iommumr->iommu_notify_flags = IOMMU_NOTIFIER_NONE;
+}
+
+void memory_region_init_iommu(IOMMUMemoryRegion *iommumr,
+                              Object *owner,
+                              const MemoryRegionIOMMUOps *ops,
+                              const char *name,
+                              uint64_t size)
+{
+    memory_region_init_iommu_type(TYPE_IOMMU_MEMORY_REGION, iommumr,
+                                  owner, ops, name, size);
 }
 
 static void memory_region_finalize(Object *obj)
@@ -1578,57 +1602,61 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
     return memory_region_get_dirty_log_mask(mr) & (1 << client);
 }
 
-static void memory_region_update_iommu_notify_flags(MemoryRegion *mr)
+static void memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommumr)
 {
     IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE;
     IOMMUNotifier *iommu_notifier;
 
-    QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
+    QLIST_FOREACH(iommu_notifier, &iommumr->iommu_notify, node) {
         flags |= iommu_notifier->notifier_flags;
     }
 
-    if (flags != mr->iommu_notify_flags &&
-        mr->iommu_ops->notify_flag_changed) {
-        mr->iommu_ops->notify_flag_changed(mr, mr->iommu_notify_flags,
-                                           flags);
+    if (flags != iommumr->iommu_notify_flags &&
+        iommumr->iommu_ops->notify_flag_changed) {
+        iommumr->iommu_ops->notify_flag_changed(iommumr,
+                                                iommumr->iommu_notify_flags,
+                                                flags);
     }
 
-    mr->iommu_notify_flags = flags;
+    iommumr->iommu_notify_flags = flags;
 }
 
 void memory_region_register_iommu_notifier(MemoryRegion *mr,
                                            IOMMUNotifier *n)
 {
+    IOMMUMemoryRegion *iommumr;
+
     if (mr->alias) {
         memory_region_register_iommu_notifier(mr->alias, n);
         return;
     }
 
     /* We need to register for at least one bitfield */
+    iommumr = IOMMU_MEMORY_REGION(mr);
     assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
-    QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
-    memory_region_update_iommu_notify_flags(mr);
+    QLIST_INSERT_HEAD(&iommumr->iommu_notify, n, node);
+    memory_region_update_iommu_notify_flags(iommumr);
 }
 
-uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr)
+uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommumr)
 {
-    assert(memory_region_is_iommu(mr));
-    if (mr->iommu_ops && mr->iommu_ops->get_min_page_size) {
-        return mr->iommu_ops->get_min_page_size(mr);
+    if (iommumr->iommu_ops && iommumr->iommu_ops->get_min_page_size) {
+        return iommumr->iommu_ops->get_min_page_size(iommumr);
     }
     return TARGET_PAGE_SIZE;
 }
 
-void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
+void memory_region_iommu_replay(IOMMUMemoryRegion *iommumr, IOMMUNotifier *n,
                                 bool is_write)
 {
+    MemoryRegion *mr = MEMORY_REGION(iommumr);
     hwaddr addr, granularity;
     IOMMUTLBEntry iotlb;
 
-    granularity = memory_region_iommu_get_min_page_size(mr);
+    granularity = memory_region_iommu_get_min_page_size(iommumr);
 
     for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
-        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+        iotlb = iommumr->iommu_ops->translate(iommumr, addr, is_write);
         if (iotlb.perm != IOMMU_NONE) {
             n->notify(n, &iotlb);
         }
@@ -1644,21 +1672,24 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
                                              IOMMUNotifier *n)
 {
+    IOMMUMemoryRegion *iommumr;
+
     if (mr->alias) {
         memory_region_unregister_iommu_notifier(mr->alias, n);
         return;
     }
     QLIST_REMOVE(n, node);
-    memory_region_update_iommu_notify_flags(mr);
+    iommumr = IOMMU_MEMORY_REGION(mr);
+    memory_region_update_iommu_notify_flags(iommumr);
 }
 
-void memory_region_notify_iommu(MemoryRegion *mr,
+void memory_region_notify_iommu(IOMMUMemoryRegion *iommumr,
                                 IOMMUTLBEntry entry)
 {
     IOMMUNotifier *iommu_notifier;
     IOMMUNotifierFlag request_flags;
 
-    assert(memory_region_is_iommu(mr));
+    assert(memory_region_is_iommu(MEMORY_REGION(iommumr)));
 
     if (entry.perm & IOMMU_RW) {
         request_flags = IOMMU_NOTIFIER_MAP;
@@ -1666,7 +1697,7 @@ void memory_region_notify_iommu(MemoryRegion *mr,
         request_flags = IOMMU_NOTIFIER_UNMAP;
     }
 
-    QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
+    QLIST_FOREACH(iommu_notifier, &iommumr->iommu_notify, node) {
         if (iommu_notifier->notifier_flags & request_flags) {
             iommu_notifier->notify(iommu_notifier, &entry);
         }
@@ -2660,9 +2691,16 @@ static const TypeInfo memory_region_info = {
     .instance_finalize  = memory_region_finalize,
 };
 
+static const TypeInfo iommu_memory_region_info = {
+    .parent             = TYPE_MEMORY_REGION,
+    .name               = TYPE_IOMMU_MEMORY_REGION,
+    .instance_size      = sizeof(IOMMUMemoryRegion),
+};
+
 static void memory_register_types(void)
 {
     type_register_static(&memory_region_info);
+    type_register_static(&iommu_memory_region_info);
 }
 
 type_init(memory_register_types)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION
  2017-03-30 12:47 [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion Alexey Kardashevskiy
@ 2017-03-30 12:47 ` Alexey Kardashevskiy
  2017-03-30 13:00   ` Paolo Bonzini
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 3/5] vfio-pci: Reorder group-to-container attaching Alexey Kardashevskiy
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-30 12:47 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Alex Williamson,
	Paolo Bonzini

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/hw/ppc/spapr.h | 22 ++++++++++++++++++++++
 hw/ppc/spapr_iommu.c   | 25 ++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 6997ed7e98..5d5ce4dd2b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -573,11 +573,25 @@ void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr);
 #define RTAS_EVENT_SCAN_RATE    1
 
 typedef struct sPAPRTCETable sPAPRTCETable;
+typedef struct sPAPRIOMMUMemoryRegionClass sPAPRIOMMUMemoryRegionClass;
 
 #define TYPE_SPAPR_TCE_TABLE "spapr-tce-table"
 #define SPAPR_TCE_TABLE(obj) \
     OBJECT_CHECK(sPAPRTCETable, (obj), TYPE_SPAPR_TCE_TABLE)
 
+#define TYPE_SPAPR_IOMMU_MEMORY_REGION "qemu:spapr_iommu-memory-region"
+#define SPAPR_IOMMU_MEMORY_REGION(obj) \
+        OBJECT_CHECK(IOMMUMemoryRegion, (obj), \
+        TYPE_SPAPR_IOMMU_MEMORY_REGION)
+
+#define SPAPR_IOMMU_MEMORY_REGION_CLASS(k) \
+        OBJECT_CLASS_CHECK(sPAPRIOMMUMemoryRegionClass, (k), \
+        TYPE_SPAPR_IOMMU_MEMORY_REGION)
+
+#define SPAPR_IOMMU_MEMORY_REGION_GET_CLASS(obj) \
+        OBJECT_GET_CLASS(sPAPRIOMMUMemoryRegionClass, (obj), \
+        TYPE_SPAPR_IOMMU_MEMORY_REGION)
+
 struct sPAPRTCETable {
     DeviceState parent;
     uint32_t liobn;
@@ -596,6 +610,14 @@ struct sPAPRTCETable {
     QLIST_ENTRY(sPAPRTCETable) list;
 };
 
+struct sPAPRIOMMUMemoryRegionClass {
+    /*< private >*/
+    ObjectClass parent_class;
+    /*< public >*/
+
+    int (*get_fd)(IOMMUMemoryRegion *iommu);
+};
+
 sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);
 
 struct sPAPREventLogEntry {
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 5051110b9d..5b0eee1be4 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -171,6 +171,13 @@ static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
     }
 }
 
+static int spapr_tce_get_fd(IOMMUMemoryRegion *iommu)
+{
+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
+
+    return tcet->fd;
+}
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
     sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -266,7 +273,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
     memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX);
 
     snprintf(tmp, sizeof(tmp), "tce-iommu-%x", tcet->liobn);
-    memory_region_init_iommu_type(TYPE_IOMMU_MEMORY_REGION,
+    memory_region_init_iommu_type(TYPE_SPAPR_IOMMU_MEMORY_REGION,
                                   &tcet->iommu, tcetobj, &spapr_iommu_ops,
                                   tmp, 0);
 
@@ -634,9 +641,25 @@ static TypeInfo spapr_tce_table_info = {
     .class_init = spapr_tce_table_class_init,
 };
 
+static void spapr_iommu_memory_region_class_init(ObjectClass *k, void *data)
+{
+    sPAPRIOMMUMemoryRegionClass *smrc = SPAPR_IOMMU_MEMORY_REGION_CLASS(k);
+
+    smrc->get_fd = spapr_tce_get_fd;
+}
+
+static const TypeInfo spapr_iommu_memory_region_info = {
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .name = TYPE_SPAPR_IOMMU_MEMORY_REGION,
+    .instance_size = sizeof(IOMMUMemoryRegion),
+    .class_size = sizeof(sPAPRIOMMUMemoryRegionClass),
+    .class_init = spapr_iommu_memory_region_class_init,
+};
+
 static void register_types(void)
 {
     type_register_static(&spapr_tce_table_info);
+    type_register_static(&spapr_iommu_memory_region_info);
 }
 
 type_init(register_types);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [RFC PATCH qemu 3/5] vfio-pci: Reorder group-to-container attaching
  2017-03-30 12:47 [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION Alexey Kardashevskiy
@ 2017-03-30 12:47 ` Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 4/5] vfio/spapr: Add a notifier for PPC64 HV/PR KVM about new group attached to LIOBN Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 5/5] spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device Alexey Kardashevskiy
  4 siblings, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-30 12:47 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Alex Williamson,
	Paolo Bonzini

At the moment VFIO PCI device initialization works as follows:
vfio_realize
	vfio_get_group
		vfio_connect_container
			register memory listeners (1)
			update QEMU groups lists
		vfio_kvm_device_add_group

Then (example for pseries) the machine reset hook triggers region_add()
for all regions where listeners from (1) are listening:

ppc_spapr_reset
	spapr_phb_reset
		spapr_tce_table_enable
			memory_region_add_subregion
				vfio_listener_region_add
					vfio_spapr_create_window

This scheme works fine until we need to handle VFIO PCI device hotplug
_and_ we want in-kernel acceleration enabled, i.e. after PCI hotplug
we need a place to call
ioctl(vfio_kvm_device_fd, KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE).
Since the ioctl needs a LIOBN fd (from sPAPRTCETable) and an IOMMU group fd
(from VFIOGroup), vfio_listener_region_add() seems to be the only place
for this ioctl().

However, this only works at boot time because the machine reset
happens strictly after all devices are finalized. On hotplug,
vfio_listener_region_add() is called when the memory listener is
registered, but at that point:
1. the new group is not yet added to container->group_list;
2. the VFIO KVM device is unaware of the new IOMMU group.

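This moves vfio_kvm_device_add_group() and the container/group list
updates ahead of the memory listener registration in
vfio_connect_container(), so that all necessary VFIO infrastructure is
in place for both the initial startup and hotplug cases.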

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* moved container->initialized back to its correct location
* added missing QLIST_REMOVE()
---
 hw/vfio/common.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ab95db689c..e8188eb3d5 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1087,6 +1087,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         goto free_container_exit;
     }
 
+    vfio_kvm_device_add_group(group);
+
+    QLIST_INIT(&container->group_list);
+    QLIST_INSERT_HEAD(&space->containers, container, next);
+
+    group->container = container;
+    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+
     container->listener = vfio_memory_listener;
 
     memory_listener_register(&container->listener, container->space->as);
@@ -1100,14 +1108,11 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
 
     container->initialized = true;
 
-    QLIST_INIT(&container->group_list);
-    QLIST_INSERT_HEAD(&space->containers, container, next);
-
-    group->container = container;
-    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
-
     return 0;
 listener_release_exit:
+    QLIST_REMOVE(group, container_next);
+    QLIST_REMOVE(container, next);
+    vfio_kvm_device_del_group(group);
     vfio_listener_release(container);
 
 free_container_exit:
@@ -1212,8 +1217,6 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
 
     QLIST_INSERT_HEAD(&vfio_group_list, group, next);
 
-    vfio_kvm_device_add_group(group);
-
     return group;
 
 close_fd_exit:
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [RFC PATCH qemu 4/5] vfio/spapr: Add a notifier for PPC64 HV/PR KVM about new group attached to LIOBN
  2017-03-30 12:47 [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration Alexey Kardashevskiy
                   ` (2 preceding siblings ...)
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 3/5] vfio-pci: Reorder group-to-container attaching Alexey Kardashevskiy
@ 2017-03-30 12:47 ` Alexey Kardashevskiy
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 5/5] spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device Alexey Kardashevskiy
  4 siblings, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-30 12:47 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Alex Williamson,
	Paolo Bonzini

This implements a notification to KVM about a new IOMMU group attached
to an sPAPR logical I/O bus (identified by its LIOBN) in order to enable
in-kernel TCE acceleration.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/common.c              | 10 ++++++++++
 hw/vfio/spapr.c               | 32 ++++++++++++++++++++++++++++++++
 hw/vfio/trace-events          |  1 +
 4 files changed, 44 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 7a4135ae6f..9662cb29a0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -175,6 +175,7 @@ extern const MemoryListener vfio_prereg_listener;
 int vfio_spapr_create_window(VFIOContainer *container,
                              MemoryRegionSection *section,
                              hwaddr *pgsize);
+int vfio_spapr_notify_kvm(int vfio_kvm_device_fd, int groupfd, int tablefd);
 int vfio_spapr_remove_window(VFIOContainer *container,
                              hwaddr offset_within_address_space);
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index e8188eb3d5..b94b29be15 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -440,6 +440,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
             goto fail;
         }
 
+#ifdef CONFIG_KVM
+        if (kvm_enabled()) {
+            VFIOGroup *group;
+
+            QLIST_FOREACH(group, &container->group_list, container_next) {
+                vfio_spapr_notify_kvm(vfio_kvm_device_fd, group->fd,
+                                      IOMMU_MEMORY_REGION(section->mr));
+            }
+        }
+#endif
         vfio_host_win_add(container, section->offset_within_address_space,
                           section->offset_within_address_space +
                           int128_get64(section->size) - 1, pgsize);
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 551870d46b..bab808b837 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -15,8 +15,12 @@
 
 #include "hw/vfio/vfio-common.h"
 #include "hw/hw.h"
+#include "hw/ppc/spapr.h"
 #include "qemu/error-report.h"
 #include "trace.h"
+#ifdef CONFIG_KVM
+#include "linux/kvm.h"
+#endif
 
 static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
 {
@@ -188,6 +192,34 @@ int vfio_spapr_create_window(VFIOContainer *container,
     return 0;
 }
 
+int vfio_spapr_notify_kvm(int vfio_kvm_device_fd, int groupfd,
+                          IOMMUMemoryRegion *iommumr)
+{
+#ifdef CONFIG_KVM
+    struct kvm_vfio_spapr_tce param = {
+        .groupfd = groupfd,
+    };
+    struct kvm_device_attr attr = {
+        .group = KVM_DEV_VFIO_GROUP,
+        .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
+        .addr = (uint64_t)(unsigned long)&param,
+    };
+    sPAPRIOMMUMemoryRegionClass *spmc =
+        SPAPR_IOMMU_MEMORY_REGION_GET_CLASS(iommumr);
+
+    param.tablefd = spmc->get_fd(iommumr);
+    if (param.tablefd != -1) {
+        if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+            error_report("vfio: failed to setup fd %d for a group with fd %d: %s",
+                         param.tablefd, param.groupfd, strerror(errno));
+            return -errno;
+        }
+    }
+    trace_vfio_spapr_notify_kvm(groupfd, param.tablefd);
+#endif
+    return 0;
+}
+
 int vfio_spapr_remove_window(VFIOContainer *container,
                              hwaddr offset_within_address_space)
 {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 2561c6d31a..084a92f7c2 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -123,3 +123,4 @@ vfio_prereg_register(uint64_t va, uint64_t size, int ret) "va=%"PRIx64" size=%"P
 vfio_prereg_unregister(uint64_t va, uint64_t size, int ret) "va=%"PRIx64" size=%"PRIx64" ret=%d"
 vfio_spapr_create_window(int ps, uint64_t ws, uint64_t off) "pageshift=0x%x winsize=0x%"PRIx64" offset=0x%"PRIx64
 vfio_spapr_remove_window(uint64_t off) "offset=%"PRIx64
+vfio_spapr_notify_kvm(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [RFC PATCH qemu 5/5] spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device
  2017-03-30 12:47 [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration Alexey Kardashevskiy
                   ` (3 preceding siblings ...)
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 4/5] vfio/spapr: Add a notifier for PPC64 HV/PR KVM about new group attached to LIOBN Alexey Kardashevskiy
@ 2017-03-30 12:47 ` Alexey Kardashevskiy
  4 siblings, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-30 12:47 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Alex Williamson,
	Paolo Bonzini

This uses the new kernel KVM_CAP_SPAPR_TCE_VFIO capability to enable
in-kernel acceleration of TCE update requests, which will go via
the VFIO KVM device.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 target/ppc/kvm_ppc.h | 6 ++++++
 hw/ppc/spapr_iommu.c | 4 ++++
 target/ppc/kvm.c     | 7 ++++++-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index f48243d13f..ce7327a4e0 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -46,6 +46,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
+bool kvmppc_has_cap_spapr_vfio(void);
 #endif /* !CONFIG_USER_ONLY */
 bool kvmppc_has_cap_epr(void);
 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function);
@@ -216,6 +217,11 @@ static inline bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
     return true;
 }
 
+static inline bool kvmppc_has_cap_spapr_vfio(void)
+{
+    return false;
+}
+
 #endif /* !CONFIG_USER_ONLY */
 
 static inline bool kvmppc_has_cap_epr(void)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 5b0eee1be4..bfc2b71ca3 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -295,6 +295,10 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio)
 
     tcet->need_vfio = need_vfio;
 
+    if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) {
+        return;
+    }
+
     oldtable = tcet->table;
 
     tcet->table = spapr_tce_alloc_table(tcet->liobn,
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index abf4cb23b1..82886b669f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -131,7 +131,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
-    cap_spapr_vfio = false;
+    cap_spapr_vfio = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -2418,6 +2418,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
     return cap_mmu_hash_v3;
 }
 
+bool kvmppc_has_cap_spapr_vfio(void)
+{
+    return cap_spapr_vfio;
+}
+
 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
 {
     ObjectClass *oc = OBJECT_CLASS(pcc);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION Alexey Kardashevskiy
@ 2017-03-30 13:00   ` Paolo Bonzini
  2017-03-31 11:47     ` Alexey Kardashevskiy
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2017-03-30 13:00 UTC (permalink / raw)
  To: Alexey Kardashevskiy, qemu-devel; +Cc: qemu-ppc, David Gibson, Alex Williamson



On 30/03/2017 14:47, Alexey Kardashevskiy wrote:
> +static int spapr_tce_get_fd(IOMMUMemoryRegion *iommu)
> +{
> +    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
> +
> +    return tcet->fd;
> +}
> +
>  static int spapr_tce_table_post_load(void *opaque, int version_id)
>  {
>      sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
> @@ -266,7 +273,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>      memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX);
>  
>      snprintf(tmp, sizeof(tmp), "tce-iommu-%x", tcet->liobn);
> -    memory_region_init_iommu_type(TYPE_IOMMU_MEMORY_REGION,
> +    memory_region_init_iommu_type(TYPE_SPAPR_IOMMU_MEMORY_REGION,
>                                    &tcet->iommu, tcetobj, &spapr_iommu_ops,
>                                    tmp, 0);
>  
> @@ -634,9 +641,25 @@ static TypeInfo spapr_tce_table_info = {
>      .class_init = spapr_tce_table_class_init,
>  };
>  
> +static void spapr_iommu_memory_region_class_init(ObjectClass *k, void *data)
> +{
> +    sPAPRIOMMUMemoryRegionClass *smrc = SPAPR_IOMMU_MEMORY_REGION_CLASS(k);
> +
> +    smrc->get_fd = spapr_tce_get_fd;
> +}
> +

You don't even need the virtual function.  Rather, make spapr_tce_get_fd
public and give it a sPAPRTCETable argument.  Then vfio_spapr_notify_kvm
can use SPAPR_IOMMU_MEMORY_REGION(iommumr).

Paolo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion
  2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion Alexey Kardashevskiy
@ 2017-03-30 13:00   ` Paolo Bonzini
  0 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2017-03-30 13:00 UTC (permalink / raw)
  To: Alexey Kardashevskiy, qemu-devel; +Cc: qemu-ppc, David Gibson, Alex Williamson



On 30/03/2017 14:47, Alexey Kardashevskiy wrote:
> @@ -633,7 +663,7 @@ static inline bool memory_region_is_iommu(MemoryRegion *mr)
>      if (mr->alias) {
>          return memory_region_is_iommu(mr->alias);
>      }
> -    return mr->iommu_ops;
> +    return object_dynamic_cast(OBJECT(mr), TYPE_IOMMU_MEMORY_REGION) != NULL;
>  }

This is going to be too slow for all the places where fast paths use
address_space_translate.  You need a (slightly hackish) bool iommu flag
in MemoryRegion, and a regular C cast rather than the type-safe
IOMMU_MEMORY_REGION(mr) in address_space_translate.

You can set the flag in iommu_memory_region_instance_init for example.

Paolo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION
  2017-03-30 13:00   ` Paolo Bonzini
@ 2017-03-31 11:47     ` Alexey Kardashevskiy
  2017-03-31 12:03       ` Paolo Bonzini
  0 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2017-03-31 11:47 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: qemu-ppc, David Gibson, Alex Williamson

On 31/03/17 00:00, Paolo Bonzini wrote:
> 
> 
> On 30/03/2017 14:47, Alexey Kardashevskiy wrote:
>> +static int spapr_tce_get_fd(IOMMUMemoryRegion *iommu)
>> +{
>> +    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
>> +
>> +    return tcet->fd;
>> +}
>> +
>>  static int spapr_tce_table_post_load(void *opaque, int version_id)
>>  {
>>      sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
>> @@ -266,7 +273,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>>      memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX);
>>  
>>      snprintf(tmp, sizeof(tmp), "tce-iommu-%x", tcet->liobn);
>> -    memory_region_init_iommu_type(TYPE_IOMMU_MEMORY_REGION,
>> +    memory_region_init_iommu_type(TYPE_SPAPR_IOMMU_MEMORY_REGION,
>>                                    &tcet->iommu, tcetobj, &spapr_iommu_ops,
>>                                    tmp, 0);
>>  
>> @@ -634,9 +641,25 @@ static TypeInfo spapr_tce_table_info = {
>>      .class_init = spapr_tce_table_class_init,
>>  };
>>  
>> +static void spapr_iommu_memory_region_class_init(ObjectClass *k, void *data)
>> +{
>> +    sPAPRIOMMUMemoryRegionClass *smrc = SPAPR_IOMMU_MEMORY_REGION_CLASS(k);
>> +
>> +    smrc->get_fd = spapr_tce_get_fd;
>> +}
>> +
> 
> You don't even need the virtual function.  Rather, make spapr_tce_get_fd
> public and give it a sPAPRTCETable argument.  Then vfio_spapr_notify_kvm
> can use SPAPR_IOMMU_MEMORY_REGION(iommumr).

Well, if I make spapr_tce_get_fd() public, vfio_spapr_notify_kvm() could
just take MemoryRegion and cast it to sPAPRTCETable without all these
dances with MemoryRegion, IOMMUMemoryRegion, what do I miss here? I have
made a big patch with IOMMUMemoryRegion though, post it?


-- 
Alexey

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION
  2017-03-31 11:47     ` Alexey Kardashevskiy
@ 2017-03-31 12:03       ` Paolo Bonzini
  0 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2017-03-31 12:03 UTC (permalink / raw)
  To: Alexey Kardashevskiy, qemu-devel; +Cc: qemu-ppc, David Gibson, Alex Williamson



On 31/03/2017 13:47, Alexey Kardashevskiy wrote:
>>>  
>>> +static void spapr_iommu_memory_region_class_init(ObjectClass *k, void *data)
>>> +{
>>> +    sPAPRIOMMUMemoryRegionClass *smrc = SPAPR_IOMMU_MEMORY_REGION_CLASS(k);
>>> +
>>> +    smrc->get_fd = spapr_tce_get_fd;
>>> +}
>>> +
>> You don't even need the virtual function.  Rather, make spapr_tce_get_fd
>> public and give it a sPAPRTCETable argument.  Then vfio_spapr_notify_kvm
>> can use SPAPR_IOMMU_MEMORY_REGION(iommumr).
> Well, if I make spapr_tce_get_fd() public, vfio_spapr_notify_kvm() could
> just take MemoryRegion and cast it to sPAPRTCETable without all these
> dances with MemoryRegion, IOMMUMemoryRegion, what do I miss here? I have
> made a big patch with IOMMUMemoryRegion though, post it?

*I* was missing that the call was inside an "if (container->iommu_type == TCE)".

So the hierarchy is good to have, but the virtual function can be removed.

Paolo

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-03-31 12:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-30 12:47 [Qemu-devel] [RFC PATCH qemu 0/5] vfio-pci, spapr: Allow in-kernel acceleration Alexey Kardashevskiy
2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 1/5] memory/iommu: QOM'fy IOMMU MemoryRegion Alexey Kardashevskiy
2017-03-30 13:00   ` Paolo Bonzini
2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 2/5] spapr-iommu: Subclass TYPE_IOMMU_MEMORY_REGION Alexey Kardashevskiy
2017-03-30 13:00   ` Paolo Bonzini
2017-03-31 11:47     ` Alexey Kardashevskiy
2017-03-31 12:03       ` Paolo Bonzini
2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 3/5] vfio-pci: Reorder group-to-container attaching Alexey Kardashevskiy
2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 4/5] vfio/spapr: Add a notifier for PPC64 HV/PR KVM about new group attached to LIOBN Alexey Kardashevskiy
2017-03-30 12:47 ` [Qemu-devel] [RFC PATCH qemu 5/5] spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device Alexey Kardashevskiy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.