All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type
@ 2016-09-05  7:21 Peter Xu
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 1/3] memory: add one flag for IOMMU notifier Peter Xu
                   ` (3 more replies)
  0 siblings, 4 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-05  7:21 UTC (permalink / raw)
  To: qemu-devel
  Cc: dgibson, jasowang, mst, pbonzini, cornelia.huck, wexu, vkaplans,
	alex.williamson, peterx

In the thread:

  https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg00254.html

Alex proposed a way for vhost DMAR to be enabled without breaking
existing protections on vIOMMU and device assignments. This series
tried to implement the idea, by introducing a IOMMU notifier type for
each IOMMU memory region.

Thanks,

Peter Xu (3):
  memory: add one flag for IOMMU notifier
  memory: add iommu_notify_flag
  intel_iommu: allow IOMMU_NONE typed notifiers

 hw/i386/intel_iommu.c    | 13 ++++++++-----
 hw/ppc/spapr_iommu.c     |  5 +++--
 hw/s390x/s390-pci-inst.c |  2 +-
 hw/vfio/common.c         |  3 ++-
 include/exec/memory.h    | 14 +++++++++++---
 memory.c                 | 12 +++++++++---
 6 files changed, 34 insertions(+), 15 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Qemu-devel] [PATCH 1/3] memory: add one flag for IOMMU notifier
  2016-09-05  7:21 [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type Peter Xu
@ 2016-09-05  7:21 ` Peter Xu
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag Peter Xu
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-05  7:21 UTC (permalink / raw)
  To: qemu-devel
  Cc: dgibson, jasowang, mst, pbonzini, cornelia.huck, wexu, vkaplans,
	alex.williamson, peterx

With the new flag, now we allow to register two kinds of IOMMU
notifiers:

- IOMMU_RW:   All DMA mapping changes will be notified.
- IOMMU_NONE: will only be notified when there are cache invalidations.

Here IOMMU_RW is the original scemantics for existing IOMMU notifiers.
VFIO is the only register for IOMMU notifier, and it's with type
IOMMU_RW.

Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c | 3 ++-
 hw/ppc/spapr_iommu.c  | 3 ++-
 hw/vfio/common.c      | 3 ++-
 include/exec/memory.h | 8 ++++++--
 memory.c              | 6 ++++--
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 28c31a2..e0d4f23 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1974,7 +1974,8 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
     return ret;
 }
 
-static void vtd_iommu_notify_started(MemoryRegion *iommu)
+static void vtd_iommu_notify_started(MemoryRegion *iommu,
+                                     IOMMUAccessFlags flag)
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
 
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 6bc4d4d..09660fc 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -156,7 +156,8 @@ static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu)
     return 1ULL << tcet->page_shift;
 }
 
-static void spapr_tce_notify_started(MemoryRegion *iommu)
+static void spapr_tce_notify_started(MemoryRegion *iommu,
+                                     IOMMUAccessFlags flag)
 {
     spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true);
 }
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b313e7c..b2d1fad 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -456,7 +456,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
         giommu->n.notify = vfio_iommu_map_notify;
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
-        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n,
+                                              IOMMU_RW);
         memory_region_iommu_replay(giommu->iommu, &giommu->n, false);
 
         return;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 3e4d416..6c16a86 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -154,7 +154,7 @@ struct MemoryRegionIOMMUOps {
     /* Returns minimum supported page size */
     uint64_t (*get_min_page_size)(MemoryRegion *iommu);
     /* Called when the first notifier is set */
-    void (*notify_started)(MemoryRegion *iommu);
+    void (*notify_started)(MemoryRegion *iommu, IOMMUAccessFlags flag);
     /* Called when the last notifier is removed */
     void (*notify_stopped)(MemoryRegion *iommu);
 };
@@ -623,8 +623,12 @@ void memory_region_notify_iommu(MemoryRegion *mr,
  * @n: the notifier to be added; the notifier receives a pointer to an
  *     #IOMMUTLBEntry as the opaque value; the pointer ceases to be
  *     valid on exit from the notifier.
+ * @flag: kind of notifer to request. IOMMU_RW for notifying all
+ *        events (including additions), and IOMMU_NONE for notifying
+ *        cache invalidations only.
  */
-void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n);
+void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n,
+                                           IOMMUAccessFlags flag);
 
 /**
  * memory_region_iommu_replay: replay existing IOMMU translations to
diff --git a/memory.c b/memory.c
index 0eb6895..747a9ec 100644
--- a/memory.c
+++ b/memory.c
@@ -1513,11 +1513,13 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
     return memory_region_get_dirty_log_mask(mr) & (1 << client);
 }
 
-void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
+void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n,
+                                           IOMMUAccessFlags flag)
 {
+    assert(flag == IOMMU_NONE || flag == IOMMU_RW);
     if (mr->iommu_ops->notify_started &&
         QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
-        mr->iommu_ops->notify_started(mr);
+        mr->iommu_ops->notify_started(mr, flag);
     }
     notifier_list_add(&mr->iommu_notify, n);
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  7:21 [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type Peter Xu
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 1/3] memory: add one flag for IOMMU notifier Peter Xu
@ 2016-09-05  7:21 ` Peter Xu
  2016-09-05  8:04   ` Paolo Bonzini
  2016-09-06  5:12   ` David Gibson
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 3/3] intel_iommu: allow IOMMU_NONE typed notifiers Peter Xu
  2016-09-06  5:06 ` [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type David Gibson
  3 siblings, 2 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-05  7:21 UTC (permalink / raw)
  To: qemu-devel
  Cc: dgibson, jasowang, mst, pbonzini, cornelia.huck, wexu, vkaplans,
	alex.williamson, peterx

When there are active IOMMU notify registers, the iommu_notify_flag will
cache existing notify flag. When notifiers are triggered, flag will be
checked against the cached result.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/ppc/spapr_iommu.c     | 2 +-
 hw/s390x/s390-pci-inst.c | 2 +-
 include/exec/memory.h    | 6 +++++-
 memory.c                 | 6 +++++-
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 09660fc..14b1f00 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -411,7 +411,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     entry.translated_addr = tce & page_mask;
     entry.addr_mask = ~page_mask;
     entry.perm = spapr_tce_iommu_access_flags(tce);
-    memory_region_notify_iommu(&tcet->iommu, entry);
+    memory_region_notify_iommu(&tcet->iommu, entry, IOMMU_RW);
 
     return H_SUCCESS;
 }
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index f069b11..29ef345 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -616,7 +616,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
             goto out;
         }
 
-        memory_region_notify_iommu(mr, entry);
+        memory_region_notify_iommu(mr, entry, IOMMU_RW);
         start += entry.addr_mask + 1;
     }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 6c16a86..c67b3da 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -57,6 +57,7 @@ typedef enum {
     IOMMU_RO   = 1,
     IOMMU_WO   = 2,
     IOMMU_RW   = 3,
+    IOMMU_ACCESS_INVALID = 4,
 } IOMMUAccessFlags;
 
 struct IOMMUTLBEntry {
@@ -202,6 +203,7 @@ struct MemoryRegion {
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
     NotifierList iommu_notify;
+    IOMMUAccessFlags iommu_notify_flag;
 };
 
 /**
@@ -611,9 +613,11 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
  * @entry: the new entry in the IOMMU translation table.  The entry
  *         replaces all old entries for the same virtual I/O address range.
  *         Deleted entries have .@perm == 0.
+ * @flag: type of IOMMU notification (IOMMU_RW, IOMMU_NONE)
  */
 void memory_region_notify_iommu(MemoryRegion *mr,
-                                IOMMUTLBEntry entry);
+                                IOMMUTLBEntry entry,
+                                IOMMUAccessFlags flag);
 
 /**
  * memory_region_register_iommu_notifier: register a notifier for changes to
diff --git a/memory.c b/memory.c
index 747a9ec..5b50d15 100644
--- a/memory.c
+++ b/memory.c
@@ -1418,6 +1418,7 @@ void memory_region_init_iommu(MemoryRegion *mr,
     memory_region_init(mr, owner, name, size);
     mr->iommu_ops = ops,
     mr->terminates = true;  /* then re-forwards */
+    mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
     notifier_list_init(&mr->iommu_notify);
 }
 
@@ -1520,6 +1521,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n,
     if (mr->iommu_ops->notify_started &&
         QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
         mr->iommu_ops->notify_started(mr, flag);
+        mr->iommu_notify_flag = flag;
     }
     notifier_list_add(&mr->iommu_notify, n);
 }
@@ -1560,13 +1562,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
     if (mr->iommu_ops->notify_stopped &&
         QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
         mr->iommu_ops->notify_stopped(mr);
+        mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
     }
 }
 
 void memory_region_notify_iommu(MemoryRegion *mr,
-                                IOMMUTLBEntry entry)
+                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
 {
     assert(memory_region_is_iommu(mr));
+    assert(flag == mr->iommu_notify_flag);
     notifier_list_notify(&mr->iommu_notify, &entry);
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Qemu-devel] [PATCH 3/3] intel_iommu: allow IOMMU_NONE typed notifiers
  2016-09-05  7:21 [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type Peter Xu
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 1/3] memory: add one flag for IOMMU notifier Peter Xu
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag Peter Xu
@ 2016-09-05  7:21 ` Peter Xu
  2016-09-06  5:06 ` [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type David Gibson
  3 siblings, 0 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-05  7:21 UTC (permalink / raw)
  To: qemu-devel
  Cc: dgibson, jasowang, mst, pbonzini, cornelia.huck, wexu, vkaplans,
	alex.williamson, peterx

Intel vIOMMU is still lacking of a complete IOMMU notifier mechanism.
Before that is achieved, let's open a door for vhost DMAR support, which
only requires device-IOTLB based cache invalidations (IOMMU_NONE typed
notifies).

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e0d4f23..5a78cb2 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1979,10 +1979,12 @@ static void vtd_iommu_notify_started(MemoryRegion *iommu,
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
 
-    hw_error("Device at bus %s addr %02x.%d requires iommu notifier which "
-             "is currently not supported by intel-iommu emulation",
-             vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
-             PCI_FUNC(vtd_as->devfn));
+    if (flag == IOMMU_RW) {
+        hw_error("Device at bus %s addr %02x.%d requires iommu notifier which "
+                 "is currently not supported by intel-iommu emulation",
+                 vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
+                 PCI_FUNC(vtd_as->devfn));
+    }
 }
 
 static const VMStateDescription vtd_vmstate = {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag Peter Xu
@ 2016-09-05  8:04   ` Paolo Bonzini
  2016-09-05  8:38     ` Peter Xu
  2016-09-06  5:12   ` David Gibson
  1 sibling, 1 reply; 28+ messages in thread
From: Paolo Bonzini @ 2016-09-05  8:04 UTC (permalink / raw)
  To: Peter Xu, qemu-devel
  Cc: mst, jasowang, vkaplans, alex.williamson, wexu, cornelia.huck, dgibson



On 05/09/2016 09:21, Peter Xu wrote:
>  void memory_region_notify_iommu(MemoryRegion *mr,
> -                                IOMMUTLBEntry entry)
> +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
>  {
>      assert(memory_region_is_iommu(mr));
> +    assert(flag == mr->iommu_notify_flag);
>      notifier_list_notify(&mr->iommu_notify, &entry);
>  }

Shouldn't it be possible to have IOMMU_RW and IOMMU_NONE on the same
IOMMU, if the IOMMU supports IOMMU_RW at all?

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  8:04   ` Paolo Bonzini
@ 2016-09-05  8:38     ` Peter Xu
  2016-09-05  9:56       ` Paolo Bonzini
  2016-09-06  5:18       ` David Gibson
  0 siblings, 2 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-05  8:38 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson

On Mon, Sep 05, 2016 at 10:04:42AM +0200, Paolo Bonzini wrote:
> 
> 
> On 05/09/2016 09:21, Peter Xu wrote:
> >  void memory_region_notify_iommu(MemoryRegion *mr,
> > -                                IOMMUTLBEntry entry)
> > +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
> >  {
> >      assert(memory_region_is_iommu(mr));
> > +    assert(flag == mr->iommu_notify_flag);
> >      notifier_list_notify(&mr->iommu_notify, &entry);
> >  }
> 
> Shouldn't it be possible to have IOMMU_RW and IOMMU_NONE on the same
> IOMMU, if the IOMMU supports IOMMU_RW at all?

Yeah, this is a good point...

If we see IOMMU_NONE as a subset of IOMMU_RW, we should allow notify
IOMMU_NONE even if the cached flag is IOMMU_RW.

However in this patch I was not meant to do that. I made it an
exclusive flag to identify two different use cases. I don't know
whether this is good, but at least for Intel IOMMU's current use case,
these two types should be totally isolated from each other:

- IOMMU_NONE notification is used by future DMAR-enabled vhost, it
  should only be notified with device-IOTLB invalidations, this will
  only require "Device IOTLB" capability for Intel IOMMUs, and be
  notified in Device IOTLB invalidation handlers.

- IOMMU_RW notifications should only be used for vfio-pci, notified
  with IOTLB invalidations. This will only require Cache Mode (CM=1)
  capability, and will be notified in common IOTLB invalidations (no
  matter whether it's an cache invalidation or not, we will all use
  IOMMU_RW flag for this kind of notifies).

Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
confusing (just to leverage existing access flags), but what I was
trying to do is to make the two things not overlapped at all, since I
didn't find a mixture use case.

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  8:38     ` Peter Xu
@ 2016-09-05  9:56       ` Paolo Bonzini
  2016-09-06  5:27         ` Peter Xu
  2016-09-06  5:18       ` David Gibson
  1 sibling, 1 reply; 28+ messages in thread
From: Paolo Bonzini @ 2016-09-05  9:56 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson



On 05/09/2016 10:38, Peter Xu wrote:
> However in this patch I was not meant to do that. I made it an
> exclusive flag to identify two different use cases. I don't know
> whether this is good, but at least for Intel IOMMU's current use case,
> these two types should be totally isolated from each other:
> 
> - IOMMU_NONE notification is used by future DMAR-enabled vhost, it
>   should only be notified with device-IOTLB invalidations, this will
>   only require "Device IOTLB" capability for Intel IOMMUs, and be
>   notified in Device IOTLB invalidation handlers.
> 
> - IOMMU_RW notifications should only be used for vfio-pci, notified
>   with IOTLB invalidations. This will only require Cache Mode (CM=1)
>   capability, and will be notified in common IOTLB invalidations (no
>   matter whether it's an cache invalidation or not, we will all use
>   IOMMU_RW flag for this kind of notifies).
> 
> Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
> confusing (just to leverage existing access flags),

Yeah, if you really want to have these semantics, you need to define an 
enum like this:

	IOMMU_NOTIFIER_NONE = -1,
	IOMMU_NOTIFIER_FLUSH = 0,
	IOMMU_NOTIFIER_CHANGED_ENTRY = 1,

But I'm still not convinced of the exclusivity between "flush" and 
"entry changed" notifiers.  If I saw the above, my first reaction would 
be that you need a bit mask:

	IOMMU_NOTIFIER_NONE = -1,
	IOMMU_NOTIFIER_FLUSH = 1,
	IOMMU_NOTIFIER_CHANGED_ENTRY = 2,

But perhaps what you're looking for is to change the "notifier" to a 
"listener" like

	struct IOMMUListener {
	    void (*flush)(IOMMUListener *);
	    void (*entry_changed)(IOMMUListener *, IOMMUTLBEntry *);
	    QLIST_ENTRY(IOMMUListener) node;
	};

The patches can start with an IOMMUListener that only has the 
entry_changed callback and that replaces the current use of Notifier.  
Then notify_started and notify_stopped can be called on every notifier 
that is added/removed (see attached prototype), and the Intel IOMMU can 
simply reject registration of a listener that has a non-NULL 
iotlb_changed member.

Thanks,

Paolo

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 6bc4d4d..d8cbd90 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -156,14 +156,20 @@ static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu)
     return 1ULL << tcet->page_shift;
 }
 
-static void spapr_tce_notify_started(MemoryRegion *iommu)
+static void spapr_tce_listener_add(MemoryRegion *iommu, IOMMUListener *l)
 {
-    spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true);
+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
+    if (tcet->users++ == 0) {
+        spapr_tce_set_need_vfio(tcet, true);
+    }
 }
 
-static void spapr_tce_notify_stopped(MemoryRegion *iommu)
+static void spapr_tce_listener_del(MemoryRegion *iommu, IOMMUListener *l)
 {
-    spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false);
+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
+    if (--tcet->users == 0) {
+        spapr_tce_set_need_vfio(tcet, false);
+    }
 }
 
 static int spapr_tce_table_post_load(void *opaque, int version_id)
@@ -246,8 +252,8 @@ static const VMStateDescription vmstate_spapr_tce_table = {
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
     .translate = spapr_tce_translate_iommu,
     .get_min_page_size = spapr_tce_get_min_page_size,
-    .notify_started = spapr_tce_notify_started,
-    .notify_stopped = spapr_tce_notify_stopped,
+    .listener_add = spapr_tce_listener_add,
+    .listener_del = spapr_tce_listener_del,
 };
 
 static int spapr_tce_table_realize(DeviceState *dev)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index caf7be9..4761afd 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -558,6 +558,7 @@ struct sPAPRTCETable {
     uint64_t *table;
     uint32_t mig_nb_table;
     uint64_t *mig_table;
+    int users;
     bool bypass;
     bool need_vfio;
     int fd;
diff --git a/memory.c b/memory.c
index 0eb6895..e9f50af 100644
--- a/memory.c
+++ b/memory.c
@@ -1515,9 +1515,8 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
 
 void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
 {
-    if (mr->iommu_ops->notify_started &&
-        QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
-        mr->iommu_ops->notify_started(mr);
+    if (mr->iommu_ops->listener_add) {
+        mr->iommu_ops->listener_add(mr, ...);
     }
     notifier_list_add(&mr->iommu_notify, n);
 }
@@ -1555,9 +1554,8 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write)
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
 {
     notifier_remove(n);
-    if (mr->iommu_ops->notify_stopped &&
-        QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
-        mr->iommu_ops->notify_stopped(mr);
+    if (mr->iommu_ops->listener_del) {
+        mr->iommu_ops->listener_del(mr, ...);
     }
 }
 

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type
  2016-09-05  7:21 [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type Peter Xu
                   ` (2 preceding siblings ...)
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 3/3] intel_iommu: allow IOMMU_NONE typed notifiers Peter Xu
@ 2016-09-06  5:06 ` David Gibson
  2016-09-06  5:49   ` Peter Xu
  3 siblings, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-06  5:06 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	pbonzini, cornelia.huck, dgibson

[-- Attachment #1: Type: text/plain, Size: 1277 bytes --]

On Mon, Sep 05, 2016 at 03:21:18PM +0800, Peter Xu wrote:
> In the thread:
> 
>   https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg00254.html
> 
> Alex proposed a way for vhost DMAR to be enabled without breaking
> existing protections on vIOMMU and device assignments. This series
> tried to implement the idea, by introducing a IOMMU notifier type for
> each IOMMU memory region.

Hrm, I'm pretty dubious about this concept, since it's basically just
an interim hack for an incomplete notifier implementation on x86.
What makes just fixing the notifier so difficult?

> 
> Thanks,
> 
> Peter Xu (3):
>   memory: add one flag for IOMMU notifier
>   memory: add iommu_notify_flag
>   intel_iommu: allow IOMMU_NONE typed notifiers
> 
>  hw/i386/intel_iommu.c    | 13 ++++++++-----
>  hw/ppc/spapr_iommu.c     |  5 +++--
>  hw/s390x/s390-pci-inst.c |  2 +-
>  hw/vfio/common.c         |  3 ++-
>  include/exec/memory.h    | 14 +++++++++++---
>  memory.c                 | 12 +++++++++---
>  6 files changed, 34 insertions(+), 15 deletions(-)
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  7:21 ` [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag Peter Xu
  2016-09-05  8:04   ` Paolo Bonzini
@ 2016-09-06  5:12   ` David Gibson
  2016-09-06  5:33     ` Peter Xu
  1 sibling, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-06  5:12 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	pbonzini, cornelia.huck, dgibson

[-- Attachment #1: Type: text/plain, Size: 4650 bytes --]

On Mon, Sep 05, 2016 at 03:21:20PM +0800, Peter Xu wrote:
> When there are active IOMMU notify registers, the iommu_notify_flag will
> cache existing notify flag. When notifiers are triggered, flag will be
> checked against the cached result.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/ppc/spapr_iommu.c     | 2 +-
>  hw/s390x/s390-pci-inst.c | 2 +-
>  include/exec/memory.h    | 6 +++++-
>  memory.c                 | 6 +++++-
>  4 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 09660fc..14b1f00 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -411,7 +411,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>      entry.translated_addr = tce & page_mask;
>      entry.addr_mask = ~page_mask;
>      entry.perm = spapr_tce_iommu_access_flags(tce);
> -    memory_region_notify_iommu(&tcet->iommu, entry);
> +    memory_region_notify_iommu(&tcet->iommu, entry, IOMMU_RW);
>  
>      return H_SUCCESS;
>  }
> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> index f069b11..29ef345 100644
> --- a/hw/s390x/s390-pci-inst.c
> +++ b/hw/s390x/s390-pci-inst.c
> @@ -616,7 +616,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
>              goto out;
>          }
>  
> -        memory_region_notify_iommu(mr, entry);
> +        memory_region_notify_iommu(mr, entry, IOMMU_RW);
>          start += entry.addr_mask + 1;
>      }
>  
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 6c16a86..c67b3da 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -57,6 +57,7 @@ typedef enum {
>      IOMMU_RO   = 1,
>      IOMMU_WO   = 2,
>      IOMMU_RW   = 3,
> +    IOMMU_ACCESS_INVALID = 4,
>  } IOMMUAccessFlags;
>  
>  struct IOMMUTLBEntry {
> @@ -202,6 +203,7 @@ struct MemoryRegion {
>      unsigned ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>      NotifierList iommu_notify;
> +    IOMMUAccessFlags iommu_notify_flag;
>  };
>  
>  /**
> @@ -611,9 +613,11 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
>   * @entry: the new entry in the IOMMU translation table.  The entry
>   *         replaces all old entries for the same virtual I/O address range.
>   *         Deleted entries have .@perm == 0.
> + * @flag: type of IOMMU notification (IOMMU_RW, IOMMU_NONE)

This makes no sense.  The overall type of the notifier is already
noted in register / notify_start.  The permission on this specific
mapping is already included in the IOMMUTLBEntry.

>   */
>  void memory_region_notify_iommu(MemoryRegion *mr,
> -                                IOMMUTLBEntry entry);
> +                                IOMMUTLBEntry entry,
> +                                IOMMUAccessFlags flag);
>  
>  /**
>   * memory_region_register_iommu_notifier: register a notifier for changes to
> diff --git a/memory.c b/memory.c
> index 747a9ec..5b50d15 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1418,6 +1418,7 @@ void memory_region_init_iommu(MemoryRegion *mr,
>      memory_region_init(mr, owner, name, size);
>      mr->iommu_ops = ops,
>      mr->terminates = true;  /* then re-forwards */
> +    mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
>      notifier_list_init(&mr->iommu_notify);
>  }
>  
> @@ -1520,6 +1521,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n,
>      if (mr->iommu_ops->notify_started &&
>          QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
>          mr->iommu_ops->notify_started(mr, flag);
> +        mr->iommu_notify_flag = flag;
>      }
>      notifier_list_add(&mr->iommu_notify, n);
>  }
> @@ -1560,13 +1562,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
>      if (mr->iommu_ops->notify_stopped &&
>          QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
>          mr->iommu_ops->notify_stopped(mr);
> +        mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
>      }
>  }
>  
>  void memory_region_notify_iommu(MemoryRegion *mr,
> -                                IOMMUTLBEntry entry)
> +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
>  {
>      assert(memory_region_is_iommu(mr));
> +    assert(flag == mr->iommu_notify_flag);
>      notifier_list_notify(&mr->iommu_notify, &entry);
>  }
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  8:38     ` Peter Xu
  2016-09-05  9:56       ` Paolo Bonzini
@ 2016-09-06  5:18       ` David Gibson
  2016-09-06  5:55         ` Peter Xu
  1 sibling, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-06  5:18 UTC (permalink / raw)
  To: Peter Xu
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

[-- Attachment #1: Type: text/plain, Size: 2626 bytes --]

On Mon, Sep 05, 2016 at 04:38:04PM +0800, Peter Xu wrote:
> On Mon, Sep 05, 2016 at 10:04:42AM +0200, Paolo Bonzini wrote:
> > 
> > 
> > On 05/09/2016 09:21, Peter Xu wrote:
> > >  void memory_region_notify_iommu(MemoryRegion *mr,
> > > -                                IOMMUTLBEntry entry)
> > > +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
> > >  {
> > >      assert(memory_region_is_iommu(mr));
> > > +    assert(flag == mr->iommu_notify_flag);
> > >      notifier_list_notify(&mr->iommu_notify, &entry);
> > >  }
> > 
> > Shouldn't it be possible to have IOMMU_RW and IOMMU_NONE on the same
> > IOMMU, if the IOMMU supports IOMMU_RW at all?
> 
> Yeah, this is a good point...
> 
> If we see IOMMU_NONE as a subset of IOMMU_RW, we should allow notify
> IOMMU_NONE even if the cached flag is IOMMU_RW.
> 
> However in this patch I was not meant to do that. I made it an
> exclusive flag to identify two different use cases. I don't know
> whether this is good, but at least for Intel IOMMU's current use case,
> these two types should be totally isolated from each other:

It's not good, it's horrible.  This makes a hideous interface that's
just a collection of separate use cases without any coherent
underlying semantics.

> - IOMMU_NONE notification is used by future DMAR-enabled vhost, it
>   should only be notified with device-IOTLB invalidations, this will
>   only require "Device IOTLB" capability for Intel IOMMUs, and be
>   notified in Device IOTLB invalidation handlers.
> 
> - IOMMU_RW notifications should only be used for vfio-pci, notified
>   with IOTLB invalidations. This will only require Cache Mode (CM=1)
>   capability, and will be notified in common IOTLB invalidations (no
>   matter whether it's an cache invalidation or not, we will all use
>   IOMMU_RW flag for this kind of notifies).
> 
> Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
> confusing (just to leverage existing access flags), but what I was
> trying to do is to make the two things not overlapped at all, since I
> didn't find a mixture use case.

Maybe not now, but a common use case is absolutely possible.  If you
had a single (guest) bus with a single IO address space, with vIOMMU
and both VFIO and vhost devices on it, the same vIOMMU would need to
send all notifications to VFIO and (at least) unmap notifications to
vhost.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-05  9:56       ` Paolo Bonzini
@ 2016-09-06  5:27         ` Peter Xu
  2016-09-06  7:51           ` Paolo Bonzini
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-06  5:27 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson

On Mon, Sep 05, 2016 at 11:56:12AM +0200, Paolo Bonzini wrote:
> Yeah, if you really want to have these semantics, you need to define an 
> enum like this:
> 
> 	IOMMU_NOTIFIER_NONE = -1,
> 	IOMMU_NOTIFIER_FLUSH = 0,
> 	IOMMU_NOTIFIER_CHANGED_ENTRY = 1,
> 
> But I'm still not convinced of the exclusivity between "flush" and 
> "entry changed" notifiers.  If I saw the above, my first reaction would 
> be that you need a bit mask:
> 
> 	IOMMU_NOTIFIER_NONE = -1,
> 	IOMMU_NOTIFIER_FLUSH = 1,
> 	IOMMU_NOTIFIER_CHANGED_ENTRY = 2,
> 
> But perhaps what you're looking for is to change the "notifier" to a 
> "listener" like
> 
> 	struct IOMMUListener {
> 	    void (*flush)(IOMMUListener *);
> 	    void (*entry_changed)(IOMMUListener *, IOMMUTLBEntry *);
> 	    QLIST_ENTRY(IOMMUListener) node;
> 	};
> 
> The patches can start with an IOMMUListener that only has the 
> entry_changed callback and that replaces the current use of Notifier.  
> Then notify_started and notify_stopped can be called on every notifier 
> that is added/removed (see attached prototype), and the Intel IOMMU can 
> simply reject registration of a listener that has a non-NULL 
> iotlb_changed member.

Thanks for the quick prototyping. :-)

Maybe I haven't explained the idea very clearly, but device-IOTLB is
not a "flush" of whole device cache. It still needs a IOMMUTLBEntry,
and works just like how general IOMMU invalidations. E.g., we can do
device-IOTLB invalidation for a single 4K page.

However, I agree with you that the namings are confusing, maybe at
least we should introduce IOMMU_NOTIFIER_* macros, though instead of a
_FLUSH one, we can have:

    IOMMU_NOTIFIER_NONE = -1,
    IOMMU_NOTIFIER_DEVICE_INVALIDATE = 0,
    IOMMU_NOTIFIER_IOTLB_CHANGED = 1,

To clarify that these are two non-overlapped cases.

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06  5:12   ` David Gibson
@ 2016-09-06  5:33     ` Peter Xu
  0 siblings, 0 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-06  5:33 UTC (permalink / raw)
  To: David Gibson
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	pbonzini, cornelia.huck, dgibson

On Tue, Sep 06, 2016 at 03:12:26PM +1000, David Gibson wrote:
> >  /**
> > @@ -611,9 +613,11 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
> >   * @entry: the new entry in the IOMMU translation table.  The entry
> >   *         replaces all old entries for the same virtual I/O address range.
> >   *         Deleted entries have .@perm == 0.
> > + * @flag: type of IOMMU notification (IOMMU_RW, IOMMU_NONE)
> 
> This makes no sense.  The overall type of the notifier is already
> noted in register / notify_start.  The permission on this specific
> mapping is already included in the IOMMUTLBEntry.

This is not meant to be the same as the one in IOMMUTLBEntry. For
example, we can have:

  memory_region_notify_iommu(..., entry, IOMMU_RW);

This means that the caller of notification will notify all changed
entries (will only be used for checking, see the assert() in
memory_region_notify_iommu()). While within the entry, we can have:

  entry.perm == IOMMU_NONE

Which means that this specific invalidation is an unmap() operation.

The naming is bad.

We can put this aside and see whether we can have better solution in
general...

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type
  2016-09-06  5:06 ` [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type David Gibson
@ 2016-09-06  5:49   ` Peter Xu
  2016-09-06  6:26     ` Jason Wang
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-06  5:49 UTC (permalink / raw)
  To: David Gibson
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	pbonzini, cornelia.huck, dgibson

On Tue, Sep 06, 2016 at 03:06:17PM +1000, David Gibson wrote:
> On Mon, Sep 05, 2016 at 03:21:18PM +0800, Peter Xu wrote:
> > In the thread:
> > 
> >   https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg00254.html
> > 
> > Alex proposed a way for vhost DMAR to be enabled without breaking
> > existing protections on vIOMMU and device assignments. This series
> > tried to implement the idea, by introducing a IOMMU notifier type for
> > each IOMMU memory region.
> 
> Hrm, I'm pretty dubious about this concept, since it's basically just
> an interim hack for an incomplete notifier implementation on x86.
> What makes just fixing the notifier so difficult?

Aviv is working on the full notifier support for that. It's been
months since his last post though. If he cannot continue it (due to
any reason), I can take it over. But for now, we may still need to
wait for his patches to fully enable a complete notifier mechanism.

I don't know how POWER works to provide a complete notifier, and
whether POWER can selectively enable the notified items... But for
Intel VT-d, it provided two choices: by default, only cache
invalidations are notified (even, we can disalbe cache invaliations),
but if one want to have a complete notifier, just set the CM bit to 1.
So I just think it'll be cool if we can support both cases. E.g., for
vhost, it does not need to be notified with newly added entries, but
only cache invalidations. IMHO we can't just force vhost to use a
complete notifier while actually it only needs part of it.

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06  5:18       ` David Gibson
@ 2016-09-06  5:55         ` Peter Xu
  0 siblings, 0 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-06  5:55 UTC (permalink / raw)
  To: David Gibson
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

On Tue, Sep 06, 2016 at 03:18:46PM +1000, David Gibson wrote:
> > Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
> > confusing (just to leverage existing access flags), but what I was
> > trying to do is to make the two things not overlapped at all, since I
> > didn't find a mixture use case.
> 
> Maybe not now, but a common use case is absolutely possible.  If you
> had a single (guest) bus with a single IO address space, with vIOMMU
> and both VFIO and vhost devices on it, the same vIOMMU would need to
> send all notifications to VFIO and (at least) unmap notifications to
> vhost.

Yeah this is a good one... Thanks to point out.

Then I agree that splitting the use cases won't be enough... We may
need to exactly register IOMMU notifiers with notification type
(invalidations only, or we also need entry additions), and we just
selectively notify the consumers depending on what kind of
notification it is.

Or any smarter way to do it.

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type
  2016-09-06  5:49   ` Peter Xu
@ 2016-09-06  6:26     ` Jason Wang
  2016-09-07  4:38       ` David Gibson
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Wang @ 2016-09-06  6:26 UTC (permalink / raw)
  To: Peter Xu, David Gibson
  Cc: qemu-devel, mst, vkaplans, alex.williamson, wexu, pbonzini,
	cornelia.huck, dgibson



On 2016年09月06日 13:49, Peter Xu wrote:
> On Tue, Sep 06, 2016 at 03:06:17PM +1000, David Gibson wrote:
>> On Mon, Sep 05, 2016 at 03:21:18PM +0800, Peter Xu wrote:
>>> In the thread:
>>>
>>>    https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg00254.html
>>>
>>> Alex proposed a way for vhost DMAR to be enabled without breaking
>>> existing protections on vIOMMU and device assignments. This series
>>> tried to implement the idea, by introducing a IOMMU notifier type for
>>> each IOMMU memory region.
>> Hrm, I'm pretty dubious about this concept, since it's basically just
>> an interim hack for an incomplete notifier implementation on x86.
>> What makes just fixing the notifier so difficult?
> Aviv is working on the full notifier support for that. It's been
> months since his last post though. If he cannot continue it (due to
> any reason), I can take it over. But for now, we may still need to
> wait for his patches to fully enable a complete notifier mechanism.

Yes, and the issue is:

- There's no way for current intel IOMMU code to be notified when guest 
map a page. So it's impossible for intel IOMMU to co-work with vfio now.
- A solution is caching mode (CM) support which requires a TLB 
invalidation even if it was a non-present to present changing, but this 
is still under development.
- VT-d spec requires: "Hardware implementations of this architecture 
must support operation corresponding to CM=0." So even if we have CM 
mode which can work with vfio, we still must support intel IOMMU with CM 
disabled. It was not only a spec requirement but also a performance 
consideration. (CM is usually slower)

So in conclusion, there's a mode for intel IOMMU that can't work with 
vfio at all. Fixing the notifier does not solve this since the notifier 
won't be able to be triggered.

>
> I don't know how POWER works to provide a complete notifier, and
> whether POWER can selectively enable the notified items... But for
> Intel VT-d, it provided two choices: by default, only cache
> invalidations are notified (even, we can disalbe cache invaliations),
> but if one want to have a complete notifier, just set the CM bit to 1.
> So I just think it'll be cool if we can support both cases. E.g., for
> vhost, it does not need to be notified with newly added entries, but
> only cache invalidations. IMHO we can't just force vhost to use a
> complete notifier while actually it only needs part of it.
>
> Thanks,
>
> -- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06  5:27         ` Peter Xu
@ 2016-09-06  7:51           ` Paolo Bonzini
  2016-09-06  8:17             ` Peter Xu
  0 siblings, 1 reply; 28+ messages in thread
From: Paolo Bonzini @ 2016-09-06  7:51 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson



On 06/09/2016 07:27, Peter Xu wrote:
> Maybe I haven't explained the idea very clearly, but device-IOTLB is
> not a "flush" of whole device cache. It still needs a IOMMUTLBEntry,
> and works just like how general IOMMU invalidations. E.g., we can do
> device-IOTLB invalidation for a single 4K page.

Yes, it can be FLUSHED_ENTRY and CHANGED_ENTRY or
INVALIDATE_ENTRY/CHANGE_ENTRY.

> However, I agree with you that the namings are confusing, maybe at
> least we should introduce IOMMU_NOTIFIER_* macros, though instead of a
> _FLUSH one, we can have:
> 
>     IOMMU_NOTIFIER_NONE = -1,
>     IOMMU_NOTIFIER_DEVICE_INVALIDATE = 0,
>     IOMMU_NOTIFIER_IOTLB_CHANGED = 1,

I suggest making the names more similar:

- two participles (invalidated/changed) or two imperatives
(invalidate!/change!);

- choose whether to keep the verb first ("invalidate device") or keep
the noun first ("IOTLB changed"), and stick with one convention.

> To clarify that these are two non-overlapped cases.

If they are not overlapping, they really should be using a bitmask or
multiple callbacks in a struct...

Paolo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06  7:51           ` Paolo Bonzini
@ 2016-09-06  8:17             ` Peter Xu
  2016-09-06  8:19               ` Paolo Bonzini
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-06  8:17 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson

On Tue, Sep 06, 2016 at 09:51:28AM +0200, Paolo Bonzini wrote:
> 
> 
> On 06/09/2016 07:27, Peter Xu wrote:
> > Maybe I haven't explained the idea very clearly, but device-IOTLB is
> > not a "flush" of whole device cache. It still needs a IOMMUTLBEntry,
> > and works just like how general IOMMU invalidations. E.g., we can do
> > device-IOTLB invalidation for a single 4K page.
> 
> Yes, it can be FLUSHED_ENTRY and CHANGED_ENTRY or
> INVALIDATE_ENTRY/CHANGE_ENTRY.
> 
> > However, I agree with you that the namings are confusing, maybe at
> > least we should introduce IOMMU_NOTIFIER_* macros, though instead of a
> > _FLUSH one, we can have:
> > 
> >     IOMMU_NOTIFIER_NONE = -1,
> >     IOMMU_NOTIFIER_DEVICE_INVALIDATE = 0,
> >     IOMMU_NOTIFIER_IOTLB_CHANGED = 1,
> 
> I suggest making the names more similar:
> 
> - two participles (invalidated/changed) or two imperatives
> (invalidate!/change!);
> 
> - choose whether to keep the verb first ("invalidate device") or keep
> the noun first ("IOTLB changed"), and stick with one convention.

Sensible. Will follow.

> 
> > To clarify that these are two non-overlapped cases.
> 
> If they are not overlapping, they really should be using a bitmask or
> multiple callbacks in a struct...

After knowing the possibility that the two consumers might be
mixturely used in the future (as David has mentioned), I'd vote for a
bitmask for notification type:

    IOMMU_NOTIFIER_NONE = 0,
    IOMMU_NOTIFIER_INVALIDATION = 1,
    IOMMU_NOTIFIER_ADDITION = 2,

When registering the IOMMU notifier, user should specify the type. For
VFIO, it should be (INVALIDATION|ADDITION). For vhost, (INVALIDATION)
would suffice.

Will cook a v2 soon. Thanks!

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06  8:17             ` Peter Xu
@ 2016-09-06  8:19               ` Paolo Bonzini
  2016-09-06 10:31                 ` Peter Xu
  0 siblings, 1 reply; 28+ messages in thread
From: Paolo Bonzini @ 2016-09-06  8:19 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson



On 06/09/2016 10:17, Peter Xu wrote:
> After knowing the possibility that the two consumers might be
> mixturely used in the future (as David has mentioned), I'd vote for a
> bitmask for notification type:
> 
>     IOMMU_NOTIFIER_NONE = 0,
>     IOMMU_NOTIFIER_INVALIDATION = 1,
>     IOMMU_NOTIFIER_ADDITION = 2,

ADDITION really should be "CHANGE" I think, so what about
IOMMU_NOTIFIER_INVALIDATE and IOMMU_NOTIFIER_CHANGE?

For VFIO, would the "invalidate" and "add" callbacks use the same code
or different?

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06  8:19               ` Paolo Bonzini
@ 2016-09-06 10:31                 ` Peter Xu
  2016-09-07  5:44                   ` David Gibson
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-06 10:31 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: qemu-devel, mst, jasowang, vkaplans, alex.williamson, wexu,
	cornelia.huck, dgibson

On Tue, Sep 06, 2016 at 10:19:14AM +0200, Paolo Bonzini wrote:
> 
> 
> On 06/09/2016 10:17, Peter Xu wrote:
> > After knowing the possibility that the two consumers might be
> > mixturely used in the future (as David has mentioned), I'd vote for a
> > bitmask for notification type:
> > 
> >     IOMMU_NOTIFIER_NONE = 0,
> >     IOMMU_NOTIFIER_INVALIDATION = 1,
> >     IOMMU_NOTIFIER_ADDITION = 2,
> 
> ADDITION really should be "CHANGE" I think, so what about
> IOMMU_NOTIFIER_INVALIDATE and IOMMU_NOTIFIER_CHANGE?

For "CHANGE", it sounds like a unmap() + a map(). However I'd say
"ADDITION" is nowhere better...

Will use "CHANGE".

> 
> For VFIO, would the "invalidate" and "add" callbacks use the same code
> or different?

Currently vfio_iommu_map_notify() should be handling both.

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type
  2016-09-06  6:26     ` Jason Wang
@ 2016-09-07  4:38       ` David Gibson
  0 siblings, 0 replies; 28+ messages in thread
From: David Gibson @ 2016-09-07  4:38 UTC (permalink / raw)
  To: Jason Wang
  Cc: Peter Xu, qemu-devel, mst, vkaplans, alex.williamson, wexu,
	pbonzini, cornelia.huck, dgibson

[-- Attachment #1: Type: text/plain, Size: 3504 bytes --]

On Tue, Sep 06, 2016 at 02:26:44PM +0800, Jason Wang wrote:
> 
> 
> On 2016年09月06日 13:49, Peter Xu wrote:
> > On Tue, Sep 06, 2016 at 03:06:17PM +1000, David Gibson wrote:
> > > On Mon, Sep 05, 2016 at 03:21:18PM +0800, Peter Xu wrote:
> > > > In the thread:
> > > > 
> > > >    https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg00254.html
> > > > 
> > > > Alex proposed a way for vhost DMAR to be enabled without breaking
> > > > existing protections on vIOMMU and device assignments. This series
> > > > tried to implement the idea, by introducing a IOMMU notifier type for
> > > > each IOMMU memory region.
> > > Hrm, I'm pretty dubious about this concept, since it's basically just
> > > an interim hack for an incomplete notifier implementation on x86.
> > > What makes just fixing the notifier so difficult?
> > Aviv is working on the full notifier support for that. It's been
> > months since his last post though. If he cannot continue it (due to
> > any reason), I can take it over. But for now, we may still need to
> > wait for his patches to fully enable a complete notifier mechanism.
> 
> Yes, and the issue is:
> 
> - There's no way for current intel IOMMU code to be notified when guest map
> a page. So it's impossible for intel IOMMU to co-work with vfio now.
> - A solution is caching mode (CM) support which requires a TLB invalidation
> even if it was a non-present to present changing, but this is still under
> development.

Right.  AIUI the whole point of CM is to allow this sort of
virtualization.

What I hadn't reaalized before was that even with CM==0, invalidations
were still notified, otherwise I didn't see how anything was possible.

> - VT-d spec requires: "Hardware implementations of this architecture must
> support operation corresponding to CM=0." So even if we have CM mode which
> can work with vfio, we still must support intel IOMMU with CM disabled. It
> was not only a spec requirement but also a performance consideration. (CM is
> usually slower)

Well.. this isn't a hardware implementation, so I don't think it's
really a spec requirement, although I can see why you'd want it so
that you can do something as near as possible indistinguishable from
hardware.

> So in conclusion, there's a mode for intel IOMMU that can't work with vfio
> at all. Fixing the notifier does not solve this since the notifier won't be
> able to be triggered.

Ok, I understand.  CM==1 will be able to have the full notifier, but
CM==0 won't and will never work with VFIO, so we need a way to detect
that cleanly.

> 
> > 
> > I don't know how POWER works to provide a complete notifier, and
> > whether POWER can selectively enable the notified items... But for
> > Intel VT-d, it provided two choices: by default, only cache
> > invalidations are notified (even, we can disalbe cache invaliations),
> > but if one want to have a complete notifier, just set the CM bit to 1.
> > So I just think it'll be cool if we can support both cases. E.g., for
> > vhost, it does not need to be notified with newly added entries, but
> > only cache invalidations. IMHO we can't just force vhost to use a
> > complete notifier while actually it only needs part of it.
> > 
> > Thanks,
> > 
> > -- peterx
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-06 10:31                 ` Peter Xu
@ 2016-09-07  5:44                   ` David Gibson
  2016-09-07  6:34                     ` Peter Xu
  0 siblings, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-07  5:44 UTC (permalink / raw)
  To: Peter Xu
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

[-- Attachment #1: Type: text/plain, Size: 1987 bytes --]

On Tue, Sep 06, 2016 at 06:31:42PM +0800, Peter Xu wrote:
> On Tue, Sep 06, 2016 at 10:19:14AM +0200, Paolo Bonzini wrote:
> > 
> > 
> > On 06/09/2016 10:17, Peter Xu wrote:
> > > After knowing the possibility that the two consumers might be
> > > mixturely used in the future (as David has mentioned), I'd vote for a
> > > bitmask for notification type:
> > > 
> > >     IOMMU_NOTIFIER_NONE = 0,
> > >     IOMMU_NOTIFIER_INVALIDATION = 1,
> > >     IOMMU_NOTIFIER_ADDITION = 2,
> > 
> > ADDITION really should be "CHANGE" I think, so what about
> > IOMMU_NOTIFIER_INVALIDATE and IOMMU_NOTIFIER_CHANGE?
> 
> For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> "ADDITION" is nowhere better...

Right.. this brings up a good point.

Changing a mapping (i.e. overwriting an existing mapping with a
different one) would also need notification, even on x86, no?  Since
it implicitly invalidates the previous mapping.

I'm guessing the guest will avoid this by always unmapping before it
maps.  We still need to consider this possibility when designing the
notifier interface though.

It seems the real notification triggers here are:
    * map - something is mapped which previously wasn't
    * unmap - something is no longer mapped which was before

Note that whether the second needs to be triggered depends on the
*previous* state of that IOBA range, *not* on the permissions of the
new mapping (if any).

A "change" - replacing one mapping with another should count as both a
"map" and "unmap" event.

> 
> Will use "CHANGE".
> 
> > 
> > For VFIO, would the "invalidate" and "add" callbacks use the same code
> > or different?
> 
> Currently vfio_iommu_map_notify() should be handling both.
> 
> Thanks,
> 
> -- peterx
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-07  5:44                   ` David Gibson
@ 2016-09-07  6:34                     ` Peter Xu
  2016-09-07  6:41                       ` David Gibson
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-07  6:34 UTC (permalink / raw)
  To: David Gibson
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

On Wed, Sep 07, 2016 at 03:44:19PM +1000, David Gibson wrote:
> > For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> > "ADDITION" is nowhere better...
> 
> Right.. this brings up a good point.
> 
> Changing a mapping (i.e. overwriting an existing mapping with a
> different one) would also need notification, even on x86, no?  Since
> it implicitly invalidates the previous mapping.
> 
> I'm guessing the guest will avoid this by always unmapping before it
> maps.  We still need to consider this possibility when designing the
> notifier interface though.
> 
> It seems the real notification triggers here are:
>     * map - something is mapped which previously wasn't
>     * unmap - something is no longer mapped which was before
> 
> Note that whether the second needs to be triggered depends on the
> *previous* state of that IOBA range, *not* on the permissions of the
> new mapping (if any).
> 
> A "change" - replacing one mapping with another should count as both a
> "map" and "unmap" event.

Yeah... For MAP/UNMAP, it is strange in another way: e.g. for vhost,
it doesn't care about map/unmap, it cares about invalidated cache. So
IIUC this is a question about "naming" but not the implementations...
I suppose it is really a matter of taste, and both work for me (either
INVALIDATION/CHANGE or UNMAP/MAP).

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-07  6:34                     ` Peter Xu
@ 2016-09-07  6:41                       ` David Gibson
  2016-09-08  9:07                         ` Peter Xu
  0 siblings, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-07  6:41 UTC (permalink / raw)
  To: Peter Xu
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

[-- Attachment #1: Type: text/plain, Size: 2139 bytes --]

On Wed, Sep 07, 2016 at 02:34:19PM +0800, Peter Xu wrote:
> On Wed, Sep 07, 2016 at 03:44:19PM +1000, David Gibson wrote:
> > > For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> > > "ADDITION" is nowhere better...
> > 
> > Right.. this brings up a good point.
> > 
> > Changing a mapping (i.e. overwriting an existing mapping with a
> > different one) would also need notification, even on x86, no?  Since
> > it implicitly invalidates the previous mapping.
> > 
> > I'm guessing the guest will avoid this by always unmapping before it
> > maps.  We still need to consider this possibility when designing the
> > notifier interface though.
> > 
> > It seems the real notification triggers here are:
> >     * map - something is mapped which previously wasn't
> >     * unmap - something is no longer mapped which was before
> > 
> > Note that whether the second needs to be triggered depends on the
> > *previous* state of that IOBA range, *not* on the permissions of the
> > new mapping (if any).
> > 
> > A "change" - replacing one mapping with another should count as both a
> > "map" and "unmap" event.
> 
> Yeah... For MAP/UNMAP, it is strange in another way: e.g. for vhost,
> it doesn't care about map/unmap, it cares about invalidated cache.

I think caring about invalidated cache *is* caring about unmap.  It
doesn't matter whether the new mapping is something or nothing - if
the old mapping is no longer valid, you need to invalidate the cache,
yes?

> So
> IIUC this is a question about "naming" but not the implementations...
> I suppose it is really a matter of taste, and both work for me (either
> INVALIDATION/CHANGE or UNMAP/MAP).

No.. it is a question of implementation.  My point is that I don't
think the new permission is sufficient information to let you know if
a notification is necessary.  You need to know if there was an
existing mapping at that IOBA.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-07  6:41                       ` David Gibson
@ 2016-09-08  9:07                         ` Peter Xu
  2016-09-12  1:26                           ` David Gibson
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-08  9:07 UTC (permalink / raw)
  To: David Gibson
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

On Wed, Sep 07, 2016 at 04:41:54PM +1000, David Gibson wrote:
> On Wed, Sep 07, 2016 at 02:34:19PM +0800, Peter Xu wrote:
> > On Wed, Sep 07, 2016 at 03:44:19PM +1000, David Gibson wrote:
> > > > For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> > > > "ADDITION" is nowhere better...
> > > 
> > > Right.. this brings up a good point.
> > > 
> > > Changing a mapping (i.e. overwriting an existing mapping with a
> > > different one) would also need notification, even on x86, no?  Since
> > > it implicitly invalidates the previous mapping.
> > > 
> > > I'm guessing the guest will avoid this by always unmapping before it
> > > maps.  We still need to consider this possibility when designing the
> > > notifier interface though.
> > > 
> > > It seems the real notification triggers here are:
> > >     * map - something is mapped which previously wasn't
> > >     * unmap - something is no longer mapped which was before
> > > 
> > > Note that whether the second needs to be triggered depends on the
> > > *previous* state of that IOBA range, *not* on the permissions of the
> > > new mapping (if any).
> > > 
> > > A "change" - replacing one mapping with another should count as both a
> > > "map" and "unmap" event.
> > 
> > Yeah... For MAP/UNMAP, it is strange in another way: e.g. for vhost,
> > it doesn't care about map/unmap, it cares about invalidated cache.
> 
> I think caring about invalidated cache *is* caring about unmap.  It
> doesn't matter whether the new mapping is something or nothing - if
> the old mapping is no longer valid, you need to invalidate the cache,
> yes?

Yes, I think these two are exactly the same in implementation (vhost
needs UNMAP events of course). So that's why I called it "a naming
issue". :)

> 
> > So
> > IIUC this is a question about "naming" but not the implementations...
> > I suppose it is really a matter of taste, and both work for me (either
> > INVALIDATION/CHANGE or UNMAP/MAP).
> 
> No.. it is a question of implementation.  My point is that I don't
> think the new permission is sufficient information to let you know if
> a notification is necessary.  You need to know if there was an
> existing mapping at that IOBA.

My understanding is that we don't need to know that. Because IIUC
there are only map_page() and unmap_page() in guest IOMMU driver
(please check dma_map_ops in kernel). There is no chance for anyone to
"change" the content of the mapping, unless it calls unmap_page() then
with a map_page(). In that case, we'll have two IOTLB invalidation
requests.

Please kindly correct me if I am wrong.

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-08  9:07                         ` Peter Xu
@ 2016-09-12  1:26                           ` David Gibson
  2016-09-12  5:13                             ` Peter Xu
  0 siblings, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-12  1:26 UTC (permalink / raw)
  To: Peter Xu
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

[-- Attachment #1: Type: text/plain, Size: 3294 bytes --]

On Thu, Sep 08, 2016 at 05:07:32PM +0800, Peter Xu wrote:
> On Wed, Sep 07, 2016 at 04:41:54PM +1000, David Gibson wrote:
> > On Wed, Sep 07, 2016 at 02:34:19PM +0800, Peter Xu wrote:
> > > On Wed, Sep 07, 2016 at 03:44:19PM +1000, David Gibson wrote:
> > > > > For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> > > > > "ADDITION" is nowhere better...
> > > > 
> > > > Right.. this brings up a good point.
> > > > 
> > > > Changing a mapping (i.e. overwriting an existing mapping with a
> > > > different one) would also need notification, even on x86, no?  Since
> > > > it implicitly invalidates the previous mapping.
> > > > 
> > > > I'm guessing the guest will avoid this by always unmapping before it
> > > > maps.  We still need to consider this possibility when designing the
> > > > notifier interface though.
> > > > 
> > > > It seems the real notification triggers here are:
> > > >     * map - something is mapped which previously wasn't
> > > >     * unmap - something is no longer mapped which was before
> > > > 
> > > > Note that whether the second needs to be triggered depends on the
> > > > *previous* state of that IOBA range, *not* on the permissions of the
> > > > new mapping (if any).
> > > > 
> > > > A "change" - replacing one mapping with another should count as both a
> > > > "map" and "unmap" event.
> > > 
> > > Yeah... For MAP/UNMAP, it is strange in another way: e.g. for vhost,
> > > it doesn't care about map/unmap, it cares about invalidated cache.
> > 
> > I think caring about invalidated cache *is* caring about unmap.  It
> > doesn't matter whether the new mapping is something or nothing - if
> > the old mapping is no longer valid, you need to invalidate the cache,
> > yes?
> 
> Yes, I think these two are exactly the same in implementation (vhost
> needs UNMAP events of course). So that's why I called it "a naming
> issue". :)
> 
> > 
> > > So
> > > IIUC this is a question about "naming" but not the implementations...
> > > I suppose it is really a matter of taste, and both work for me (either
> > > INVALIDATION/CHANGE or UNMAP/MAP).
> > 
> > No.. it is a question of implementation.  My point is that I don't
> > think the new permission is sufficient information to let you know if
> > a notification is necessary.  You need to know if there was an
> > existing mapping at that IOBA.
> 
> My understanding is that we don't need to know that. Because IIUC
> there are only map_page() and unmap_page() in guest IOMMU driver
> (please check dma_map_ops in kernel). There is no chance for anyone to
> "change" the content of the mapping, unless it calls unmap_page() then
> with a map_page(). In that case, we'll have two IOTLB invalidation
> requests.

That's assuming a Linux guest using the current guest IOMMU model.

I don't think we do so in practice, but the PAPR hypercall interface
allows in-place changing of a mapping.  The interface is just "set
this IOPTE to this value".

> 
> Please kindly correct me if I am wrong.
> 
> Thanks,
> 
> -- peterx
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-12  1:26                           ` David Gibson
@ 2016-09-12  5:13                             ` Peter Xu
  2016-09-14  4:00                               ` David Gibson
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Xu @ 2016-09-12  5:13 UTC (permalink / raw)
  To: David Gibson
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

On Mon, Sep 12, 2016 at 11:26:04AM +1000, David Gibson wrote:
> On Thu, Sep 08, 2016 at 05:07:32PM +0800, Peter Xu wrote:
> > On Wed, Sep 07, 2016 at 04:41:54PM +1000, David Gibson wrote:
> > > On Wed, Sep 07, 2016 at 02:34:19PM +0800, Peter Xu wrote:
> > > > On Wed, Sep 07, 2016 at 03:44:19PM +1000, David Gibson wrote:
> > > > > > For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> > > > > > "ADDITION" is nowhere better...
> > > > > 
> > > > > Right.. this brings up a good point.
> > > > > 
> > > > > Changing a mapping (i.e. overwriting an existing mapping with a
> > > > > different one) would also need notification, even on x86, no?  Since
> > > > > it implicitly invalidates the previous mapping.
> > > > > 
> > > > > I'm guessing the guest will avoid this by always unmapping before it
> > > > > maps.  We still need to consider this possibility when designing the
> > > > > notifier interface though.
> > > > > 
> > > > > It seems the real notification triggers here are:
> > > > >     * map - something is mapped which previously wasn't
> > > > >     * unmap - something is no longer mapped which was before
> > > > > 
> > > > > Note that whether the second needs to be triggered depends on the
> > > > > *previous* state of that IOBA range, *not* on the permissions of the
> > > > > new mapping (if any).
> > > > > 
> > > > > A "change" - replacing one mapping with another should count as both a
> > > > > "map" and "unmap" event.
> > > > 
> > > > Yeah... For MAP/UNMAP, it is strange in another way: e.g. for vhost,
> > > > it doesn't care about map/unmap, it cares about invalidated cache.
> > > 
> > > I think caring about invalidated cache *is* caring about unmap.  It
> > > doesn't matter whether the new mapping is something or nothing - if
> > > the old mapping is no longer valid, you need to invalidate the cache,
> > > yes?
> > 
> > Yes, I think these two are exactly the same in implementation (vhost
> > needs UNMAP events of course). So that's why I called it "a naming
> > issue". :)
> > 
> > > 
> > > > So
> > > > IIUC this is a question about "naming" but not the implementations...
> > > > I suppose it is really a matter of taste, and both work for me (either
> > > > INVALIDATION/CHANGE or UNMAP/MAP).
> > > 
> > > No.. it is a question of implementation.  My point is that I don't
> > > think the new permission is sufficient information to let you know if
> > > a notification is necessary.  You need to know if there was an
> > > existing mapping at that IOBA.
> > 
> > My understanding is that we don't need to know that. Because IIUC
> > there are only map_page() and unmap_page() in guest IOMMU driver
> > (please check dma_map_ops in kernel). There is no chance for anyone to
> > "change" the content of the mapping, unless it calls unmap_page() then
> > with a map_page(). In that case, we'll have two IOTLB invalidation
> > requests.
> 
> That's assuming a Linux guest using the current guest IOMMU model.
> 
> I don't think we do so in practice, but the PAPR hypercall interface
> allows in-place changing of a mapping.  The interface is just "set
> this IOPTE to this value".

I see. Even if so, QEMU IOMMU emulation codes can convert one CHANGE
request into UNMAP and a continuous MAP, right?

Thanks,

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-12  5:13                             ` Peter Xu
@ 2016-09-14  4:00                               ` David Gibson
  2016-09-14  5:43                                 ` Peter Xu
  0 siblings, 1 reply; 28+ messages in thread
From: David Gibson @ 2016-09-14  4:00 UTC (permalink / raw)
  To: Peter Xu
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

[-- Attachment #1: Type: text/plain, Size: 3834 bytes --]

On Mon, Sep 12, 2016 at 01:13:41PM +0800, Peter Xu wrote:
> On Mon, Sep 12, 2016 at 11:26:04AM +1000, David Gibson wrote:
> > On Thu, Sep 08, 2016 at 05:07:32PM +0800, Peter Xu wrote:
> > > On Wed, Sep 07, 2016 at 04:41:54PM +1000, David Gibson wrote:
> > > > On Wed, Sep 07, 2016 at 02:34:19PM +0800, Peter Xu wrote:
> > > > > On Wed, Sep 07, 2016 at 03:44:19PM +1000, David Gibson wrote:
> > > > > > > For "CHANGE", it sounds like a unmap() + a map(). However I'd say
> > > > > > > "ADDITION" is nowhere better...
> > > > > > 
> > > > > > Right.. this brings up a good point.
> > > > > > 
> > > > > > Changing a mapping (i.e. overwriting an existing mapping with a
> > > > > > different one) would also need notification, even on x86, no?  Since
> > > > > > it implicitly invalidates the previous mapping.
> > > > > > 
> > > > > > I'm guessing the guest will avoid this by always unmapping before it
> > > > > > maps.  We still need to consider this possibility when designing the
> > > > > > notifier interface though.
> > > > > > 
> > > > > > It seems the real notification triggers here are:
> > > > > >     * map - something is mapped which previously wasn't
> > > > > >     * unmap - something is no longer mapped which was before
> > > > > > 
> > > > > > Note that whether the second needs to be triggered depends on the
> > > > > > *previous* state of that IOBA range, *not* on the permissions of the
> > > > > > new mapping (if any).
> > > > > > 
> > > > > > A "change" - replacing one mapping with another should count as both a
> > > > > > "map" and "unmap" event.
> > > > > 
> > > > > Yeah... For MAP/UNMAP, it is strange in another way: e.g. for vhost,
> > > > > it doesn't care about map/unmap, it cares about invalidated cache.
> > > > 
> > > > I think caring about invalidated cache *is* caring about unmap.  It
> > > > doesn't matter whether the new mapping is something or nothing - if
> > > > the old mapping is no longer valid, you need to invalidate the cache,
> > > > yes?
> > > 
> > > Yes, I think these two are exactly the same in implementation (vhost
> > > needs UNMAP events of course). So that's why I called it "a naming
> > > issue". :)
> > > 
> > > > 
> > > > > So
> > > > > IIUC this is a question about "naming" but not the implementations...
> > > > > I suppose it is really a matter of taste, and both work for me (either
> > > > > INVALIDATION/CHANGE or UNMAP/MAP).
> > > > 
> > > > No.. it is a question of implementation.  My point is that I don't
> > > > think the new permission is sufficient information to let you know if
> > > > a notification is necessary.  You need to know if there was an
> > > > existing mapping at that IOBA.
> > > 
> > > My understanding is that we don't need to know that. Because IIUC
> > > there are only map_page() and unmap_page() in guest IOMMU driver
> > > (please check dma_map_ops in kernel). There is no chance for anyone to
> > > "change" the content of the mapping, unless it calls unmap_page() then
> > > with a map_page(). In that case, we'll have two IOTLB invalidation
> > > requests.
> > 
> > That's assuming a Linux guest using the current guest IOMMU model.
> > 
> > I don't think we do so in practice, but the PAPR hypercall interface
> > allows in-place changing of a mapping.  The interface is just "set
> > this IOPTE to this value".
> 
> I see. Even if so, QEMU IOMMU emulation codes can convert one CHANGE
> request into UNMAP and a continuous MAP, right?

Yes, I guess so.  Why is that preferable to issuing a single
notification to both "map" and "unmap" listeners though?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag
  2016-09-14  4:00                               ` David Gibson
@ 2016-09-14  5:43                                 ` Peter Xu
  0 siblings, 0 replies; 28+ messages in thread
From: Peter Xu @ 2016-09-14  5:43 UTC (permalink / raw)
  To: David Gibson
  Cc: Paolo Bonzini, mst, jasowang, qemu-devel, cornelia.huck,
	alex.williamson, wexu, vkaplans, dgibson

On Wed, Sep 14, 2016 at 02:00:29PM +1000, David Gibson wrote:

[...]

> > > > > 
> > > > > > So
> > > > > > IIUC this is a question about "naming" but not the implementations...
> > > > > > I suppose it is really a matter of taste, and both work for me (either
> > > > > > INVALIDATION/CHANGE or UNMAP/MAP).
> > > > > 
> > > > > No.. it is a question of implementation.  My point is that I don't
> > > > > think the new permission is sufficient information to let you know if
> > > > > a notification is necessary.  You need to know if there was an
> > > > > existing mapping at that IOBA.
> > > > 
> > > > My understanding is that we don't need to know that. Because IIUC
> > > > there are only map_page() and unmap_page() in guest IOMMU driver
> > > > (please check dma_map_ops in kernel). There is no chance for anyone to
> > > > "change" the content of the mapping, unless it calls unmap_page() then
> > > > with a map_page(). In that case, we'll have two IOTLB invalidation
> > > > requests.
> > > 
> > > That's assuming a Linux guest using the current guest IOMMU model.
> > > 
> > > I don't think we do so in practice, but the PAPR hypercall interface
> > > allows in-place changing of a mapping.  The interface is just "set
> > > this IOPTE to this value".
> > 
> > I see. Even if so, QEMU IOMMU emulation codes can convert one CHANGE
> > request into UNMAP and a continuous MAP, right?
> 
> Yes, I guess so.  Why is that preferable to issuing a single
> notification to both "map" and "unmap" listeners though?

So I think we should be talking about the same thing here (please
correct me if I am wrong...). Please review v4 of this series and see
whether that works (I renamed CHANGE into MAP, so there will be
MAP/UNMAP, and I think things are clearer with it).

Thanks!

-- peterx

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2016-09-14  5:43 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-05  7:21 [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type Peter Xu
2016-09-05  7:21 ` [Qemu-devel] [PATCH 1/3] memory: add one flag for IOMMU notifier Peter Xu
2016-09-05  7:21 ` [Qemu-devel] [PATCH 2/3] memory: add iommu_notify_flag Peter Xu
2016-09-05  8:04   ` Paolo Bonzini
2016-09-05  8:38     ` Peter Xu
2016-09-05  9:56       ` Paolo Bonzini
2016-09-06  5:27         ` Peter Xu
2016-09-06  7:51           ` Paolo Bonzini
2016-09-06  8:17             ` Peter Xu
2016-09-06  8:19               ` Paolo Bonzini
2016-09-06 10:31                 ` Peter Xu
2016-09-07  5:44                   ` David Gibson
2016-09-07  6:34                     ` Peter Xu
2016-09-07  6:41                       ` David Gibson
2016-09-08  9:07                         ` Peter Xu
2016-09-12  1:26                           ` David Gibson
2016-09-12  5:13                             ` Peter Xu
2016-09-14  4:00                               ` David Gibson
2016-09-14  5:43                                 ` Peter Xu
2016-09-06  5:18       ` David Gibson
2016-09-06  5:55         ` Peter Xu
2016-09-06  5:12   ` David Gibson
2016-09-06  5:33     ` Peter Xu
2016-09-05  7:21 ` [Qemu-devel] [PATCH 3/3] intel_iommu: allow IOMMU_NONE typed notifiers Peter Xu
2016-09-06  5:06 ` [Qemu-devel] [PATCH 0/3] memory: add IOMMU notifier type David Gibson
2016-09-06  5:49   ` Peter Xu
2016-09-06  6:26     ` Jason Wang
2016-09-07  4:38       ` David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.