* [PATCH 0/8] iommu/iova: Implement deferred flush queue
@ 2017-08-11 14:41 Joerg Roedel
  2017-08-11 14:41 ` [PATCH 1/8] iommu/iova: Add flush-queue data structures Joerg Roedel
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

Hi,

here is an implementation of deferred flush queues in the
common iova code. Some users of the iova code already
implement their own flush-queues, like the AMD-Vi and
Intel VT-d drivers.

The implementation adds a per-cpu ring-buffer to the
iova_domain structure and adds code to maintain that
ring-buffer.

A timer is also used to make sure that pending entries in
the ring-buffers are flushed regularly, even when they don't
fill up.

The AMD and Intel IOMMU drivers are converted to make use of
this code and their own implementations for deferred
flushing are removed.
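
For illustration, here is roughly how a driver hooks into the
new interface (struct my_domain, my_flush_tlb() and the init
constants are placeholders; the real conversions are in
patches 6 and 8):

	/* IOTLB flush callback, invoked by the iova code */
	static void my_flush_tlb(struct iova_domain *iovad)
	{
		/* flush the IOTLB of the domain embedding @iovad */
	}

	static int my_domain_init(struct my_domain *dom)
	{
		init_iova_domain(&dom->iovad, PAGE_SIZE,
				 IOVA_START_PFN, DMA_32BIT_PFN);

		/* opt in to deferred flushing, no per-entry dtor */
		return init_iova_flush_queue(&dom->iovad,
					     my_flush_tlb, NULL);
	}

	static void my_unmap(struct my_domain *dom,
			     unsigned long iova_pfn, unsigned long pages)
	{
		/* defer freeing the IOVA until the next IOTLB flush */
		queue_iova(&dom->iovad, iova_pfn, pages, 0);
	}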

The code has been tested on an AMD and an Intel machine with
network and disk IO.

Please review.

Regards,

	Joerg

Joerg Roedel (8):
  iommu/iova: Add flush-queue data structures
  iommu/iova: Implement Flush-Queue ring buffer
  iommu/iova: Add flush counters to Flush-Queue implementation
  iommu/iova: Add locking to Flush-Queues
  iommu/iova: Add flush timer
  iommu/amd: Make use of iova queue flushing
  iommu/vt-d: Allow to flush more than 4GB of device TLBs
  iommu/vt-d: Make use of iova deferred flushing

 drivers/iommu/amd_iommu.c   | 229 ++------------------------------------------
 drivers/iommu/dmar.c        |   2 +-
 drivers/iommu/intel-iommu.c | 197 ++++++++-----------------------------
 drivers/iommu/iova.c        | 183 +++++++++++++++++++++++++++++++++++
 include/linux/iova.h        |  67 +++++++++++++
 5 files changed, 298 insertions(+), 380 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/8] iommu/iova: Add flush-queue data structures
  2017-08-11 14:41 [PATCH 0/8] iommu/iova: Implement deferred flush queue Joerg Roedel
@ 2017-08-11 14:41 ` Joerg Roedel
  2017-08-11 14:41   ` Joerg Roedel
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

This patch adds the basic data structures needed to
implement flush-queues in the generic IOVA code. It also
adds the initialization and destruction routines for these
data structures.

The initialization routine is designed so that the use of
this feature is optional for users of the IOVA code.
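
For illustration, a minimal opt-in sketch (my_flush_cb and
my_entry_dtor are placeholder callbacks, the error policy is
up to the driver):

	init_iova_domain(&iovad, granule, start_pfn, pfn_32bit);

	/* Optional: skipping this call keeps the old behavior */
	ret = init_iova_flush_queue(&iovad, my_flush_cb, my_entry_dtor);
	if (ret)
		/* queue_iova() must not be used then, but IOVA
		 * allocation and freeing keep working as before */
		pr_warn("deferred IOTLB flushing disabled (%d)\n", ret);

	/* No extra teardown needed, put_iova_domain() frees the queue */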

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/iova.h | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 246f14c..b9f6ce0 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -50,10 +50,48 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
 	iovad->dma_32bit_pfn = pfn_32bit + 1;
+	iovad->flush_cb = NULL;
+	iovad->fq = NULL;
 	init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
 
+static void free_iova_flush_queue(struct iova_domain *iovad)
+{
+	if (!iovad->fq)
+		return;
+
+	free_percpu(iovad->fq);
+
+	iovad->fq         = NULL;
+	iovad->flush_cb   = NULL;
+	iovad->entry_dtor = NULL;
+}
+
+int init_iova_flush_queue(struct iova_domain *iovad,
+			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
+{
+	int cpu;
+
+	iovad->fq = alloc_percpu(struct iova_fq);
+	if (!iovad->fq)
+		return -ENOMEM;
+
+	iovad->flush_cb   = flush_cb;
+	iovad->entry_dtor = entry_dtor;
+
+	for_each_possible_cpu(cpu) {
+		struct iova_fq *fq;
+
+		fq = per_cpu_ptr(iovad->fq, cpu);
+		fq->head = 0;
+		fq->tail = 0;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(init_iova_flush_queue);
+
 static struct rb_node *
 __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 {
@@ -433,6 +471,7 @@ void put_iova_domain(struct iova_domain *iovad)
 	struct rb_node *node;
 	unsigned long flags;
 
+	free_iova_flush_queue(iovad);
 	free_iova_rcaches(iovad);
 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 	node = rb_first(&iovad->rbroot);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index e0a892a..8aa1089 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -36,6 +36,30 @@ struct iova_rcache {
 	struct iova_cpu_rcache __percpu *cpu_rcaches;
 };
 
+struct iova_domain;
+
+/* Call-Back from IOVA code into IOMMU drivers */
+typedef void (* iova_flush_cb)(struct iova_domain *domain);
+
+/* Destructor for per-entry data */
+typedef void (* iova_entry_dtor)(unsigned long data);
+
+/* Number of entries per Flush Queue */
+#define IOVA_FQ_SIZE	256
+
+/* Flush Queue entry for deferred flushing */
+struct iova_fq_entry {
+	unsigned long iova_pfn;
+	unsigned long pages;
+	unsigned long data;
+};
+
+/* Per-CPU Flush Queue structure */
+struct iova_fq {
+	struct iova_fq_entry entries[IOVA_FQ_SIZE];
+	unsigned head, tail;
+};
+
 /* holds all the iova translations for a domain */
 struct iova_domain {
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
@@ -45,6 +69,14 @@ struct iova_domain {
 	unsigned long	start_pfn;	/* Lower limit for this domain */
 	unsigned long	dma_32bit_pfn;
 	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
+
+	iova_flush_cb	flush_cb;	/* Call-Back function to flush IOMMU
+					   TLBs */
+
+	iova_entry_dtor entry_dtor;	/* IOMMU driver specific destructor for
+					   iova entry */
+
+	struct iova_fq __percpu *fq;	/* Flush Queue */
 };
 
 static inline unsigned long iova_size(struct iova *iova)
@@ -102,6 +134,8 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn, unsigned long pfn_32bit);
+int init_iova_flush_queue(struct iova_domain *iovad,
+			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
 void put_iova_domain(struct iova_domain *iovad);
 struct iova *split_and_remove_iova(struct iova_domain *iovad,
@@ -174,6 +208,13 @@ static inline void init_iova_domain(struct iova_domain *iovad,
 {
 }
 
+static inline int init_iova_flush_queue(struct iova_domain *iovad,
+					iova_flush_cb flush_cb,
+					iova_entry_dtor entry_dtor)
+{
+	return -ENODEV;
+}
+
 static inline struct iova *find_iova(struct iova_domain *iovad,
 				     unsigned long pfn)
 {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/8] iommu/iova: Implement Flush-Queue ring buffer
@ 2017-08-11 14:41   ` Joerg Roedel
  0 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

Add a function to add entries to the Flush-Queue ring
buffer. If the buffer is full, call the flush-callback and
free the entries.
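
One property worth noting (a consequence of the ring
arithmetic below): with IOVA_FQ_SIZE set to 256, each per-cpu
ring holds at most 255 entries, because fq_full() keeps one
slot free to tell a full ring (((tail + 1) % IOVA_FQ_SIZE) ==
head) apart from an empty one (head == tail).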

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iova.h |  9 ++++++
 2 files changed, 89 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index b9f6ce0..e5c9a7a 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -32,6 +32,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 				     unsigned long limit_pfn);
 static void init_iova_rcaches(struct iova_domain *iovad);
 static void free_iova_rcaches(struct iova_domain *iovad);
+static void fq_destroy_all_entries(struct iova_domain *iovad);
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
@@ -61,6 +62,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 	if (!iovad->fq)
 		return;
 
+	fq_destroy_all_entries(iovad);
 	free_percpu(iovad->fq);
 
 	iovad->fq         = NULL;
@@ -461,6 +463,84 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(free_iova_fast);
 
+#define fq_ring_for_each(i, fq) \
+	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+
+static inline bool fq_full(struct iova_fq *fq)
+{
+	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+}
+
+static inline unsigned fq_ring_add(struct iova_fq *fq)
+{
+	unsigned idx = fq->tail;
+
+	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+
+	return idx;
+}
+
+static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
+{
+	unsigned idx;
+
+	fq_ring_for_each(idx, fq) {
+
+		if (iovad->entry_dtor)
+			iovad->entry_dtor(fq->entries[idx].data);
+
+		free_iova_fast(iovad,
+			       fq->entries[idx].iova_pfn,
+			       fq->entries[idx].pages);
+	}
+
+	fq->head = 0;
+	fq->tail = 0;
+}
+
+static void fq_destroy_all_entries(struct iova_domain *iovad)
+{
+	int cpu;
+
+	/*
+	 * This code runs when the iova_domain is being destroyed, so don't
+	 * bother to free iovas, just call the entry_dtor on all remaining
+	 * entries.
+	 */
+	if (!iovad->entry_dtor)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
+		int idx;
+
+		fq_ring_for_each(idx, fq)
+			iovad->entry_dtor(fq->entries[idx].data);
+	}
+}
+
+void queue_iova(struct iova_domain *iovad,
+		unsigned long pfn, unsigned long pages,
+		unsigned long data)
+{
+	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
+	unsigned idx;
+
+	if (fq_full(fq)) {
+		iovad->flush_cb(iovad);
+		fq_ring_free(iovad, fq);
+	}
+
+	idx = fq_ring_add(fq);
+
+	fq->entries[idx].iova_pfn = pfn;
+	fq->entries[idx].pages    = pages;
+	fq->entries[idx].data     = data;
+
+	put_cpu_ptr(iovad->fq);
+}
+EXPORT_SYMBOL_GPL(queue_iova);
+
 /**
  * put_iova_domain - destroys the iova doamin
  * @iovad: - iova domain in question.
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 8aa1089..1ae8524 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -127,6 +127,9 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
 	bool size_aligned);
 void free_iova_fast(struct iova_domain *iovad, unsigned long pfn,
 		    unsigned long size);
+void queue_iova(struct iova_domain *iovad,
+		unsigned long pfn, unsigned long pages,
+		unsigned long data);
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 			      unsigned long limit_pfn);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
@@ -182,6 +185,12 @@ static inline void free_iova_fast(struct iova_domain *iovad,
 {
 }
 
+static inline void queue_iova(struct iova_domain *iovad,
+			      unsigned long pfn, unsigned long pages,
+			      unsigned long data)
+{
+}
+
 static inline unsigned long alloc_iova_fast(struct iova_domain *iovad,
 					    unsigned long size,
 					    unsigned long limit_pfn)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/8] iommu/iova: Add flush counters to Flush-Queue implementation
@ 2017-08-11 14:41   ` Joerg Roedel
  0 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

There are two counters:

	* fq_flush_start_cnt  - Increased when a TLB flush
	                        is started.

	* fq_flush_finish_cnt - Increased when a TLB flush
				is finished.

The fq_flush_start_cnt is assigned to every Flush-Queue
entry on its creation. When freeing entries from the
Flush-Queue, the value in the entry is compared to the
fq_flush_finish_cnt. The entry can only be freed when its
value is less than the value of fq_flush_finish_cnt.

The reason for these counters is to take advantage of IOMMU
TLB flushes that happened on other CPUs. Such a flush also
covers the Flush-Queue entries queued on all other CPUs, so
those entries can be freed without flushing the TLB again.

This makes it less likely that the Flush-Queue is full and
saves IOMMU TLB flushes.
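
A worked example of the intended interplay (the numbers are
only illustrative):

	1) CPU A queues an entry while fq_flush_start_cnt is 4,
	   so the entry records counter = 4.

	2) CPU B fills up its own queue and flushes the IOMMU
	   TLB; fq_flush_start_cnt and fq_flush_finish_cnt both
	   become 5.

	3) When CPU A next scans its queue, the entry's counter
	   (4) is less than fq_flush_finish_cnt (5), so its IOVA
	   can be freed without another TLB flush.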

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 27 ++++++++++++++++++++++++---
 include/linux/iova.h |  8 ++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index e5c9a7a..47b144e 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -75,6 +75,9 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 {
 	int cpu;
 
+	atomic64_set(&iovad->fq_flush_start_cnt,  0);
+	atomic64_set(&iovad->fq_flush_finish_cnt, 0);
+
 	iovad->fq = alloc_percpu(struct iova_fq);
 	if (!iovad->fq)
 		return -ENOMEM;
@@ -482,20 +485,30 @@ static inline unsigned fq_ring_add(struct iova_fq *fq)
 
 static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 {
+	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 	unsigned idx;
 
 	fq_ring_for_each(idx, fq) {
 
+		if (fq->entries[idx].counter >= counter)
+			break;
+
 		if (iovad->entry_dtor)
 			iovad->entry_dtor(fq->entries[idx].data);
 
 		free_iova_fast(iovad,
 			       fq->entries[idx].iova_pfn,
 			       fq->entries[idx].pages);
+
+		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 	}
+}
 
-	fq->head = 0;
-	fq->tail = 0;
+static void iova_domain_flush(struct iova_domain *iovad)
+{
+	atomic64_inc(&iovad->fq_flush_start_cnt);
+	iovad->flush_cb(iovad);
+	atomic64_inc(&iovad->fq_flush_finish_cnt);
 }
 
 static void fq_destroy_all_entries(struct iova_domain *iovad)
@@ -526,8 +539,15 @@ void queue_iova(struct iova_domain *iovad,
 	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
 	unsigned idx;
 
+	/*
+	 * First remove all entries from the flush queue that have already been
+	 * flushed out on another CPU. This makes the fq_full() check below less
+	 * likely to be true.
+	 */
+	fq_ring_free(iovad, fq);
+
 	if (fq_full(fq)) {
-		iovad->flush_cb(iovad);
+		iova_domain_flush(iovad);
 		fq_ring_free(iovad, fq);
 	}
 
@@ -536,6 +556,7 @@ void queue_iova(struct iova_domain *iovad,
 	fq->entries[idx].iova_pfn = pfn;
 	fq->entries[idx].pages    = pages;
 	fq->entries[idx].data     = data;
+	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 
 	put_cpu_ptr(iovad->fq);
 }
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 1ae8524..985b800 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/rbtree.h>
+#include <linux/atomic.h>
 #include <linux/dma-mapping.h>
 
 /* iova structure */
@@ -52,6 +53,7 @@ struct iova_fq_entry {
 	unsigned long iova_pfn;
 	unsigned long pages;
 	unsigned long data;
+	u64 counter; /* Flush counter when this entry was added */
 };
 
 /* Per-CPU Flush Queue structure */
@@ -77,6 +79,12 @@ struct iova_domain {
 					   iova entry */
 
 	struct iova_fq __percpu *fq;	/* Flush Queue */
+
+	atomic64_t	fq_flush_start_cnt;	/* Number of TLB flushes that
+						   have been started */
+
+	atomic64_t	fq_flush_finish_cnt;	/* Number of TLB flushes that
+						   have been finished */
 };
 
 static inline unsigned long iova_size(struct iova *iova)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/8] iommu/iova: Add locking to Flush-Queues
  2017-08-11 14:41 [PATCH 0/8] iommu/iova: Implement deferred flush queue Joerg Roedel
                   ` (2 preceding siblings ...)
  2017-08-11 14:41   ` Joerg Roedel
@ 2017-08-11 14:41 ` Joerg Roedel
  2017-08-11 14:41   ` Joerg Roedel
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

The lock is taken on the same CPU most of the time, but
having it allows the queue to be flushed from another CPU as
well when necessary.

This will be used by a timer to regularly flush any pending
IOVAs from the Flush-Queues.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 11 +++++++++++
 include/linux/iova.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 47b144e..749d395 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -91,6 +91,8 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 		fq = per_cpu_ptr(iovad->fq, cpu);
 		fq->head = 0;
 		fq->tail = 0;
+
+		spin_lock_init(&fq->lock);
 	}
 
 	return 0;
@@ -471,6 +473,7 @@ EXPORT_SYMBOL_GPL(free_iova_fast);
 
 static inline bool fq_full(struct iova_fq *fq)
 {
+	assert_spin_locked(&fq->lock);
 	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 }
 
@@ -478,6 +481,8 @@ static inline unsigned fq_ring_add(struct iova_fq *fq)
 {
 	unsigned idx = fq->tail;
 
+	assert_spin_locked(&fq->lock);
+
 	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 
 	return idx;
@@ -488,6 +493,8 @@ static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 	unsigned idx;
 
+	assert_spin_locked(&fq->lock);
+
 	fq_ring_for_each(idx, fq) {
 
 		if (fq->entries[idx].counter >= counter)
@@ -537,8 +544,11 @@ void queue_iova(struct iova_domain *iovad,
 		unsigned long data)
 {
 	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
+	unsigned long flags;
 	unsigned idx;
 
+	spin_lock_irqsave(&fq->lock, flags);
+
 	/*
 	 * First remove all entries from the flush queue that have already been
 	 * flushed out on another CPU. This makes the fq_full() check below less
@@ -558,6 +568,7 @@ void queue_iova(struct iova_domain *iovad,
 	fq->entries[idx].data     = data;
 	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 
+	spin_unlock_irqrestore(&fq->lock, flags);
 	put_cpu_ptr(iovad->fq);
 }
 EXPORT_SYMBOL_GPL(queue_iova);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 985b800..913a690 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -60,6 +60,7 @@ struct iova_fq_entry {
 struct iova_fq {
 	struct iova_fq_entry entries[IOVA_FQ_SIZE];
 	unsigned head, tail;
+	spinlock_t lock;
 };
 
 /* holds all the iova translations for a domain */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/8] iommu/iova: Add flush timer
@ 2017-08-11 14:41   ` Joerg Roedel
  0 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

Add a timer that flushes pending entries from the
Flush-Queues at most 10ms after they have been queued. This
makes sure that no stale TLB entries remain for too long
after an IOVA has been unmapped.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 32 ++++++++++++++++++++++++++++++++
 include/linux/iova.h |  8 ++++++++
 2 files changed, 40 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 749d395..33edfa7 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -33,6 +33,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 static void init_iova_rcaches(struct iova_domain *iovad);
 static void free_iova_rcaches(struct iova_domain *iovad);
 static void fq_destroy_all_entries(struct iova_domain *iovad);
+static void fq_flush_timeout(unsigned long data);
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
@@ -62,7 +63,11 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 	if (!iovad->fq)
 		return;
 
+	if (timer_pending(&iovad->fq_timer))
+		del_timer(&iovad->fq_timer);
+
 	fq_destroy_all_entries(iovad);
+
 	free_percpu(iovad->fq);
 
 	iovad->fq         = NULL;
@@ -95,6 +100,9 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 		spin_lock_init(&fq->lock);
 	}
 
+	setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad);
+	atomic_set(&iovad->fq_timer_on, 0);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
@@ -539,6 +547,25 @@ static void fq_destroy_all_entries(struct iova_domain *iovad)
 	}
 }
 
+static void fq_flush_timeout(unsigned long data)
+{
+	struct iova_domain *iovad = (struct iova_domain *)data;
+	int cpu;
+
+	atomic_set(&iovad->fq_timer_on, 0);
+	iova_domain_flush(iovad);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long flags;
+		struct iova_fq *fq;
+
+		fq = per_cpu_ptr(iovad->fq, cpu);
+		spin_lock_irqsave(&fq->lock, flags);
+		fq_ring_free(iovad, fq);
+		spin_unlock_irqrestore(&fq->lock, flags);
+	}
+}
+
 void queue_iova(struct iova_domain *iovad,
 		unsigned long pfn, unsigned long pages,
 		unsigned long data)
@@ -569,6 +596,11 @@ void queue_iova(struct iova_domain *iovad,
 	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 
 	spin_unlock_irqrestore(&fq->lock, flags);
+
+	if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
+		mod_timer(&iovad->fq_timer,
+			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+
 	put_cpu_ptr(iovad->fq);
 }
 EXPORT_SYMBOL_GPL(queue_iova);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 913a690..d179b9b 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -48,6 +48,9 @@ typedef void (* iova_entry_dtor)(unsigned long data);
 /* Number of entries per Flush Queue */
 #define IOVA_FQ_SIZE	256
 
+/* Timeout (in ms) after which entries are flushed from the Flush-Queue */
+#define IOVA_FQ_TIMEOUT	10
+
 /* Flush Queue entry for deferred flushing */
 struct iova_fq_entry {
 	unsigned long iova_pfn;
@@ -86,6 +89,11 @@ struct iova_domain {
 
 	atomic64_t	fq_flush_finish_cnt;	/* Number of TLB flushes that
 						   have been finished */
+
+	struct timer_list fq_timer;		/* Timer to regularly empty the
+						   flush-queues */
+	atomic_t fq_timer_on;			/* 1 when timer is active, 0
+						   when not */
 };
 
 static inline unsigned long iova_size(struct iova *iova)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 6/8] iommu/amd: Make use of iova queue flushing
  2017-08-11 14:41 [PATCH 0/8] iommu/iova: Implement deferred flush queue Joerg Roedel
                   ` (4 preceding siblings ...)
  2017-08-11 14:41   ` Joerg Roedel
@ 2017-08-11 14:41 ` Joerg Roedel
  2017-08-11 14:41 ` [PATCH 7/8] iommu/vt-d: Allow to flush more than 4GB of device TLBs Joerg Roedel
  2017-08-11 14:41 ` [PATCH 8/8] iommu/vt-d: Make use of iova deferred flushing Joerg Roedel
  7 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

Rip out the driver-specific deferred-flushing implementation
in the AMD IOMMU driver and use the one in the common iova
code instead.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 229 ++--------------------------------------------
 1 file changed, 9 insertions(+), 220 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 354cbd6..5a61cf5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -137,20 +137,7 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
-
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	u64 counter; /* Flush counter when this entry was added to the queue */
-};
-
-struct flush_queue {
-	struct flush_queue_entry *entries;
-	unsigned head, tail;
-	spinlock_t lock;
-};
+static void iova_domain_flush_tlb(struct iova_domain *iovad);
 
 /*
  * Data container for a dma_ops specific protection domain
@@ -161,36 +148,6 @@ struct dma_ops_domain {
 
 	/* IOVA RB-Tree */
 	struct iova_domain iovad;
-
-	struct flush_queue __percpu *flush_queue;
-
-	/*
-	 * We need two counter here to be race-free wrt. IOTLB flushing and
-	 * adding entries to the flush queue.
-	 *
-	 * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
-	 * New entries added to the flush ring-buffer get their 'counter' value
-	 * from here. This way we can make sure that entries added to the queue
-	 * (or other per-cpu queues of the same domain) while the TLB is about
-	 * to be flushed are not considered to be flushed already.
-	 */
-	atomic64_t flush_start_cnt;
-
-	/*
-	 * The flush_finish_cnt is incremented when an IOTLB flush is complete.
-	 * This value is always smaller than flush_start_cnt. The queue_add
-	 * function frees all IOVAs that have a counter value smaller than
-	 * flush_finish_cnt. This makes sure that we only free IOVAs that are
-	 * flushed out of the IOTLB of the domain.
-	 */
-	atomic64_t flush_finish_cnt;
-
-	/*
-	 * Timer to make sure we don't keep IOVAs around unflushed
-	 * for too long
-	 */
-	struct timer_list flush_timer;
-	atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1788,178 +1745,19 @@ static void free_gcr3_table(struct protection_domain *domain)
 	free_page((unsigned long)domain->gcr3_tbl);
 }
 
-static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		kfree(queue->entries);
-	}
-
-	free_percpu(dom->flush_queue);
-
-	dom->flush_queue = NULL;
-}
-
-static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
-{
-	int cpu;
-
-	atomic64_set(&dom->flush_start_cnt,  0);
-	atomic64_set(&dom->flush_finish_cnt, 0);
-
-	dom->flush_queue = alloc_percpu(struct flush_queue);
-	if (!dom->flush_queue)
-		return -ENOMEM;
-
-	/* First make sure everything is cleared */
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		queue->head    = 0;
-		queue->tail    = 0;
-		queue->entries = NULL;
-	}
-
-	/* Now start doing the allocation */
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
-					 GFP_KERNEL);
-		if (!queue->entries) {
-			dma_ops_domain_free_flush_queue(dom);
-			return -ENOMEM;
-		}
-
-		spin_lock_init(&queue->lock);
-	}
-
-	return 0;
-}
-
 static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
 {
-	atomic64_inc(&dom->flush_start_cnt);
 	domain_flush_tlb(&dom->domain);
 	domain_flush_complete(&dom->domain);
-	atomic64_inc(&dom->flush_finish_cnt);
 }
 
-static inline bool queue_ring_full(struct flush_queue *queue)
+static void iova_domain_flush_tlb(struct iova_domain *iovad)
 {
-	assert_spin_locked(&queue->lock);
-
-	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
-}
+	struct dma_ops_domain *dom;
 
-#define queue_ring_for_each(i, q) \
-	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
-
-static inline unsigned queue_ring_add(struct flush_queue *queue)
-{
-	unsigned idx = queue->tail;
-
-	assert_spin_locked(&queue->lock);
-	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
-
-	return idx;
-}
-
-static inline void queue_ring_remove_head(struct flush_queue *queue)
-{
-	assert_spin_locked(&queue->lock);
-	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
-}
-
-static void queue_ring_free_flushed(struct dma_ops_domain *dom,
-				    struct flush_queue *queue)
-{
-	u64 counter = atomic64_read(&dom->flush_finish_cnt);
-	int idx;
-
-	queue_ring_for_each(idx, queue) {
-		/*
-		 * This assumes that counter values in the ring-buffer are
-		 * monotonously rising.
-		 */
-		if (queue->entries[idx].counter >= counter)
-			break;
-
-		free_iova_fast(&dom->iovad,
-			       queue->entries[idx].iova_pfn,
-			       queue->entries[idx].pages);
-
-		queue_ring_remove_head(queue);
-	}
-}
-
-static void queue_add(struct dma_ops_domain *dom,
-		      unsigned long address, unsigned long pages)
-{
-	struct flush_queue *queue;
-	unsigned long flags;
-	int idx;
-
-	pages     = __roundup_pow_of_two(pages);
-	address >>= PAGE_SHIFT;
-
-	queue = get_cpu_ptr(dom->flush_queue);
-	spin_lock_irqsave(&queue->lock, flags);
-
-	/*
-	 * First remove the enries from the ring-buffer that are already
-	 * flushed to make the below queue_ring_full() check less likely
-	 */
-	queue_ring_free_flushed(dom, queue);
-
-	/*
-	 * When ring-queue is full, flush the entries from the IOTLB so
-	 * that we can free all entries with queue_ring_free_flushed()
-	 * below.
-	 */
-	if (queue_ring_full(queue)) {
-		dma_ops_domain_flush_tlb(dom);
-		queue_ring_free_flushed(dom, queue);
-	}
-
-	idx = queue_ring_add(queue);
-
-	queue->entries[idx].iova_pfn = address;
-	queue->entries[idx].pages    = pages;
-	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
-
-	spin_unlock_irqrestore(&queue->lock, flags);
-
-	if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
-		mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
-
-	put_cpu_ptr(dom->flush_queue);
-}
-
-static void queue_flush_timeout(unsigned long data)
-{
-	struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
-	int cpu;
-
-	atomic_set(&dom->flush_timer_on, 0);
+	dom = container_of(iovad, struct dma_ops_domain, iovad);
 
 	dma_ops_domain_flush_tlb(dom);
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-		unsigned long flags;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		spin_lock_irqsave(&queue->lock, flags);
-		queue_ring_free_flushed(dom, queue);
-		spin_unlock_irqrestore(&queue->lock, flags);
-	}
 }
 
 /*
@@ -1973,11 +1771,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	del_domain_from_list(&dom->domain);
 
-	if (timer_pending(&dom->flush_timer))
-		del_timer(&dom->flush_timer);
-
-	dma_ops_domain_free_flush_queue(dom);
-
 	put_iova_domain(&dom->iovad);
 
 	free_pagetable(&dom->domain);
@@ -2013,16 +1806,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
 			 IOVA_START_PFN, DMA_32BIT_PFN);
 
-	/* Initialize reserved ranges */
-	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
-
-	if (dma_ops_domain_alloc_flush_queue(dma_dom))
+	if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
 		goto free_dma_dom;
 
-	setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
-		    (unsigned long)dma_dom);
-
-	atomic_set(&dma_dom->flush_timer_on, 0);
+	/* Initialize reserved ranges */
+	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
 	add_domain_to_list(&dma_dom->domain);
 
@@ -2619,7 +2407,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 		domain_flush_tlb(&dma_dom->domain);
 		domain_flush_complete(&dma_dom->domain);
 	} else {
-		queue_add(dma_dom, dma_addr, pages);
+		pages = __roundup_pow_of_two(pages);
+		queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
 	}
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 7/8] iommu/vt-d: Allow to flush more than 4GB of device TLBs
  2017-08-11 14:41 [PATCH 0/8] iommu/iova: Implement deferred flush queue Joerg Roedel
                   ` (5 preceding siblings ...)
  2017-08-11 14:41 ` [PATCH 6/8] iommu/amd: Make use of iova queue flushing Joerg Roedel
@ 2017-08-11 14:41 ` Joerg Roedel
  2017-08-11 14:41 ` [PATCH 8/8] iommu/vt-d: Make use of iova deferred flushing Joerg Roedel
  7 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

The shift in qi_flush_dev_iotlb() is done on an int, which
limits the mask to 32 bits. Make the mask 64 bits wide so
that more than 4GB of address range can be flushed at once.
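
As an illustration (assuming VTD_PAGE_SHIFT is 12): flushing
a 2^21-page (8GB) range means mask = 21, so the shift count
is 12 + 21 - 1 = 32. That is already the full width of an
int, so the result is undefined and the computed mask is
wrong; with a 1ULL constant the same expression yields the
intended (1ULL << 32) - 1.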

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index c8b0329..ca5ebae 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1343,7 +1343,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
 
 	if (mask) {
 		BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
-		addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
+		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
 		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
 	} else
 		desc.high = QI_DEV_IOTLB_ADDR(addr);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 8/8] iommu/vt-d: Make use of iova deferred flushing
  2017-08-11 14:41 [PATCH 0/8] iommu/iova: Implement deferred flush queue Joerg Roedel
                   ` (6 preceding siblings ...)
  2017-08-11 14:41 ` [PATCH 7/8] iommu/vt-d: Allow to flush more than 4GB of device TLBs Joerg Roedel
@ 2017-08-11 14:41 ` Joerg Roedel
  7 siblings, 0 replies; 12+ messages in thread
From: Joerg Roedel @ 2017-08-11 14:41 UTC (permalink / raw)
  To: iommu; +Cc: Robin Murphy, dwmw2, Will Deacon, linux-kernel, Joerg Roedel

From: Joerg Roedel <jroedel@suse.de>

Remove the deferred flushing implementation in the Intel
VT-d driver and use the one from the common iova code
instead.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 197 +++++++++-----------------------------------
 1 file changed, 38 insertions(+), 159 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 687f18f..d5e8b86 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units);
 #define for_each_rmrr_units(rmrr) \
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
-static void flush_unmaps_timeout(unsigned long data);
-
-struct deferred_flush_entry {
-	unsigned long iova_pfn;
-	unsigned long nrpages;
-	struct dmar_domain *domain;
-	struct page *freelist;
-};
-
-#define HIGH_WATER_MARK 250
-struct deferred_flush_table {
-	int next;
-	struct deferred_flush_entry entries[HIGH_WATER_MARK];
-};
-
-struct deferred_flush_data {
-	spinlock_t lock;
-	int timer_on;
-	struct timer_list timer;
-	long size;
-	struct deferred_flush_table *tables;
-};
-
-static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
-
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
 
@@ -1309,6 +1284,13 @@ static void dma_free_pagelist(struct page *freelist)
 	}
 }
 
+static void iova_entry_free(unsigned long data)
+{
+	struct page *freelist = (struct page *)data;
+
+	dma_free_pagelist(freelist);
+}
+
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
@@ -1622,6 +1604,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 				      addr, mask);
 }
 
+static void iommu_flush_iova(struct iova_domain *iovad)
+{
+	struct dmar_domain *domain;
+	int idx;
+
+	domain = container_of(iovad, struct dmar_domain, iovad);
+
+	for_each_domain_iommu(idx, domain) {
+		struct intel_iommu *iommu = g_iommus[idx];
+		u16 did = domain->iommu_did[iommu->seq_id];
+
+		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+		if (!cap_caching_mode(iommu->cap))
+			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
+					      0, MAX_AGAW_PFN_WIDTH);
+	}
+}
+
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
 {
 	u32 pmen;
@@ -1932,9 +1933,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
 {
 	int adjust_width, agaw;
 	unsigned long sagaw;
+	int err;
 
 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
 			DMA_32BIT_PFN);
+
+	err = init_iova_flush_queue(&domain->iovad,
+				    iommu_flush_iova, iova_entry_free);
+	if (err)
+		return err;
+
 	domain_reserve_special_ranges(domain);
 
 	/* calculate AGAW */
@@ -1986,14 +1994,6 @@ static void domain_exit(struct dmar_domain *domain)
 	if (!domain)
 		return;
 
-	/* Flush any lazy unmaps that may reference this domain */
-	if (!intel_iommu_strict) {
-		int cpu;
-
-		for_each_possible_cpu(cpu)
-			flush_unmaps_timeout(cpu);
-	}
-
 	/* Remove associated devices and clear attached or cached domains */
 	rcu_read_lock();
 	domain_remove_dev_info(domain);
@@ -3206,7 +3206,7 @@ static int __init init_dmars(void)
 	bool copied_tables = false;
 	struct device *dev;
 	struct intel_iommu *iommu;
-	int i, ret, cpu;
+	int i, ret;
 
 	/*
 	 * for each drhd
@@ -3239,22 +3239,6 @@ static int __init init_dmars(void)
 		goto error;
 	}
 
-	for_each_possible_cpu(cpu) {
-		struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
-							      cpu);
-
-		dfd->tables = kzalloc(g_num_of_iommus *
-				      sizeof(struct deferred_flush_table),
-				      GFP_KERNEL);
-		if (!dfd->tables) {
-			ret = -ENOMEM;
-			goto free_g_iommus;
-		}
-
-		spin_lock_init(&dfd->lock);
-		setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
-	}
-
 	for_each_active_iommu(iommu, drhd) {
 		g_iommus[iommu->seq_id] = iommu;
 
@@ -3437,10 +3421,9 @@ static int __init init_dmars(void)
 		disable_dmar_iommu(iommu);
 		free_dmar_iommu(iommu);
 	}
-free_g_iommus:
-	for_each_possible_cpu(cpu)
-		kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
+
 	kfree(g_iommus);
+
 error:
 	return ret;
 }
@@ -3645,110 +3628,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
 				  dir, *dev->dma_mask);
 }
 
-static void flush_unmaps(struct deferred_flush_data *flush_data)
-{
-	int i, j;
-
-	flush_data->timer_on = 0;
-
-	/* just flush them all */
-	for (i = 0; i < g_num_of_iommus; i++) {
-		struct intel_iommu *iommu = g_iommus[i];
-		struct deferred_flush_table *flush_table =
-				&flush_data->tables[i];
-		if (!iommu)
-			continue;
-
-		if (!flush_table->next)
-			continue;
-
-		/* In caching mode, global flushes turn emulation expensive */
-		if (!cap_caching_mode(iommu->cap))
-			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-					 DMA_TLB_GLOBAL_FLUSH);
-		for (j = 0; j < flush_table->next; j++) {
-			unsigned long mask;
-			struct deferred_flush_entry *entry =
-						&flush_table->entries[j];
-			unsigned long iova_pfn = entry->iova_pfn;
-			unsigned long nrpages = entry->nrpages;
-			struct dmar_domain *domain = entry->domain;
-			struct page *freelist = entry->freelist;
-
-			/* On real hardware multiple invalidations are expensive */
-			if (cap_caching_mode(iommu->cap))
-				iommu_flush_iotlb_psi(iommu, domain,
-					mm_to_dma_pfn(iova_pfn),
-					nrpages, !freelist, 0);
-			else {
-				mask = ilog2(nrpages);
-				iommu_flush_dev_iotlb(domain,
-						(uint64_t)iova_pfn << PAGE_SHIFT, mask);
-			}
-			free_iova_fast(&domain->iovad, iova_pfn, nrpages);
-			if (freelist)
-				dma_free_pagelist(freelist);
-		}
-		flush_table->next = 0;
-	}
-
-	flush_data->size = 0;
-}
-
-static void flush_unmaps_timeout(unsigned long cpuid)
-{
-	struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
-	unsigned long flags;
-
-	spin_lock_irqsave(&flush_data->lock, flags);
-	flush_unmaps(flush_data);
-	spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
-static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
-		      unsigned long nrpages, struct page *freelist)
-{
-	unsigned long flags;
-	int entry_id, iommu_id;
-	struct intel_iommu *iommu;
-	struct deferred_flush_entry *entry;
-	struct deferred_flush_data *flush_data;
-
-	flush_data = raw_cpu_ptr(&deferred_flush);
-
-	/* Flush all CPUs' entries to avoid deferring too much.  If
-	 * this becomes a bottleneck, can just flush us, and rely on
-	 * flush timer for the rest.
-	 */
-	if (flush_data->size == HIGH_WATER_MARK) {
-		int cpu;
-
-		for_each_online_cpu(cpu)
-			flush_unmaps_timeout(cpu);
-	}
-
-	spin_lock_irqsave(&flush_data->lock, flags);
-
-	iommu = domain_get_iommu(dom);
-	iommu_id = iommu->seq_id;
-
-	entry_id = flush_data->tables[iommu_id].next;
-	++(flush_data->tables[iommu_id].next);
-
-	entry = &flush_data->tables[iommu_id].entries[entry_id];
-	entry->domain = dom;
-	entry->iova_pfn = iova_pfn;
-	entry->nrpages = nrpages;
-	entry->freelist = freelist;
-
-	if (!flush_data->timer_on) {
-		mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
-		flush_data->timer_on = 1;
-	}
-	flush_data->size++;
-	spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 {
 	struct dmar_domain *domain;
@@ -3784,7 +3663,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
 		dma_free_pagelist(freelist);
 	} else {
-		add_unmap(domain, iova_pfn, nrpages, freelist);
+		queue_iova(&domain->iovad, iova_pfn, nrpages,
+			   (unsigned long)freelist);
 		/*
 		 * queue up the release of the unmap to save the 1/6th of the
 		 * cpu used up by the iotlb flush operation...
@@ -4721,7 +4601,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 static int intel_iommu_cpu_dead(unsigned int cpu)
 {
 	free_all_cpu_cached_iovas(cpu);
-	flush_unmaps_timeout(cpu);
 	return 0;
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread
