* [PATCH v2 1/9] iommu/io-pgtable-arm: Allow appropriate DMA API use
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

Currently, users of the LPAE page table code are (ab)using dma_map_page()
as a means to flush page table updates for non-coherent IOMMUs. Since,
from the CPU's point of view, creating IOMMU page tables *is* passing
DMA buffers to a device (the IOMMU's page table walker), there's little
reason not to use the DMA API correctly.

Allow IOMMU drivers to opt into DMA API operations for page table
allocation and updates by providing the appropriate device pointer.
The expectation is that an LPAE IOMMU should have a full view of system
memory, so use streaming mappings to avoid unnecessary pressure on
ZONE_DMA, and treat any DMA translation as a warning sign.
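
From a driver's point of view, the opt-in then amounts to filling in one
extra field of the page table configuration. A minimal sketch (the sizes
and the gather ops here are placeholders, not taken from any real driver):

	struct io_pgtable_cfg cfg = {
		.pgsize_bitmap	= SZ_4K | SZ_2M | SZ_1G,
		.ias		= 48,
		.oas		= 48,
		.tlb		= &my_tlb_ops,	/* driver's TLB callbacks */
		.iommu_dev	= dev,	/* device representing the table walker */
	};

	/* io-pgtable can now map and sync the tables via the DMA API */
	ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, cookie);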

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---

Changes since v1[1]:
- Make device pointer mandatory and use DMA API unconditionally
- Remove flush_pgtable callback entirely
- Style, consistency and typo fixes

[1]: http://thread.gmane.org/gmane.linux.kernel.iommu/10357

 drivers/iommu/io-pgtable-arm.c | 107 ++++++++++++++++++++++++++++++++---------
 drivers/iommu/io-pgtable.h     |   3 ++
 2 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 4e46021..28cca8a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -200,12 +200,76 @@ typedef u64 arm_lpae_iopte;
 
 static bool selftest_running = false;
 
+static dma_addr_t __arm_lpae_dma_addr(struct device *dev, void *pages)
+{
+	return phys_to_dma(dev, virt_to_phys(pages));
+}
+
+static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
+				    struct io_pgtable_cfg *cfg)
+{
+	struct device *dev = cfg->iommu_dev;
+	dma_addr_t dma;
+	void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO);
+
+	if (!pages)
+		return NULL;
+
+	if (dev) {
+		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma))
+			goto out_free;
+		/*
+		 * We depend on the IOMMU being able to work with any physical
+		 * address directly, so if the DMA layer suggests it can't by
+		 * giving us back some translation, that bodes very badly...
+		 */
+		if (dma != __arm_lpae_dma_addr(dev, pages))
+			goto out_unmap;
+	}
+
+	return pages;
+
+out_unmap:
+	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
+	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
+out_free:
+	free_pages_exact(pages, size);
+	return NULL;
+}
+
+static void __arm_lpae_free_pages(void *pages, size_t size,
+				  struct io_pgtable_cfg *cfg)
+{
+	struct device *dev = cfg->iommu_dev;
+
+	if (dev)
+		dma_unmap_single(dev, __arm_lpae_dma_addr(dev, pages),
+				 size, DMA_TO_DEVICE);
+	free_pages_exact(pages, size);
+}
+
+static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
+			       struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct device *dev = cfg->iommu_dev;
+
+	*ptep = pte;
+
+	if (dev)
+		dma_sync_single_for_device(dev, __arm_lpae_dma_addr(dev, ptep),
+					   sizeof(pte), DMA_TO_DEVICE);
+	else if (cfg->tlb->flush_pgtable)
+		cfg->tlb->flush_pgtable(ptep, sizeof(pte), cookie);
+}
+
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 			     unsigned long iova, phys_addr_t paddr,
 			     arm_lpae_iopte prot, int lvl,
 			     arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte pte = prot;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
 	/* We require an unmap first */
 	if (iopte_leaf(*ptep, lvl)) {
@@ -213,7 +277,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 		return -EEXIST;
 	}
 
-	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		pte |= ARM_LPAE_PTE_NS;
 
 	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
@@ -224,8 +288,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
 	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
 
-	*ptep = pte;
-	data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie);
+	__arm_lpae_set_pte(ptep, pte, cfg, data->iop.cookie);
 	return 0;
 }
 
@@ -236,12 +299,13 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 	arm_lpae_iopte *cptep, pte;
 	void *cookie = data->iop.cookie;
 	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
 	/* Find our entry at the current level */
 	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
 
 	/* If we can install a leaf entry at this level, then do so */
-	if (size == block_size && (size & data->iop.cfg.pgsize_bitmap))
+	if (size == block_size && (size & cfg->pgsize_bitmap))
 		return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
 
 	/* We can't allocate tables at the final level */
@@ -251,18 +315,15 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 	/* Grab a pointer to the next level */
 	pte = *ptep;
 	if (!pte) {
-		cptep = alloc_pages_exact(1UL << data->pg_shift,
-					 GFP_ATOMIC | __GFP_ZERO);
+		cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift,
+					       GFP_ATOMIC, cfg);
 		if (!cptep)
 			return -ENOMEM;
 
-		data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift,
-						 cookie);
 		pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
-		if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 			pte |= ARM_LPAE_PTE_NSTABLE;
-		*ptep = pte;
-		data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+		__arm_lpae_set_pte(ptep, pte, cfg, cookie);
 	} else {
 		cptep = iopte_deref(pte, data);
 	}
@@ -347,7 +408,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
 		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
 	}
 
-	free_pages_exact(start, table_size);
+	__arm_lpae_free_pages(start, table_size, &data->iop.cfg);
 }
 
 static void arm_lpae_free_pgtable(struct io_pgtable *iop)
@@ -366,8 +427,8 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	unsigned long blk_start, blk_end;
 	phys_addr_t blk_paddr;
 	arm_lpae_iopte table = 0;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	void *cookie = data->iop.cookie;
-	const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
 
 	blk_start = iova & ~(blk_size - 1);
 	blk_end = blk_start + blk_size;
@@ -393,10 +454,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 		}
 	}
 
-	*ptep = table;
-	tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+	__arm_lpae_set_pte(ptep, table, cfg, cookie);
 	iova &= ~(blk_size - 1);
-	tlb->tlb_add_flush(iova, blk_size, true, cookie);
+	cfg->tlb->tlb_add_flush(iova, blk_size, true, cookie);
 	return size;
 }
 
@@ -418,13 +478,12 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 	/* If the size matches this level, we're in the right place */
 	if (size == blk_size) {
-		*ptep = 0;
-		tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+		__arm_lpae_set_pte(ptep, 0, &data->iop.cfg, cookie);
 
 		if (!iopte_leaf(pte, lvl)) {
 			/* Also flush any partial walks */
 			tlb->tlb_add_flush(iova, size, false, cookie);
-			tlb->tlb_sync(data->iop.cookie);
+			tlb->tlb_sync(cookie);
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
 		} else {
@@ -640,11 +699,12 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	cfg->arm_lpae_s1_cfg.mair[1] = 0;
 
 	/* Looking good; allocate a pgd */
-	data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
+	data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
 	if (!data->pgd)
 		goto out_free_data;
 
-	cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
+	if (cfg->tlb->flush_pgtable)
+		cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
 
 	/* TTBRs */
 	cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
@@ -728,11 +788,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	cfg->arm_lpae_s2_cfg.vtcr = reg;
 
 	/* Allocate pgd pages */
-	data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
+	data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
 	if (!data->pgd)
 		goto out_free_data;
 
-	cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
+	if (cfg->tlb->flush_pgtable)
+		cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
 
 	/* VTTBR */
 	cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 10e32f6..c69529c 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -41,6 +41,8 @@ struct iommu_gather_ops {
  * @ias:           Input address (iova) size, in bits.
  * @oas:           Output address (paddr) size, in bits.
  * @tlb:           TLB management callbacks for this set of tables.
+ * @iommu_dev:     The device representing the DMA configuration for the
+ *                 page table walker.
  */
 struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_NS	(1 << 0)	/* Set NS bit in PTEs */
@@ -49,6 +51,7 @@ struct io_pgtable_cfg {
 	unsigned int			ias;
 	unsigned int			oas;
 	const struct iommu_gather_ops	*tlb;
+	struct device			*iommu_dev;
 
 	/* Low-level data specific to the table format */
 	union {
-- 
1.9.1

* [PATCH v2 2/9] iommu/arm-smmu: Sort out coherency
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

Currently, we detect whether the SMMU has coherent page table walk
capability from the IDR0.CTTW field, and base our cache maintenance
decisions on that. In preparation for fixing the bogus DMA API usage,
however, we need to ensure that the DMA API agrees about this, which
necessitates deferring to the dma-coherent property in the device tree
for the final say.

As an added bonus, since systems exist where an external CTTW signal
has been tied off incorrectly at integration, allowing DT to override
it offers a neat workaround for coherency issues with such SMMUs.
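
Concretely, the four combinations of DT property and ID register bit now
behave as follows (derived from the arm_smmu_device_cfg_probe hunk below):

	dma-coherent | IDR0.CTTW | resulting table walk
	-------------+-----------+---------------------------------
	   present   |     1     | coherent
	   present   |     0     | coherent (CTTW override noted)
	   absent    |     1     | non-coherent (CTTW override noted)
	   absent    |     0     | non-coherent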

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.txt |  6 ++++++
 drivers/iommu/arm-smmu.c                             | 20 +++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 0676050..7180745 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -43,6 +43,12 @@ conditions.
 
 ** System MMU optional properties:
 
+- dma-coherent  : Present if page table walks made by the SMMU are
+                  cache coherent with the CPU.
+
+                  NOTE: this only applies to the SMMU itself, not
+                  masters connected upstream of the SMMU.
+
 - calxeda,smmu-secure-config-access : Enable proper handling of buggy
                   implementations that always use secure access to
                   SMMU configuration registers. In this case non-secure
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 4cd0c29..0583ed2 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -37,6 +37,7 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -1532,6 +1533,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 	unsigned long size;
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 	u32 id;
+	bool cttw_dt, cttw_reg;
 
 	dev_notice(smmu->dev, "probing hardware configuration...\n");
 	dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version);
@@ -1571,10 +1573,22 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 		dev_notice(smmu->dev, "\taddress translation ops\n");
 	}
 
-	if (id & ID0_CTTW) {
+	/*
+	 * In order for DMA API calls to work properly, we must defer to what
+	 * the DT says about coherency, regardless of what the hardware claims.
+	 * Fortunately, this also opens up a workaround for systems where the
+	 * ID register value has ended up configured incorrectly.
+	 */
+	cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
+	cttw_reg = !!(id & ID0_CTTW);
+	if (cttw_dt)
 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
-		dev_notice(smmu->dev, "\tcoherent table walk\n");
-	}
+	if (cttw_dt || cttw_reg)
+		dev_notice(smmu->dev, "\t%scoherent table walk\n",
+			   cttw_dt ? "" : "non-");
+	if (cttw_dt != cttw_reg)
+		dev_notice(smmu->dev,
+			   "\t(IDR0.CTTW overridden by dma-coherent property)\n");
 
 	if (id & ID0_SMS) {
 		u32 smr, sid, mask;
-- 
1.9.1

* [PATCH v2 3/9] iommu/arm-smmu: Clean up DMA API usage
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

With the correct DMA API calls now integrated into the io-pgtable code,
let that handle the flushing of non-coherent page table updates.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/arm-smmu.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 0583ed2..5770ab9 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -611,24 +611,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
 
-
-	/* Ensure new page tables are visible to the hardware walker */
-	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
+	/*
+	 * Ensure new page tables are visible to a coherent hardware walker.
+	 * The page table code deals with flushing for the non-coherent case.
+	 */
+	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
 		dsb(ishst);
-	} else {
-		/*
-		 * If the SMMU can't walk tables in the CPU caches, treat them
-		 * like non-coherent DMA since we need to flush the new entries
-		 * all the way out to memory. There's no possibility of
-		 * recursion here as the SMMU table walker will not be wired
-		 * through another SMMU.
-		 */
-		dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
-			     DMA_TO_DEVICE);
-	}
 }
 
 static struct iommu_gather_ops arm_smmu_gather_ops = {
@@ -899,6 +888,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.ias		= ias,
 		.oas		= oas,
 		.tlb		= &arm_smmu_gather_ops,
+		.iommu_dev	= smmu->dev,
 	};
 
 	smmu_domain->smmu = smmu;
-- 
1.9.1

* [PATCH v2 4/9] iommu/arm-smmu-v3: Clean up DMA API usage
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

With the correct DMA API calls now integrated into the io-pgtable code,
let that handle the flushing of non-coherent page table updates.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 8e9ec81..ac29d24 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1304,23 +1304,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
 
-	if (smmu->features & ARM_SMMU_FEAT_COHERENCY) {
+	/* The page table code handles flushing in the non-coherent case */
+	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		dsb(ishst);
-	} else {
-		dma_addr_t dma_addr;
-		struct device *dev = smmu->dev;
-
-		dma_addr = dma_map_page(dev, virt_to_page(addr), offset, size,
-					DMA_TO_DEVICE);
-
-		if (dma_mapping_error(dev, dma_addr))
-			dev_err(dev, "failed to flush pgtable at %p\n", addr);
-		else
-			dma_unmap_page(dev, dma_addr, size, DMA_TO_DEVICE);
-	}
 }
 
 static struct iommu_gather_ops arm_smmu_gather_ops = {
@@ -1502,6 +1489,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 		.ias		= ias,
 		.oas		= oas,
 		.tlb		= &arm_smmu_gather_ops,
+		.iommu_dev	= smmu->dev,
 	};
 
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
-- 
1.9.1

* [PATCH v2 5/9] iommu/ipmmu-vmsa: Clean up DMA API usage
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

With the correct DMA API calls now integrated into the io-pgtable code,
let that handle the flushing of non-coherent page table updates.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/ipmmu-vmsa.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 1a67c53..8cf605f 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -283,24 +283,10 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf,
 	/* The hardware doesn't support selective TLB flush. */
 }
 
-static void ipmmu_flush_pgtable(void *ptr, size_t size, void *cookie)
-{
-	unsigned long offset = (unsigned long)ptr & ~PAGE_MASK;
-	struct ipmmu_vmsa_domain *domain = cookie;
-
-	/*
-	 * TODO: Add support for coherent walk through CCI with DVM and remove
-	 * cache handling.
-	 */
-	dma_map_page(domain->mmu->dev, virt_to_page(ptr), offset, size,
-		     DMA_TO_DEVICE);
-}
-
 static struct iommu_gather_ops ipmmu_gather_ops = {
 	.tlb_flush_all = ipmmu_tlb_flush_all,
 	.tlb_add_flush = ipmmu_tlb_add_flush,
 	.tlb_sync = ipmmu_tlb_flush_all,
-	.flush_pgtable = ipmmu_flush_pgtable,
 };
 
 /* -----------------------------------------------------------------------------
@@ -327,6 +313,11 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 	domain->cfg.ias = 32;
 	domain->cfg.oas = 40;
 	domain->cfg.tlb = &ipmmu_gather_ops;
+	/*
+	 * TODO: Add support for coherent walk through CCI with DVM and remove
+	 * cache handling. For now, delegate it to the io-pgtable code.
+	 */
+	domain->cfg.iommu_dev = domain->mmu->dev;
 
 	domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
 					   domain);
-- 
1.9.1

* [PATCH v2 6/9] iommu/io-pgtable-arm: Centralise sync points
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

With all current users now opted in to DMA API operations, make the
iommu_dev pointer mandatory, rendering the flush_pgtable callback
redundant for cache maintenance. However, since the DMA calls could be
no-ops in the case of a coherent IOMMU, we still need to ensure that
page table updates are fully synchronised against a subsequent page
table walk. In the unmap path, the TLB sync will usually need to do
this anyway, so just cement that requirement; in the map path, which
may consist solely of cacheable memory writes (in the coherent case),
insert an appropriate barrier at the end of the operation, obviating
the need to call flush_pgtable on every individual update for
synchronisation.
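
For the coherent case, the hazard being closed is the ordering between the
CPU's cacheable PTE writes and anything that may kick off a walk for the
new iova. Condensed from the arm_lpae_map() hunk below, with the reasoning
spelled out in the comment:

	ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
	/*
	 * The PTE stores above could still be sitting in a CPU write
	 * buffer when the caller starts a DMA transaction that makes
	 * the walker fetch them, so publish them before returning.
	 */
	wmb();

	return ret;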

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/io-pgtable-arm.c | 43 +++++++++++++++++++++++-------------------
 drivers/iommu/io-pgtable.h     |  3 ++-
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 28cca8a..0617687 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
+#include <asm/barrier.h>
+
 #include "io-pgtable.h"
 
 #define ARM_LPAE_MAX_ADDR_BITS		48
@@ -215,7 +217,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 	if (!pages)
 		return NULL;
 
-	if (dev) {
+	if (!selftest_running) {
 		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, dma))
 			goto out_free;
@@ -243,24 +245,22 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
 {
 	struct device *dev = cfg->iommu_dev;
 
-	if (dev)
+	if (!selftest_running)
 		dma_unmap_single(dev, __arm_lpae_dma_addr(dev, pages),
 				 size, DMA_TO_DEVICE);
 	free_pages_exact(pages, size);
 }
 
 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
-			       struct io_pgtable_cfg *cfg, void *cookie)
+			       struct io_pgtable_cfg *cfg)
 {
 	struct device *dev = cfg->iommu_dev;
 
 	*ptep = pte;
 
-	if (dev)
+	if (!selftest_running)
 		dma_sync_single_for_device(dev, __arm_lpae_dma_addr(dev, ptep),
 					   sizeof(pte), DMA_TO_DEVICE);
-	else if (cfg->tlb->flush_pgtable)
-		cfg->tlb->flush_pgtable(ptep, sizeof(pte), cookie);
 }
 
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
@@ -288,7 +288,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
 	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
 
-	__arm_lpae_set_pte(ptep, pte, cfg, data->iop.cookie);
+	__arm_lpae_set_pte(ptep, pte, cfg);
 	return 0;
 }
 
@@ -297,7 +297,6 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 			  int lvl, arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte *cptep, pte;
-	void *cookie = data->iop.cookie;
 	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
@@ -323,7 +322,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
 		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 			pte |= ARM_LPAE_PTE_NSTABLE;
-		__arm_lpae_set_pte(ptep, pte, cfg, cookie);
+		__arm_lpae_set_pte(ptep, pte, cfg);
 	} else {
 		cptep = iopte_deref(pte, data);
 	}
@@ -370,7 +369,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 {
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	arm_lpae_iopte *ptep = data->pgd;
-	int lvl = ARM_LPAE_START_LVL(data);
+	int ret, lvl = ARM_LPAE_START_LVL(data);
 	arm_lpae_iopte prot;
 
 	/* If no access, then nothing to do */
@@ -378,7 +377,14 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 		return 0;
 
 	prot = arm_lpae_prot_to_pte(data, iommu_prot);
-	return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
+	ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
+	/*
+	 * Synchronise all PTE updates for the new mapping before there's
+	 * a chance for anything to kick off a table walk for the new iova.
+	 */
+	wmb();
+
+	return ret;
 }
 
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
@@ -428,7 +434,6 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	phys_addr_t blk_paddr;
 	arm_lpae_iopte table = 0;
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
-	void *cookie = data->iop.cookie;
 
 	blk_start = iova & ~(blk_size - 1);
 	blk_end = blk_start + blk_size;
@@ -454,9 +459,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 		}
 	}
 
-	__arm_lpae_set_pte(ptep, table, cfg, cookie);
+	__arm_lpae_set_pte(ptep, table, cfg);
 	iova &= ~(blk_size - 1);
-	cfg->tlb->tlb_add_flush(iova, blk_size, true, cookie);
+	cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie);
 	return size;
 }
 
@@ -478,7 +483,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 	/* If the size matches this level, we're in the right place */
 	if (size == blk_size) {
-		__arm_lpae_set_pte(ptep, 0, &data->iop.cfg, cookie);
+		__arm_lpae_set_pte(ptep, 0, &data->iop.cfg);
 
 		if (!iopte_leaf(pte, lvl)) {
 			/* Also flush any partial walks */
@@ -703,8 +708,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	if (!data->pgd)
 		goto out_free_data;
 
-	if (cfg->tlb->flush_pgtable)
-		cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
+	/* Ensure the empty pgd is visible before any actual TTBR write */
+	wmb();
 
 	/* TTBRs */
 	cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
@@ -792,8 +797,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	if (!data->pgd)
 		goto out_free_data;
 
-	if (cfg->tlb->flush_pgtable)
-		cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
+	/* Ensure the empty pgd is visible before any actual TTBR write */
+	wmb();
 
 	/* VTTBR */
 	cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index c69529c..8f61286 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -17,7 +17,8 @@ enum io_pgtable_fmt {
  *
  * @tlb_flush_all: Synchronously invalidate the entire TLB context.
  * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
- * @tlb_sync:      Ensure any queue TLB invalidation has taken effect.
+ * @tlb_sync:      Ensure any queued TLB invalidation has taken effect, and
+ *                 any corresponding page table updates are visible.
  * @flush_pgtable: Ensure page table updates are visible to the IOMMU.
  *
  * Note that these can all be called in atomic context and must therefore
-- 
1.9.1

* [PATCH v2 7/9] iommu/arm-smmu: Remove arm_smmu_flush_pgtable()
From: Robin Murphy @ 2015-07-29 18:46 UTC
  To: will.deacon@arm.com, laurent.pinchart+renesas@ideasonboard.com
  Cc: linux@arm.linux.org.uk, iommu@lists.linux-foundation.org,
	linux-arm-kernel@lists.infradead.org

With the io-pgtable code now enforcing its own appropriate sync points,
the vestigial flush_pgtable callback becomes entirely redundant, so
remove it altogether.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/arm-smmu.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5770ab9..48a39df 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -608,23 +608,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	}
 }
 
-static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
-{
-	struct arm_smmu_domain *smmu_domain = cookie;
-
-	/*
-	 * Ensure new page tables are visible to a coherent hardware walker.
-	 * The page table code deals with flushing for the non-coherent case.
-	 */
-	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
-		dsb(ishst);
-}
-
 static struct iommu_gather_ops arm_smmu_gather_ops = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync,
-	.flush_pgtable	= arm_smmu_flush_pgtable,
 };
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
-- 
1.9.1

* [PATCH v2 8/9] iommu/arm-smmu-v3: Remove arm_smmu_flush_pgtable()
  2015-07-29 18:46 ` Robin Murphy
@ 2015-07-29 18:46     ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2015-07-29 18:46 UTC (permalink / raw)
  To: will.deacon-5wv7dgnIgG8, laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw
  Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

With the io-pgtable code now enforcing its own appropriate sync points,
the vestigial flush_pgtable callback becomes entirely redundant, so
remove it altogether.

Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
---
 drivers/iommu/arm-smmu-v3.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index ac29d24..0c453fb 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1301,20 +1301,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 }
 
-static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
-{
-	struct arm_smmu_domain *smmu_domain = cookie;
-
-	/* The page table code handles flushing in the non-coherent case */
-	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENCY)
-		dsb(ishst);
-}
-
 static struct iommu_gather_ops arm_smmu_gather_ops = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync,
-	.flush_pgtable	= arm_smmu_flush_pgtable,
 };
 
 /* IOMMU API */
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread
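
For context: with flush_pgtable gone, all of the SMMUv3 driver's remaining
TLB maintenance funnels through the command queue. Below is a condensed
sketch of the tlb_sync callback left behind; the in-tree version also
waits for the command queue to drain, a detail omitted here.

/*
 * Issue a CMD_SYNC so that previously queued TLB invalidation commands
 * complete. Condensed and simplified from arm-smmu-v3.c.
 */
static void arm_smmu_tlb_sync(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CMD_SYNC,
	};

	arm_smmu_cmdq_issue_cmd(smmu_domain->smmu, &cmd);
}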


* [PATCH v2 9/9] iommu/io-pgtable: Remove flush_pgtable callback
  2015-07-29 18:46 ` Robin Murphy
@ 2015-07-29 18:46     ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2015-07-29 18:46 UTC (permalink / raw)
  To: will.deacon-5wv7dgnIgG8, laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw
  Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

With the users fully converted to DMA API operations, it's dead, Jim.

Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
---
 drivers/iommu/io-pgtable-arm.c | 6 ------
 drivers/iommu/io-pgtable.h     | 2 --
 2 files changed, 8 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0617687..e4bc2b2 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -884,16 +884,10 @@ static void dummy_tlb_sync(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static void dummy_flush_pgtable(void *ptr, size_t size, void *cookie)
-{
-	WARN_ON(cookie != cfg_cookie);
-}
-
 static struct iommu_gather_ops dummy_tlb_ops __initdata = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
-	.flush_pgtable	= dummy_flush_pgtable,
 };
 
 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 8f61286..68c63d9 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -19,7 +19,6 @@ enum io_pgtable_fmt {
  * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
  * @tlb_sync:      Ensure any queued TLB invalidation has taken effect, and
  *                 any corresponding page table updates are visible.
- * @flush_pgtable: Ensure page table updates are visible to the IOMMU.
  *
  * Note that these can all be called in atomic context and must therefore
  * not block.
@@ -29,7 +28,6 @@ struct iommu_gather_ops {
 	void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf,
 			      void *cookie);
 	void (*tlb_sync)(void *cookie);
-	void (*flush_pgtable)(void *ptr, size_t size, void *cookie);
 };
 
 /**
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread


* Re: [PATCH v2 1/9] iommu/io-pgtable-arm: Allow appropriate DMA API use
  2015-07-29 18:46 ` Robin Murphy
@ 2015-07-31 11:22     ` Will Deacon
  0 siblings, 0 replies; 20+ messages in thread
From: Will Deacon @ 2015-07-31 11:22 UTC (permalink / raw)
  To: Robin Murphy
  Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw,
	linux-lFZ/pmaqli7XmaaqVzeoHQ,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

Hi Robin,

On Wed, Jul 29, 2015 at 07:46:04PM +0100, Robin Murphy wrote:
> Currently, users of the LPAE page table code are (ab)using dma_map_page()
> as a means to flush page table updates for non-coherent IOMMUs. Since
> from the CPU's point of view, creating IOMMU page tables *is* passing
> DMA buffers to a device (the IOMMU's page table walker), there's little
> reason not to use the DMA API correctly.
> 
> Allow IOMMU drivers to opt into DMA API operations for page table
> allocation and updates by providing their appropriate device pointer.
> The expectation is that an LPAE IOMMU should have a full view of system
> memory, so use streaming mappings to avoid unnecessary pressure on
> ZONE_DMA, and treat any DMA translation as a warning sign.
> 
> Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
> ---
> 
> Changes since v1[1]:
> - Make device pointer mandatory and use DMA API unconditionally
> - Remove flush_pgtable callback entirely
> - Style, consistency and typo fixes

I think this is looking good now, thanks. I'll add it to my ARM SMMU queue
for 4.3.

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread


end of thread, newest: 2015-07-31 11:22 UTC

Thread overview:
2015-07-29 18:46 [PATCH v2 1/9] iommu/io-pgtable-arm: Allow appropriate DMA API use Robin Murphy
2015-07-29 18:46 ` [PATCH v2 2/9] iommu/arm-smmu: Sort out coherency Robin Murphy
2015-07-29 18:46 ` [PATCH v2 3/9] iommu/arm-smmu: Clean up DMA API usage Robin Murphy
2015-07-29 18:46 ` [PATCH v2 4/9] iommu/arm-smmu-v3: " Robin Murphy
2015-07-29 18:46 ` [PATCH v2 5/9] iommu/ipmmu-vmsa: " Robin Murphy
2015-07-29 18:46 ` [PATCH v2 6/9] iommu/io-pgtable-arm: Centralise sync points Robin Murphy
2015-07-29 18:46 ` [PATCH v2 7/9] iommu/arm-smmu: Remove arm_smmu_flush_pgtable() Robin Murphy
2015-07-29 18:46 ` [PATCH v2 8/9] iommu/arm-smmu-v3: " Robin Murphy
2015-07-29 18:46 ` [PATCH v2 9/9] iommu/io-pgtable: Remove flush_pgtable callback Robin Murphy
2015-07-31 11:22 ` [PATCH v2 1/9] iommu/io-pgtable-arm: Allow appropriate DMA API use Will Deacon
