* [PATCH RESEND 0/3] IOVA allocation improvements for iommu-dma
From: Robin Murphy @ 2017-03-31 14:46 UTC
  To: joro@8bytes.org
  Cc: iommu@lists.linux-foundation.org, linux-arm-kernel@lists.infradead.org

Hi Joerg,

Here's the series rebased onto your current next branch just for the
sake of it, and with the tags added. No other changes.

Thanks,
Robin.

Robin Murphy (3):
  iommu/dma: Convert to address-based allocation
  iommu/dma: Clean up MSI IOVA allocation
  iommu/dma: Plumb in the per-CPU IOVA caches

 drivers/iommu/dma-iommu.c | 176 ++++++++++++++++++++++++----------------------
 1 file changed, 90 insertions(+), 86 deletions(-)

-- 
2.11.0.dirty


* [PATCH RESEND 1/3] iommu/dma: Convert to address-based allocation
From: Robin Murphy @ 2017-03-31 14:46 UTC
  To: joro@8bytes.org
  Cc: iommu@lists.linux-foundation.org, linux-arm-kernel@lists.infradead.org

In preparation for some IOVA allocation improvements, clean up all the
explicit struct iova usage such that all our mapping, unmapping and
cleanup paths deal exclusively with addresses rather than implementation
details. In the process, a few of the things we're touching get renamed
for the sake of internal consistency.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/dma-iommu.c | 119 ++++++++++++++++++++++++++--------------------
 1 file changed, 67 insertions(+), 52 deletions(-)
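
To summarise the conversion at the call sites, the caller-visible shape of the
change is roughly the following (a condensed, non-compilable sketch of the
hunks below, reusing the patch's own names rather than introducing anything
new):

    /*
     * Before: allocation returns the allocator's own bookkeeping structure,
     * and freeing goes back through that same structure.
     */
    struct iova *iova = __alloc_iova(domain, size, dma_get_mask(dev), dev);
    dma_addr_t dma_addr = iova_dma_addr(iovad, iova);
    /* ... map/unmap using dma_addr ... */
    __free_iova(iovad, iova);

    /*
     * After: allocation and freeing deal purely in addresses, so the free
     * path must be told the size explicitly instead of reading it out of
     * the rbtree node.
     */
    dma_addr_t iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
    /* ... map/unmap using iova directly ... */
    iommu_dma_free_iova(cookie, iova, size);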

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 85652110c8ff..8e0b684da1ba 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -365,12 +365,12 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
 	}
 }
 
-static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
-		dma_addr_t dma_limit, struct device *dev)
+static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
+		size_t size, dma_addr_t dma_limit, struct device *dev)
 {
 	struct iova_domain *iovad = cookie_iovad(domain);
 	unsigned long shift = iova_shift(iovad);
-	unsigned long length = iova_align(iovad, size) >> shift;
+	unsigned long iova_len = size >> shift;
 	struct iova *iova = NULL;
 
 	if (domain->geometry.force_aperture)
@@ -378,35 +378,42 @@ static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, length, DMA_BIT_MASK(32) >> shift,
+		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
 				  true);
 	/*
 	 * Enforce size-alignment to be safe - there could perhaps be an
 	 * attribute to control this per-device, or at least per-domain...
 	 */
 	if (!iova)
-		iova = alloc_iova(iovad, length, dma_limit >> shift, true);
+		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
 
-	return iova;
+	return (dma_addr_t)iova->pfn_lo << shift;
 }
 
-/* The IOVA allocator knows what we mapped, so just unmap whatever that was */
-static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
+static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
+		dma_addr_t iova, size_t size)
 {
-	struct iova_domain *iovad = cookie_iovad(domain);
-	unsigned long shift = iova_shift(iovad);
-	unsigned long pfn = dma_addr >> shift;
-	struct iova *iova = find_iova(iovad, pfn);
-	size_t size;
+	struct iova_domain *iovad = &cookie->iovad;
+	struct iova *iova_rbnode;
 
-	if (WARN_ON(!iova))
+	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
+	if (WARN_ON(!iova_rbnode))
 		return;
 
-	size = iova_size(iova) << shift;
-	size -= iommu_unmap(domain, pfn << shift, size);
-	/* ...and if we can't, then something is horribly, horribly wrong */
-	WARN_ON(size > 0);
-	__free_iova(iovad, iova);
+	__free_iova(iovad, iova_rbnode);
+}
+
+static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
+		size_t size)
+{
+	struct iova_domain *iovad = cookie_iovad(domain);
+	size_t iova_off = iova_offset(iovad, dma_addr);
+
+	dma_addr -= iova_off;
+	size = iova_align(iovad, size + iova_off);
+
+	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+	iommu_dma_free_iova(domain->iova_cookie, dma_addr, size);
 }
 
 static void __iommu_dma_free_pages(struct page **pages, int count)
@@ -488,7 +495,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
 		dma_addr_t *handle)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
 	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 	*handle = DMA_ERROR_CODE;
 }
@@ -516,11 +523,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = cookie_iovad(domain);
-	struct iova *iova;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	struct page **pages;
 	struct sg_table sgt;
-	dma_addr_t dma_addr;
+	dma_addr_t iova;
 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 
 	*handle = DMA_ERROR_CODE;
@@ -540,11 +547,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 	if (!pages)
 		return NULL;
 
-	iova = __alloc_iova(domain, size, dev->coherent_dma_mask, dev);
+	size = iova_align(iovad, size);
+	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
 	if (!iova)
 		goto out_free_pages;
 
-	size = iova_align(iovad, size);
 	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
 		goto out_free_iova;
 
@@ -560,19 +567,18 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 		sg_miter_stop(&miter);
 	}
 
-	dma_addr = iova_dma_addr(iovad, iova);
-	if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
+	if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot)
 			< size)
 		goto out_free_sg;
 
-	*handle = dma_addr;
+	*handle = iova;
 	sg_free_table(&sgt);
 	return pages;
 
 out_free_sg:
 	sg_free_table(&sgt);
 out_free_iova:
-	__free_iova(iovad, iova);
+	iommu_dma_free_iova(cookie, iova, size);
 out_free_pages:
 	__iommu_dma_free_pages(pages, count);
 	return NULL;
@@ -606,22 +612,22 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 		size_t size, int prot)
 {
-	dma_addr_t dma_addr;
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = cookie_iovad(domain);
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_off = iova_offset(iovad, phys);
-	size_t len = iova_align(iovad, size + iova_off);
-	struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev), dev);
+	dma_addr_t iova;
 
+	size = iova_align(iovad, size + iova_off);
+	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 	if (!iova)
 		return DMA_ERROR_CODE;
 
-	dma_addr = iova_dma_addr(iovad, iova);
-	if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) {
-		__free_iova(iovad, iova);
+	if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
+		iommu_dma_free_iova(cookie, iova, size);
 		return DMA_ERROR_CODE;
 	}
-	return dma_addr + iova_off;
+	return iova + iova_off;
 }
 
 dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
@@ -633,7 +639,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
 }
 
 /*
@@ -722,10 +728,10 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, int prot)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = cookie_iovad(domain);
-	struct iova *iova;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	struct scatterlist *s, *prev = NULL;
-	dma_addr_t dma_addr;
+	dma_addr_t iova;
 	size_t iova_len = 0;
 	unsigned long mask = dma_get_seg_boundary(dev);
 	int i;
@@ -769,7 +775,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		prev = s;
 	}
 
-	iova = __alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
+	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
 	if (!iova)
 		goto out_restore_sg;
 
@@ -777,14 +783,13 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	 * We'll leave any physical concatenation to the IOMMU driver's
 	 * implementation - it knows better than we do.
 	 */
-	dma_addr = iova_dma_addr(iovad, iova);
-	if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
+	if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len)
 		goto out_free_iova;
 
-	return __finalise_sg(dev, sg, nents, dma_addr);
+	return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-	__free_iova(iovad, iova);
+	iommu_dma_free_iova(cookie, iova, iova_len);
 out_restore_sg:
 	__invalidate_sg(sg, nents);
 	return 0;
@@ -793,11 +798,21 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		enum dma_data_direction dir, unsigned long attrs)
 {
+	dma_addr_t start, end;
+	struct scatterlist *tmp;
+	int i;
 	/*
 	 * The scatterlist segments are mapped into a single
 	 * contiguous IOVA allocation, so this is incredibly easy.
 	 */
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
+	start = sg_dma_address(sg);
+	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
+		if (sg_dma_len(tmp) == 0)
+			break;
+		sg = tmp;
+	}
+	end = sg_dma_address(sg) + sg_dma_len(sg);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
 }
 
 dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
@@ -810,7 +825,7 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
 }
 
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -824,7 +839,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iommu_dma_msi_page *msi_page;
 	struct iova_domain *iovad = cookie_iovad(domain);
-	struct iova *iova;
+	dma_addr_t iova;
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 	size_t size = cookie_msi_granule(cookie);
 
@@ -839,10 +854,10 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 
 	msi_page->phys = msi_addr;
 	if (iovad) {
-		iova = __alloc_iova(domain, size, dma_get_mask(dev), dev);
+		iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 		if (!iova)
 			goto out_free_page;
-		msi_page->iova = iova_dma_addr(iovad, iova);
+		msi_page->iova = iova;
 	} else {
 		msi_page->iova = cookie->msi_iova;
 		cookie->msi_iova += size;
@@ -857,7 +872,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 
 out_free_iova:
 	if (iovad)
-		__free_iova(iovad, iova);
+		iommu_dma_free_iova(cookie, iova, size);
 	else
 		cookie->msi_iova -= size;
 out_free_page:
-- 
2.11.0.dirty


* [PATCH RESEND 2/3] iommu/dma: Clean up MSI IOVA allocation
From: Robin Murphy @ 2017-03-31 14:46 UTC
  To: joro@8bytes.org
  Cc: iommu@lists.linux-foundation.org, linux-arm-kernel@lists.infradead.org

Now that allocation is suitably abstracted, our private alloc/free
helpers can drive the trivial MSI cookie allocator directly as well,
which lets us clean up its exposed guts from iommu_dma_map_msi_msg() and
simplify things quite a bit.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/dma-iommu.c | 58 ++++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 33 deletions(-)
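
The MSI cookie case being folded into the common helpers is just a bump
allocator whose "free" only ever undoes the most recent allocation (the error
path of a failed mapping). A tiny standalone model of that behaviour, for
illustration only and not kernel code:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Minimal userspace model of an IOMMU_DMA_MSI_COOKIE allocator. */
    struct msi_cookie {
        uint64_t msi_iova;    /* next free address in the MSI window */
    };

    static uint64_t msi_alloc(struct msi_cookie *c, size_t size)
    {
        c->msi_iova += size;
        return c->msi_iova - size;    /* base of the slot just handed out */
    }

    static void msi_free(struct msi_cookie *c, uint64_t iova, size_t size)
    {
        /* Only ever called for the most recent allocation, as in the patch. */
        assert(iova + size == c->msi_iova);
        c->msi_iova -= size;
    }

    int main(void)
    {
        struct msi_cookie c = { .msi_iova = 0x8000000 };    /* arbitrary base */
        uint64_t a = msi_alloc(&c, 0x1000);
        uint64_t b = msi_alloc(&c, 0x1000);

        msi_free(&c, b, 0x1000);    /* e.g. iommu_map() failed for b */
        printf("a = %#llx, next = %#llx\n",
               (unsigned long long)a, (unsigned long long)c.msi_iova);
        return 0;
    }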

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8e0b684da1ba..1b94beb43036 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -61,15 +61,6 @@ static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 	return PAGE_SIZE;
 }
 
-static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain)
-{
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-
-	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
-		return &cookie->iovad;
-	return NULL;
-}
-
 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 {
 	struct iommu_dma_cookie *cookie;
@@ -368,11 +359,19 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 		size_t size, dma_addr_t dma_limit, struct device *dev)
 {
-	struct iova_domain *iovad = cookie_iovad(domain);
-	unsigned long shift = iova_shift(iovad);
-	unsigned long iova_len = size >> shift;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+	unsigned long shift, iova_len;
 	struct iova *iova = NULL;
 
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+		cookie->msi_iova += size;
+		return cookie->msi_iova - size;
+	}
+
+	shift = iova_shift(iovad);
+	iova_len = size >> shift;
+
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
@@ -396,6 +395,12 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	struct iova_domain *iovad = &cookie->iovad;
 	struct iova *iova_rbnode;
 
+	/* The MSI case is only ever cleaning up its most recent allocation */
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+		cookie->msi_iova -= size;
+		return;
+	}
+
 	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
 	if (WARN_ON(!iova_rbnode))
 		return;
@@ -406,14 +411,15 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
 		size_t size)
 {
-	struct iova_domain *iovad = cookie_iovad(domain);
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_off = iova_offset(iovad, dma_addr);
 
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
 
 	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
-	iommu_dma_free_iova(domain->iova_cookie, dma_addr, size);
+	iommu_dma_free_iova(cookie, dma_addr, size);
 }
 
 static void __iommu_dma_free_pages(struct page **pages, int count)
@@ -838,7 +844,6 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iommu_dma_msi_page *msi_page;
-	struct iova_domain *iovad = cookie_iovad(domain);
 	dma_addr_t iova;
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 	size_t size = cookie_msi_granule(cookie);
@@ -852,29 +857,16 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	if (!msi_page)
 		return NULL;
 
-	msi_page->phys = msi_addr;
-	if (iovad) {
-		iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
-		if (!iova)
-			goto out_free_page;
-		msi_page->iova = iova;
-	} else {
-		msi_page->iova = cookie->msi_iova;
-		cookie->msi_iova += size;
-	}
-
-	if (iommu_map(domain, msi_page->iova, msi_addr, size, prot))
-		goto out_free_iova;
+	iova = __iommu_dma_map(dev, msi_addr, size, prot);
+	if (iommu_dma_mapping_error(dev, iova))
+		goto out_free_page;
 
 	INIT_LIST_HEAD(&msi_page->list);
+	msi_page->phys = msi_addr;
+	msi_page->iova = iova;
 	list_add(&msi_page->list, &cookie->msi_page_list);
 	return msi_page;
 
-out_free_iova:
-	if (iovad)
-		iommu_dma_free_iova(cookie, iova, size);
-	else
-		cookie->msi_iova -= size;
 out_free_page:
 	kfree(msi_page);
 	return NULL;
-- 
2.11.0.dirty


* [PATCH RESEND 3/3] iommu/dma: Plumb in the per-CPU IOVA caches
From: Robin Murphy @ 2017-03-31 14:46 UTC
  To: joro@8bytes.org
  Cc: iommu@lists.linux-foundation.org, linux-arm-kernel@lists.infradead.org

With IOVA allocation suitably tidied up, we are finally free to opt in
to the per-CPU caching mechanism. The caching alone can provide a modest
improvement over walking the rbtree for weedier systems (iperf3 shows
~10% more ethernet throughput on an ARM Juno r1 constrained to a single
650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
lock contention which larger ARM-based systems with lots of parallel I/O
are starting to feel the pain of.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/dma-iommu.c | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)
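
The one subtlety is the power-of-two rounding applied before hitting the
per-CPU caches: the caches are keyed by allocation order, so the size used at
allocation time must land in the same bucket at free time. A small standalone
model of that rounding, for illustration only (IOVA_RANGE_CACHE_MAX_SIZE here
mirrors the kernel constant, 6 at the time of this series):

    #include <stdio.h>

    #define IOVA_RANGE_CACHE_MAX_SIZE 6    /* mirrors the kernel constant */

    static unsigned long roundup_pow_of_two(unsigned long n)
    {
        unsigned long p = 1;

        while (p < n)
            p <<= 1;
        return p;
    }

    /* Mirrors the rounding added to iommu_dma_alloc_iova() below. */
    static unsigned long cacheable_len(unsigned long iova_len)
    {
        if (iova_len < (1UL << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
            iova_len = roundup_pow_of_two(iova_len);
        return iova_len;
    }

    int main(void)
    {
        unsigned long lens[] = { 1, 3, 5, 17, 31, 32, 100 };
        unsigned int i;

        for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
            printf("%3lu pages -> %3lu pages\n",
                   lens[i], cacheable_len(lens[i]));
        return 0;
    }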

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1b94beb43036..8348f366ddd1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift, iova_len;
-	struct iova *iova = NULL;
+	unsigned long shift, iova_len, iova = 0;
 
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 		cookie->msi_iova += size;
@@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
+	/*
+	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
+	 * will come back to bite us badly, so we have to waste a bit of space
+	 * rounding up anything cacheable to make sure that can't happen. The
+	 * order of the unadjusted size will still match upon freeing.
+	 */
+	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		iova_len = roundup_pow_of_two(iova_len);
 
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
-				  true);
-	/*
-	 * Enforce size-alignment to be safe - there could perhaps be an
-	 * attribute to control this per-device, or at least per-domain...
-	 */
-	if (!iova)
-		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
+		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
 
-	return (dma_addr_t)iova->pfn_lo << shift;
+	if (!iova)
+		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
+
+	return (dma_addr_t)iova << shift;
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	struct iova *iova_rbnode;
+	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
-		return;
-	}
-
-	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
-	if (WARN_ON(!iova_rbnode))
-		return;
-
-	__free_iova(iovad, iova_rbnode);
+	else
+		free_iova_fast(iovad, iova >> shift, size >> shift);
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
-- 
2.11.0.dirty


* Re: [PATCH RESEND 0/3] IOVA allocation improvements for iommu-dma
From: Joerg Roedel @ 2017-04-03 10:45 UTC
  To: Robin Murphy
  Cc: iommu@lists.linux-foundation.org, linux-arm-kernel@lists.infradead.org

On Fri, Mar 31, 2017 at 03:46:04PM +0100, Robin Murphy wrote:
> Robin Murphy (3):
>   iommu/dma: Convert to address-based allocation
>   iommu/dma: Clean up MSI IOVA allocation
>   iommu/dma: Plumb in the per-CPU IOVA caches
> 
>  drivers/iommu/dma-iommu.c | 176 ++++++++++++++++++++++++----------------------
>  1 file changed, 90 insertions(+), 86 deletions(-)

Applied to arm/core, thanks Robin.


* Re: [RESEND,1/3] iommu/dma: Convert to address-based allocation
From: Manoj Iyer @ 2017-04-06 18:11 UTC
  To: Robin Murphy; +Cc: joro, iommu, linux-arm-kernel

On Fri, 31 Mar 2017, Robin Murphy wrote:

> In preparation for some IOVA allocation improvements, clean up all the
> explicit struct iova usage such that all our mapping, unmapping and
> cleanup paths deal exclusively with addresses rather than implementation
> details. In the process, a few of the things we're touching get renamed
> for the sake of internal consistency.
>
> Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
> Tested-by: Nate Watterson <nwatters@codeaurora.org>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> drivers/iommu/dma-iommu.c | 119 ++++++++++++++++++++++++++--------------------
> 1 file changed, 67 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 85652110c8ff..8e0b684da1ba 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -365,12 +365,12 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
> 	}
> }
>
> -static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
> -		dma_addr_t dma_limit, struct device *dev)
> +static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
> +		size_t size, dma_addr_t dma_limit, struct device *dev)
> {
> 	struct iova_domain *iovad = cookie_iovad(domain);
> 	unsigned long shift = iova_shift(iovad);
> -	unsigned long length = iova_align(iovad, size) >> shift;
> +	unsigned long iova_len = size >> shift;
> 	struct iova *iova = NULL;
>
> 	if (domain->geometry.force_aperture)
> @@ -378,35 +378,42 @@ static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
>
> 	/* Try to get PCI devices a SAC address */
> 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
> -		iova = alloc_iova(iovad, length, DMA_BIT_MASK(32) >> shift,
> +		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
> 				  true);
> 	/*
> 	 * Enforce size-alignment to be safe - there could perhaps be an
> 	 * attribute to control this per-device, or at least per-domain...
> 	 */
> 	if (!iova)
> -		iova = alloc_iova(iovad, length, dma_limit >> shift, true);
> +		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
>
> -	return iova;
> +	return (dma_addr_t)iova->pfn_lo << shift;
> }
>
> -/* The IOVA allocator knows what we mapped, so just unmap whatever that was */
> -static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
> +static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
> +		dma_addr_t iova, size_t size)
> {
> -	struct iova_domain *iovad = cookie_iovad(domain);
> -	unsigned long shift = iova_shift(iovad);
> -	unsigned long pfn = dma_addr >> shift;
> -	struct iova *iova = find_iova(iovad, pfn);
> -	size_t size;
> +	struct iova_domain *iovad = &cookie->iovad;
> +	struct iova *iova_rbnode;
>
> -	if (WARN_ON(!iova))
> +	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
> +	if (WARN_ON(!iova_rbnode))
> 		return;
>
> -	size = iova_size(iova) << shift;
> -	size -= iommu_unmap(domain, pfn << shift, size);
> -	/* ...and if we can't, then something is horribly, horribly wrong */
> -	WARN_ON(size > 0);
> -	__free_iova(iovad, iova);
> +	__free_iova(iovad, iova_rbnode);
> +}
> +
> +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
> +		size_t size)
> +{
> +	struct iova_domain *iovad = cookie_iovad(domain);
> +	size_t iova_off = iova_offset(iovad, dma_addr);
> +
> +	dma_addr -= iova_off;
> +	size = iova_align(iovad, size + iova_off);
> +
> +	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
> +	iommu_dma_free_iova(domain->iova_cookie, dma_addr, size);
> }
>
> static void __iommu_dma_free_pages(struct page **pages, int count)
> @@ -488,7 +495,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
> void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
> 		dma_addr_t *handle)
> {
> -	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
> +	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
> 	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
> 	*handle = DMA_ERROR_CODE;
> }
> @@ -516,11 +523,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> 		void (*flush_page)(struct device *, const void *, phys_addr_t))
> {
> 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> -	struct iova_domain *iovad = cookie_iovad(domain);
> -	struct iova *iova;
> +	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> +	struct iova_domain *iovad = &cookie->iovad;
> 	struct page **pages;
> 	struct sg_table sgt;
> -	dma_addr_t dma_addr;
> +	dma_addr_t iova;
> 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
>
> 	*handle = DMA_ERROR_CODE;
> @@ -540,11 +547,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> 	if (!pages)
> 		return NULL;
>
> -	iova = __alloc_iova(domain, size, dev->coherent_dma_mask, dev);
> +	size = iova_align(iovad, size);
> +	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
> 	if (!iova)
> 		goto out_free_pages;
>
> -	size = iova_align(iovad, size);
> 	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
> 		goto out_free_iova;
>
> @@ -560,19 +567,18 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> 		sg_miter_stop(&miter);
> 	}
>
> -	dma_addr = iova_dma_addr(iovad, iova);
> -	if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
> +	if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot)
> 			< size)
> 		goto out_free_sg;
>
> -	*handle = dma_addr;
> +	*handle = iova;
> 	sg_free_table(&sgt);
> 	return pages;
>
> out_free_sg:
> 	sg_free_table(&sgt);
> out_free_iova:
> -	__free_iova(iovad, iova);
> +	iommu_dma_free_iova(cookie, iova, size);
> out_free_pages:
> 	__iommu_dma_free_pages(pages, count);
> 	return NULL;
> @@ -606,22 +612,22 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
> static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
> 		size_t size, int prot)
> {
> -	dma_addr_t dma_addr;
> 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> -	struct iova_domain *iovad = cookie_iovad(domain);
> +	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> +	struct iova_domain *iovad = &cookie->iovad;
> 	size_t iova_off = iova_offset(iovad, phys);
> -	size_t len = iova_align(iovad, size + iova_off);
> -	struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev), dev);
> +	dma_addr_t iova;
>
> +	size = iova_align(iovad, size + iova_off);
> +	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
> 	if (!iova)
> 		return DMA_ERROR_CODE;
>
> -	dma_addr = iova_dma_addr(iovad, iova);
> -	if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) {
> -		__free_iova(iovad, iova);
> +	if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
> +		iommu_dma_free_iova(cookie, iova, size);
> 		return DMA_ERROR_CODE;
> 	}
> -	return dma_addr + iova_off;
> +	return iova + iova_off;
> }
>
> dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
> @@ -633,7 +639,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
> void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
> 		enum dma_data_direction dir, unsigned long attrs)
> {
> -	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
> +	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
> }
>
> /*
> @@ -722,10 +728,10 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
> 		int nents, int prot)
> {
> 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> -	struct iova_domain *iovad = cookie_iovad(domain);
> -	struct iova *iova;
> +	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> +	struct iova_domain *iovad = &cookie->iovad;
> 	struct scatterlist *s, *prev = NULL;
> -	dma_addr_t dma_addr;
> +	dma_addr_t iova;
> 	size_t iova_len = 0;
> 	unsigned long mask = dma_get_seg_boundary(dev);
> 	int i;
> @@ -769,7 +775,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
> 		prev = s;
> 	}
>
> -	iova = __alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
> +	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
> 	if (!iova)
> 		goto out_restore_sg;
>
> @@ -777,14 +783,13 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
> 	 * We'll leave any physical concatenation to the IOMMU driver's
> 	 * implementation - it knows better than we do.
> 	 */
> -	dma_addr = iova_dma_addr(iovad, iova);
> -	if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
> +	if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len)
> 		goto out_free_iova;
>
> -	return __finalise_sg(dev, sg, nents, dma_addr);
> +	return __finalise_sg(dev, sg, nents, iova);
>
> out_free_iova:
> -	__free_iova(iovad, iova);
> +	iommu_dma_free_iova(cookie, iova, iova_len);
> out_restore_sg:
> 	__invalidate_sg(sg, nents);
> 	return 0;
> @@ -793,11 +798,21 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
> void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
> 		enum dma_data_direction dir, unsigned long attrs)
> {
> +	dma_addr_t start, end;
> +	struct scatterlist *tmp;
> +	int i;
> 	/*
> 	 * The scatterlist segments are mapped into a single
> 	 * contiguous IOVA allocation, so this is incredibly easy.
> 	 */
> -	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
> +	start = sg_dma_address(sg);
> +	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
> +		if (sg_dma_len(tmp) == 0)
> +			break;
> +		sg = tmp;
> +	}
> +	end = sg_dma_address(sg) + sg_dma_len(sg);
> +	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
> }
>
> dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
> @@ -810,7 +825,7 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
> void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
> 		size_t size, enum dma_data_direction dir, unsigned long attrs)
> {
> -	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
> +	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
> }
>
> int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
> @@ -824,7 +839,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
> 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> 	struct iommu_dma_msi_page *msi_page;
> 	struct iova_domain *iovad = cookie_iovad(domain);
> -	struct iova *iova;
> +	dma_addr_t iova;
> 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> 	size_t size = cookie_msi_granule(cookie);
>
> @@ -839,10 +854,10 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
>
> 	msi_page->phys = msi_addr;
> 	if (iovad) {
> -		iova = __alloc_iova(domain, size, dma_get_mask(dev), dev);
> +		iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
> 		if (!iova)
> 			goto out_free_page;
> -		msi_page->iova = iova_dma_addr(iovad, iova);
> +		msi_page->iova = iova;
> 	} else {
> 		msi_page->iova = cookie->msi_iova;
> 		cookie->msi_iova += size;
> @@ -857,7 +872,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
>
> out_free_iova:
> 	if (iovad)
> -		__free_iova(iovad, iova);
> +		iommu_dma_free_iova(cookie, iova, size);
> 	else
> 		cookie->msi_iova -= size;
> out_free_page:
>

This patch series helps resolve the Ubuntu bug in which the Ubuntu Zesty
(4.10-based) kernel reports multi-CPU soft lockups on the QDF2400 SDP:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1680549

This patch series, along with the following cherry-picks from Linus's tree:
dddd632b072f iommu/dma: Implement PCI allocation optimisation
de84f5f049d9 iommu/dma: Stop getting dma_32bit_pfn wrong

was applied to the Ubuntu Zesty 4.10 kernel (Ubuntu-4.10.0-18.20) and tested
on a QDF2400 SDP.

Tested-by: Manoj Iyer <manoj.iyer@canonical.com>

--
============================
Manoj Iyer
Ubuntu/Canonical
ARM Servers - Cloud
============================

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,2/3] iommu/dma: Clean up MSI IOVA allocation
  2017-03-31 14:46     ` Robin Murphy
@ 2017-04-06 18:14       ` Manoj Iyer
  -1 siblings, 0 replies; 20+ messages in thread
From: Manoj Iyer @ 2017-04-06 18:14 UTC (permalink / raw)
  To: Robin Murphy; +Cc: joro, iommu, linux-arm-kernel

On Fri, 31 Mar 2017, Robin Murphy wrote:

> Now that allocation is suitably abstracted, our private alloc/free
> helpers can drive the trivial MSI cookie allocator directly as well,
> which lets us clean up its exposed guts from iommu_dma_map_msi_msg() and
> simplify things quite a bit.
>
> Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
> Tested-by: Nate Watterson <nwatters@codeaurora.org>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> drivers/iommu/dma-iommu.c | 58 ++++++++++++++++++++---------------------------
> 1 file changed, 25 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 8e0b684da1ba..1b94beb43036 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -61,15 +61,6 @@ static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
> 	return PAGE_SIZE;
> }
>
> -static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain)
> -{
> -	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> -
> -	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
> -		return &cookie->iovad;
> -	return NULL;
> -}
> -
> static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
> {
> 	struct iommu_dma_cookie *cookie;
> @@ -368,11 +359,19 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
> static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
> 		size_t size, dma_addr_t dma_limit, struct device *dev)
> {
> -	struct iova_domain *iovad = cookie_iovad(domain);
> -	unsigned long shift = iova_shift(iovad);
> -	unsigned long iova_len = size >> shift;
> +	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> +	struct iova_domain *iovad = &cookie->iovad;
> +	unsigned long shift, iova_len;
> 	struct iova *iova = NULL;
>
> +	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
> +		cookie->msi_iova += size;
> +		return cookie->msi_iova - size;
> +	}
> +
> +	shift = iova_shift(iovad);
> +	iova_len = size >> shift;
> +
> 	if (domain->geometry.force_aperture)
> 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
>
> @@ -396,6 +395,12 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
> 	struct iova_domain *iovad = &cookie->iovad;
> 	struct iova *iova_rbnode;
>
> +	/* The MSI case is only ever cleaning up its most recent allocation */
> +	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
> +		cookie->msi_iova -= size;
> +		return;
> +	}
> +
> 	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
> 	if (WARN_ON(!iova_rbnode))
> 		return;
> @@ -406,14 +411,15 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
> static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
> 		size_t size)
> {
> -	struct iova_domain *iovad = cookie_iovad(domain);
> +	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> +	struct iova_domain *iovad = &cookie->iovad;
> 	size_t iova_off = iova_offset(iovad, dma_addr);
>
> 	dma_addr -= iova_off;
> 	size = iova_align(iovad, size + iova_off);
>
> 	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
> -	iommu_dma_free_iova(domain->iova_cookie, dma_addr, size);
> +	iommu_dma_free_iova(cookie, dma_addr, size);
> }
>
> static void __iommu_dma_free_pages(struct page **pages, int count)
> @@ -838,7 +844,6 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
> {
> 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> 	struct iommu_dma_msi_page *msi_page;
> -	struct iova_domain *iovad = cookie_iovad(domain);
> 	dma_addr_t iova;
> 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> 	size_t size = cookie_msi_granule(cookie);
> @@ -852,29 +857,16 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
> 	if (!msi_page)
> 		return NULL;
>
> -	msi_page->phys = msi_addr;
> -	if (iovad) {
> -		iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
> -		if (!iova)
> -			goto out_free_page;
> -		msi_page->iova = iova;
> -	} else {
> -		msi_page->iova = cookie->msi_iova;
> -		cookie->msi_iova += size;
> -	}
> -
> -	if (iommu_map(domain, msi_page->iova, msi_addr, size, prot))
> -		goto out_free_iova;
> +	iova = __iommu_dma_map(dev, msi_addr, size, prot);
> +	if (iommu_dma_mapping_error(dev, iova))
> +		goto out_free_page;
>
> 	INIT_LIST_HEAD(&msi_page->list);
> +	msi_page->phys = msi_addr;
> +	msi_page->iova = iova;
> 	list_add(&msi_page->list, &cookie->msi_page_list);
> 	return msi_page;
>
> -out_free_iova:
> -	if (iovad)
> -		iommu_dma_free_iova(cookie, iova, size);
> -	else
> -		cookie->msi_iova -= size;
> out_free_page:
> 	kfree(msi_page);
> 	return NULL;
>

This patch series helps resolve the Ubuntu bug where we see the Ubuntu
Zesty (4.10-based) kernel reporting multi-CPU soft lockups on a QDF2400 SDP:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1680549

This patch series, along with the following cherry-picks from Linus's tree:
dddd632b072f iommu/dma: Implement PCI allocation optimisation
de84f5f049d9 iommu/dma: Stop getting dma_32bit_pfn wrong

was applied to the Ubuntu Zesty 4.10 kernel (Ubuntu-4.10.0-18.20) and tested
on a QDF2400 SDP.

Tested-by: Manoj Iyer <manoj.iyer@canonical.com>


--
============================
Manoj Iyer
Ubuntu/Canonical
ARM Servers - Cloud
============================

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,3/3] iommu/dma: Plumb in the per-CPU IOVA caches
  2017-03-31 14:46     ` Robin Murphy
@ 2017-04-06 18:15       ` Manoj Iyer
  -1 siblings, 0 replies; 20+ messages in thread
From: Manoj Iyer @ 2017-04-06 18:15 UTC (permalink / raw)
  To: Robin Murphy; +Cc: joro, iommu, linux-arm-kernel

On Fri, 31 Mar 2017, Robin Murphy wrote:

> With IOVA allocation suitably tidied up, we are finally free to opt in
> to the per-CPU caching mechanism. The caching alone can provide a modest
> improvement over walking the rbtree for weedier systems (iperf3 shows
> ~10% more ethernet throughput on an ARM Juno r1 constrained to a single
> 650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
> lock contention which larger ARM-based systems with lots of parallel I/O
> are starting to feel the pain of.
>
> Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
> Tested-by: Nate Watterson <nwatters@codeaurora.org>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> drivers/iommu/dma-iommu.c | 39 ++++++++++++++++++---------------------
> 1 file changed, 18 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 1b94beb43036..8348f366ddd1 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
> {
> 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
> 	struct iova_domain *iovad = &cookie->iovad;
> -	unsigned long shift, iova_len;
> -	struct iova *iova = NULL;
> +	unsigned long shift, iova_len, iova = 0;
>
> 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
> 		cookie->msi_iova += size;
> @@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
>
> 	shift = iova_shift(iovad);
> 	iova_len = size >> shift;
> +	/*
> +	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
> +	 * will come back to bite us badly, so we have to waste a bit of space
> +	 * rounding up anything cacheable to make sure that can't happen. The
> +	 * order of the unadjusted size will still match upon freeing.
> +	 */
> +	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
> +		iova_len = roundup_pow_of_two(iova_len);
>
> 	if (domain->geometry.force_aperture)
> 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
>
> 	/* Try to get PCI devices a SAC address */
> 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
> -		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
> -				  true);
> -	/*
> -	 * Enforce size-alignment to be safe - there could perhaps be an
> -	 * attribute to control this per-device, or at least per-domain...
> -	 */
> -	if (!iova)
> -		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
> +		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
>
> -	return (dma_addr_t)iova->pfn_lo << shift;
> +	if (!iova)
> +		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
> +
> +	return (dma_addr_t)iova << shift;
> }
>
> static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
> 		dma_addr_t iova, size_t size)
> {
> 	struct iova_domain *iovad = &cookie->iovad;
> -	struct iova *iova_rbnode;
> +	unsigned long shift = iova_shift(iovad);
>
> 	/* The MSI case is only ever cleaning up its most recent allocation */
> -	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
> +	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
> 		cookie->msi_iova -= size;
> -		return;
> -	}
> -
> -	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
> -	if (WARN_ON(!iova_rbnode))
> -		return;
> -
> -	__free_iova(iovad, iova_rbnode);
> +	else
> +		free_iova_fast(iovad, iova >> shift, size >> shift);
> }
>
> static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
>

This patch series helps resolve the Ubuntu bug where we see the Ubuntu
Zesty (4.10-based) kernel reporting multi-CPU soft lockups on a QDF2400 SDP:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1680549

This patch series, along with the following cherry-picks from Linus's tree:
dddd632b072f iommu/dma: Implement PCI allocation optimisation
de84f5f049d9 iommu/dma: Stop getting dma_32bit_pfn wrong

was applied to the Ubuntu Zesty 4.10 kernel (Ubuntu-4.10.0-18.20) and tested
on a QDF2400 SDP.

Tested-by: Manoj Iyer <manoj.iyer@canonical.com>


--
============================
Manoj Iyer
Ubuntu/Canonical
ARM Servers - Cloud
============================

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,3/3] iommu/dma: Plumb in the per-CPU IOVA caches
  2017-04-06 18:15       ` Manoj Iyer
@ 2017-04-06 18:56         ` Robin Murphy
  -1 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2017-04-06 18:56 UTC (permalink / raw)
  To: Manoj Iyer
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

On 06/04/17 19:15, Manoj Iyer wrote:
> On Fri, 31 Mar 2017, Robin Murphy wrote:
> 
>> With IOVA allocation suitably tidied up, we are finally free to opt in
>> to the per-CPU caching mechanism. The caching alone can provide a modest
>> improvement over walking the rbtree for weedier systems (iperf3 shows
>> ~10% more ethernet throughput on an ARM Juno r1 constrained to a single
>> 650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
>> lock contention which larger ARM-based systems with lots of parallel I/O
>> are starting to feel the pain of.
>>
>> Reviewed-by: Nate Watterson <nwatters-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
>> Tested-by: Nate Watterson <nwatters-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
>> Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
>> ---
>> drivers/iommu/dma-iommu.c | 39 ++++++++++++++++++---------------------
>> 1 file changed, 18 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 1b94beb43036..8348f366ddd1 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct
>> iommu_domain *domain,
>> {
>>     struct iommu_dma_cookie *cookie = domain->iova_cookie;
>>     struct iova_domain *iovad = &cookie->iovad;
>> -    unsigned long shift, iova_len;
>> -    struct iova *iova = NULL;
>> +    unsigned long shift, iova_len, iova = 0;
>>
>>     if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
>>         cookie->msi_iova += size;
>> @@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct
>> iommu_domain *domain,
>>
>>     shift = iova_shift(iovad);
>>     iova_len = size >> shift;
>> +    /*
>> +     * Freeing non-power-of-two-sized allocations back into the IOVA
>> caches
>> +     * will come back to bite us badly, so we have to waste a bit of
>> space
>> +     * rounding up anything cacheable to make sure that can't happen.
>> The
>> +     * order of the unadjusted size will still match upon freeing.
>> +     */
>> +    if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
>> +        iova_len = roundup_pow_of_two(iova_len);
>>
>>     if (domain->geometry.force_aperture)
>>         dma_limit = min(dma_limit, domain->geometry.aperture_end);
>>
>>     /* Try to get PCI devices a SAC address */
>>     if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
>> -        iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
>> -                  true);
>> -    /*
>> -     * Enforce size-alignment to be safe - there could perhaps be an
>> -     * attribute to control this per-device, or at least per-domain...
>> -     */
>> -    if (!iova)
>> -        iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
>> +        iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >>
>> shift);
>>
>> -    return (dma_addr_t)iova->pfn_lo << shift;
>> +    if (!iova)
>> +        iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
>> +
>> +    return (dma_addr_t)iova << shift;
>> }
>>
>> static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
>>         dma_addr_t iova, size_t size)
>> {
>>     struct iova_domain *iovad = &cookie->iovad;
>> -    struct iova *iova_rbnode;
>> +    unsigned long shift = iova_shift(iovad);
>>
>>     /* The MSI case is only ever cleaning up its most recent
>> allocation */
>> -    if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
>> +    if (cookie->type == IOMMU_DMA_MSI_COOKIE)
>>         cookie->msi_iova -= size;
>> -        return;
>> -    }
>> -
>> -    iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
>> -    if (WARN_ON(!iova_rbnode))
>> -        return;
>> -
>> -    __free_iova(iovad, iova_rbnode);
>> +    else
>> +        free_iova_fast(iovad, iova >> shift, size >> shift);
>> }
>>
>> static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t
>> dma_addr,
>>
> 
> This patch series helps to resolve the Ubuntu bug, where we see the
> Ubuntu Zesty (4.10 based) kernel reporting multi cpu soft lockups on
> QDF2400 SDP. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1680549

Wow, how many separate buffers does that driver have mapped at once to
spend 22 seconds walking the rbtree for a single allocation!? I'd almost
expect that to indicate a deadlock.

I'm guessing you wouldn't have seen this on older kernels, since I
assume that particular platform is booting via ACPI, so it wouldn't have
had the SMMU enabled without the IORT support, which landed in 4.10.

> This patch series along with the following cherry-picks from Linus's tree
> dddd632b072f iommu/dma: Implement PCI allocation optimisation
> de84f5f049d9 iommu/dma: Stop getting dma_32bit_pfn wrong
> 
> were applied to Ubuntu Zesty 4.10 kernel (Ubuntu-4.10.0-18.20) and
> tested on a QDF2400 SDP.
> 
> Tested-by: Manoj Iyer <manoj.iyer-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Thanks,
Robin.

> 
> 
> -- 
> ============================
> Manoj Iyer
> Ubuntu/Canonical
> ARM Servers - Cloud
> ============================

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,3/3] iommu/dma: Plumb in the per-CPU IOVA caches
  2017-04-06 18:56         ` Robin Murphy
@ 2017-04-07  7:22             ` Nate Watterson
  -1 siblings, 0 replies; 20+ messages in thread
From: Nate Watterson @ 2017-04-07  7:22 UTC (permalink / raw)
  To: Robin Murphy, Manoj Iyer
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

Hi Robin,
On 4/6/2017 2:56 PM, Robin Murphy wrote:
> On 06/04/17 19:15, Manoj Iyer wrote:
>> On Fri, 31 Mar 2017, Robin Murphy wrote:
>>
>>> With IOVA allocation suitably tidied up, we are finally free to opt in
>>> to the per-CPU caching mechanism. The caching alone can provide a modest
>>> improvement over walking the rbtree for weedier systems (iperf3 shows
>>> ~10% more ethernet throughput on an ARM Juno r1 constrained to a single
>>> 650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
>>> lock contention which larger ARM-based systems with lots of parallel I/O
>>> are starting to feel the pain of.
>>>

[...]

>>
>> This patch series helps to resolve the Ubuntu bug, where we see the
>> Ubuntu Zesty (4.10 based) kernel reporting multi cpu soft lockups on
>> QDF2400 SDP. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1680549
>
> Wow, how many separate buffers does that driver have mapped at once to
> spend 22 seconds walking the rbtree for a single allocation!? I'd almost
> expect that to indicate a deadlock.
>
> I'm guessing you wouldn't have seen this on older kernels, since I
> assume that particular platform is booting via ACPI, so wouldn't have
> had the SMMU enabled without the IORT support which landed in 4.10.
>

Although this series does improve performance, the soft lockups seen
in the Ubuntu bug Manoj mentioned were actually caused by the mlx5
interface bring-up making a huge number of concurrent calls to
alloc_iova() with limit_pfn != dma_32bit_pfn, which bypassed the
optimized IOVA lookup.
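
For anyone not familiar with the pre-4.11 allocator, the limit matters
because the rbtree search only starts from the cached 32-bit node when
the request is capped at exactly dma_32bit_pfn; any other limit starts
from the very top of the tree and walks it under the rbtree lock. A
simplified sketch of that check (paraphrased, not the exact
drivers/iommu/iova.c source of that era):

#include <linux/iova.h>
#include <linux/rbtree.h>

/* Simplified: where does alloc_iova() start searching from? */
static struct rb_node *start_node(struct iova_domain *iovad,
				  unsigned long limit_pfn)
{
	/*
	 * Only a request capped at exactly dma_32bit_pfn can reuse the
	 * cached 32-bit node; anything else falls back to rb_last() and
	 * walks the whole tree while holding the rbtree lock.
	 */
	if (limit_pfn != iovad->dma_32bit_pfn || !iovad->cached32_node)
		return rb_last(&iovad->rbroot);
	return iovad->cached32_node;
}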

Internally we worked around the issue by adding a set_dma_mask handler
that would call iommu_dma_init_domain() to adjust dma_32bit_pfn to
match the input mask.

https://source.codeaurora.org/quic/server/kernel/commit/arch/arm64/mm/dma-mapping.c?h=qserver-4.10&id=503b36fd3866cab216fc51a5a4015aaa99daf173
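
A rough idea of the shape of that workaround (hypothetical sketch only;
the function name, base address and error handling are my assumptions,
see the commit above for the real implementation) is a dma_map_ops
.set_dma_mask handler along these lines:

#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/iommu.h>

/* Hypothetical sketch: not the actual codeaurora patch */
static int qdf2400_set_dma_mask(struct device *dev, u64 mask)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (!dma_supported(dev, mask))
		return -EIO;
	*dev->dma_mask = mask;

	/*
	 * Re-run the DMA domain init so that dma_32bit_pfn tracks the
	 * device's real mask rather than a fixed 32-bit boundary.
	 * A base address of 0 is assumed here for simplicity.
	 */
	if (domain && domain->type == IOMMU_DOMAIN_DMA)
		return iommu_dma_init_domain(domain, 0, mask + 1, dev);
	return 0;
}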

The workaround worked well; however, it clearly would not have played
nicely with your dma-iommu PCI optimizations that force dma_limit to
32 bits, so it was never sent out. The application of the "PCI
allocation optimisation" patch is what actually remedied the CPU soft
lockups seen by Manoj.

Back to your question of how many buffers the mlx5 driver has mapped
at once: it seems to scale linearly with core count. For example, with
24 cores, after doing 'ifconfig eth<n> up', ~38k calls to alloc_iova()
have been made and the minimum IOVA is ~0xF600_0000. With 48 cores,
those numbers jump to ~66k calls with a minimum IOVA of ~0xEF00_0000.

Things get really scary when you start using 64k pages. The number of
calls to alloc_iova() stays about the same, which, combined with the
reserved PCI windows, ends up consuming all of our 32-bit IOVAs,
forcing us to once again call alloc_iova(), but this time with
limit_pfn != dma_32bit_pfn. This is actually how I stumbled upon
the alloc_iova() underflow bug that I posted about a bit earlier.
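
To put rough numbers on the 64k-page case (illustrative arithmetic based
on the figures above, not a new measurement):

	66,000 allocations x 64KiB minimum per allocation ~= 4.03GiB > 4GiB

so the 32-bit IOVA range is exhausted even before the reserved PCI
windows are subtracted.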

>> This patch series along with the following cherry-picks from Linus's tree
>> dddd632b072f iommu/dma: Implement PCI allocation optimisation
>> de84f5f049d9 iommu/dma: Stop getting dma_32bit_pfn wrong
>>
>> were applied to Ubuntu Zesty 4.10 kernel (Ubuntu-4.10.0-18.20) and
>> tested on a QDF2400 SDP.
>>
>> Tested-by: Manoj Iyer <manoj.iyer-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
>
> Thanks,
> Robin.
>
>>
>>
>> --
>> ============================
>> Manoj Iyer
>> Ubuntu/Canonical
>> ARM Servers - Cloud
>> ============================
>
> _______________________________________________
> iommu mailing list
> iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
>

-- 
Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-04-07  7:22 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-31 14:46 [PATCH RESEND 0/3] IOVA allocation improvements for iommu-dma Robin Murphy
2017-03-31 14:46 ` Robin Murphy
     [not found] ` <cover.1490971180.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2017-03-31 14:46   ` [PATCH RESEND 1/3] iommu/dma: Convert to address-based allocation Robin Murphy
2017-03-31 14:46     ` Robin Murphy
2017-04-06 18:11     ` [RESEND,1/3] " Manoj Iyer
2017-04-06 18:11       ` Manoj Iyer
2017-03-31 14:46   ` [PATCH RESEND 2/3] iommu/dma: Clean up MSI IOVA allocation Robin Murphy
2017-03-31 14:46     ` Robin Murphy
2017-04-06 18:14     ` [RESEND,2/3] " Manoj Iyer
2017-04-06 18:14       ` Manoj Iyer
2017-03-31 14:46   ` [PATCH RESEND 3/3] iommu/dma: Plumb in the per-CPU IOVA caches Robin Murphy
2017-03-31 14:46     ` Robin Murphy
2017-04-06 18:15     ` [RESEND,3/3] " Manoj Iyer
2017-04-06 18:15       ` Manoj Iyer
2017-04-06 18:56       ` Robin Murphy
2017-04-06 18:56         ` Robin Murphy
     [not found]         ` <c43b1f8f-02fd-978c-0dee-cc3f76a9924b-5wv7dgnIgG8@public.gmane.org>
2017-04-07  7:22           ` Nate Watterson
2017-04-07  7:22             ` Nate Watterson
2017-04-03 10:45   ` [PATCH RESEND 0/3] IOVA allocation improvements for iommu-dma Joerg Roedel
2017-04-03 10:45     ` Joerg Roedel
