* [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
@ 2014-06-02 20:03 ` Laura Abbott
  0 siblings, 0 replies; 17+ messages in thread
From: Laura Abbott @ 2014-06-02 20:03 UTC (permalink / raw)
  To: Will Deacon, Catalin Marinas
  Cc: Laura Abbott, Ritesh Harjani, David Riley,
	linux-arm-kernel@lists.infradead.org,
	devicetree@vger.kernel.org

Neither CMA nor noncoherent allocations support atomic allocations.
Add a dedicated atomic pool to support this.

Change-Id: I46c8fdffe5e0687403d42b37643137c8cf344259
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---

v2: Various bug fixes pointed out by David and Ritesh (CMA dependency, swapping
coherent, noncoherent). I'm still not sure how to address the devicetree
suggestion by Will [1][2]. I added the devicetree mailing list this time around
to get more input on this.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249180.html
[2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249528.html

---
 arch/arm64/mm/dma-mapping.c | 192 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 190 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c851eb4..792d43c 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -41,6 +41,110 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 	return prot;
 }
 
+#define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
+
+struct dma_pool {
+	size_t size;
+	spinlock_t lock;
+	void *coherent_vaddr;
+	void *noncoherent_vaddr;
+	unsigned long *bitmap;
+	unsigned long nr_pages;
+	struct page **pages;
+};
+
+static struct dma_pool atomic_pool = {
+	.size = DEFAULT_DMA_COHERENT_POOL_SIZE,
+};
+
+static int __init early_coherent_pool(char *p)
+{
+	atomic_pool.size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+static void *__alloc_from_pool(size_t size, struct page **ret_page,
+					bool coherent)
+{
+	struct dma_pool *pool = &atomic_pool;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int pageno;
+	unsigned long flags;
+	void *ptr = NULL;
+	unsigned long align_mask;
+	void *pool_start = coherent ? pool->coherent_vaddr :
+				      pool->noncoherent_vaddr;
+
+	if (!pool->coherent_vaddr || !pool->noncoherent_vaddr) {
+		WARN(1, "Atomic pool not initialised!\n");
+		return NULL;
+	}
+
+	/*
+	 * Align the region allocation - allocations from pool are rather
+	 * small, so align them to their order in pages, minimum is a page
+	 * size. This helps reduce fragmentation of the DMA space.
+	 */
+	align_mask = (1 << get_order(size)) - 1;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
+					    0, count, align_mask);
+	if (pageno < pool->nr_pages) {
+		bitmap_set(pool->bitmap, pageno, count);
+		ptr = pool_start + PAGE_SIZE * pageno;
+		*ret_page = pool->pages[pageno];
+	} else {
+		pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
+			    "Please increase it with coherent_pool= kernel parameter!\n",
+				(unsigned)pool->size / 1024);
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	return ptr;
+}
+
+static bool __in_atomic_pool(void *start, size_t size, void *pool_start)
+{
+	struct dma_pool *pool = &atomic_pool;
+	void *end = start + size;
+	void *pool_end = pool_start + pool->size;
+
+	if (start < pool_start || start >= pool_end)
+		return false;
+
+	if (end <= pool_end)
+		return true;
+
+	WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
+		start, end - 1, pool_start, pool_end - 1);
+
+	return false;
+}
+
+static int __free_from_pool(void *start, size_t size, bool coherent)
+{
+	struct dma_pool *pool = &atomic_pool;
+	unsigned long pageno, count;
+	unsigned long flags;
+	void *pool_start = coherent ? pool->coherent_vaddr :
+				      pool->noncoherent_vaddr;
+
+	if (!__in_atomic_pool(start, size, pool_start))
+		return 0;
+
+	pageno = (start - pool_start) >> PAGE_SHIFT;
+	count = size >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	bitmap_clear(pool->bitmap, pageno, count);
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	return 1;
+}
+
+
 static void *__dma_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
 				  struct dma_attrs *attrs)
@@ -53,7 +157,16 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
 		flags |= GFP_DMA;
-	if (IS_ENABLED(CONFIG_DMA_CMA)) {
+
+	if (!(flags & __GFP_WAIT)) {
+		struct page *page = NULL;
+		void *addr = __alloc_from_pool(size, &page, true);
+
+		if (addr)
+			*dma_handle = phys_to_dma(dev, page_to_phys(page));
+
+		return addr;
+	} else if (IS_ENABLED(CONFIG_DMA_CMA)) {
 		struct page *page;
 
 		size = PAGE_ALIGN(size);
@@ -78,7 +191,9 @@ static void __dma_free_coherent(struct device *dev, size_t size,
 		return;
 	}
 
-	if (IS_ENABLED(CONFIG_DMA_CMA)) {
+	if (__free_from_pool(vaddr, size, true)) {
+		return;
+	} else if (IS_ENABLED(CONFIG_DMA_CMA)) {
 		phys_addr_t paddr = dma_to_phys(dev, dma_handle);
 
 		dma_release_from_contiguous(dev,
@@ -100,9 +215,21 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
 
+	if (!(flags & __GFP_WAIT)) {
+		struct page *page = NULL;
+		void *addr = __alloc_from_pool(size, &page, false);
+
+		if (addr)
+			*dma_handle = phys_to_dma(dev, page_to_phys(page));
+
+		return addr;
+
+	}
+
 	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
 	if (!ptr)
 		goto no_mem;
+
 	map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
 	if (!map)
 		goto no_map;
@@ -135,6 +262,8 @@ static void __dma_free_noncoherent(struct device *dev, size_t size,
 {
 	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
+	if (__free_from_pool(vaddr, size, false))
+		return;
 	vunmap(vaddr);
 	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
 }
@@ -332,6 +461,65 @@ static struct notifier_block amba_bus_nb = {
 
 extern int swiotlb_late_init_with_default_size(size_t default_size);
 
+static int __init atomic_pool_init(void)
+{
+	struct dma_pool *pool = &atomic_pool;
+	pgprot_t prot = pgprot_writecombine(pgprot_default);
+	unsigned long nr_pages = pool->size >> PAGE_SHIFT;
+	unsigned long *bitmap;
+	struct page *page;
+	struct page **pages;
+	int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
+
+	bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!bitmap)
+		goto no_bitmap;
+
+	pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto no_pages;
+
+	if (IS_ENABLED(CONFIG_CMA))
+		page = dma_alloc_from_contiguous(NULL, nr_pages,
+					get_order(pool->size));
+	else
+		page = alloc_pages(GFP_KERNEL, get_order(pool->size));
+
+
+	if (page) {
+		int i;
+		void *addr = page_address(page);
+
+		memset(addr, 0, pool->size);
+		__dma_flush_range(addr, addr + pool->size);
+
+		for (i = 0; i < nr_pages; i++)
+			pages[i] = page + i;
+
+		spin_lock_init(&pool->lock);
+		pool->pages = pages;
+		pool->noncoherent_vaddr = vmap(pages, nr_pages, VM_MAP, prot);
+		if (pool->noncoherent_vaddr == NULL)
+			goto out;
+		pool->coherent_vaddr = addr;
+		pool->bitmap = bitmap;
+		pool->nr_pages = nr_pages;
+		pr_info("DMA: preallocated %u KiB pool for atomic allocations\n",
+			(unsigned)pool->size / 1024);
+		return 0;
+	}
+
+out:
+	kfree(pages);
+no_pages:
+	kfree(bitmap);
+no_bitmap:
+	pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+		(unsigned)pool->size / 1024);
+	return -ENOMEM;
+}
+postcore_initcall(atomic_pool_init);
+
 static int __init swiotlb_late_init(void)
 {
 	size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation
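
For readers following along, the call path this pool is meant to serve looks
roughly like the sketch below (hypothetical driver code, not part of the patch;
the handler and names are invented). GFP_ATOMIC does not include __GFP_WAIT, so
with this patch such a request is carved out of the preallocated pool instead
of going through CMA or vmap(), both of which can sleep:

	#include <linux/dma-mapping.h>
	#include <linux/interrupt.h>
	#include <linux/sizes.h>

	static irqreturn_t demo_irq_handler(int irq, void *data)
	{
		struct device *dev = data;
		dma_addr_t dma;
		void *buf;

		/* atomic context: the !(flags & __GFP_WAIT) path is taken */
		buf = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_ATOMIC);
		if (!buf)
			return IRQ_NONE;

		/* ... hand 'dma' to the hardware ... */

		dma_free_coherent(dev, SZ_4K, buf, dma);
		return IRQ_HANDLED;
	}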

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
  2014-06-02 20:03 ` Laura Abbott
@ 2014-06-03  0:23     ` David Riley
  -1 siblings, 0 replies; 17+ messages in thread
From: David Riley @ 2014-06-03  0:23 UTC (permalink / raw)
  To: Laura Abbott
  Cc: Will Deacon, Catalin Marinas, Ritesh Harjani,
	linux-arm-kernel@lists.infradead.org,
	devicetree@vger.kernel.org

This patch addresses the issues I had previously run into.

Dave

On Mon, Jun 2, 2014 at 1:03 PM, Laura Abbott <lauraa@codeaurora.org> wrote:
> Neither CMA nor noncoherent allocations support atomic allocations.
> Add a dedicated atomic pool to support this.
>
> [...]

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
  2014-06-02 20:03 ` Laura Abbott
@ 2014-06-03 13:28     ` Will Deacon
  -1 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2014-06-03 13:28 UTC (permalink / raw)
  To: Laura Abbott
  Cc: Catalin Marinas, Ritesh Harjani, David Riley,
	linux-arm-kernel@lists.infradead.org,
	devicetree@vger.kernel.org

Hi Laura,

On Mon, Jun 02, 2014 at 09:03:52PM +0100, Laura Abbott wrote:
> Neither CMA nor noncoherent allocations support atomic allocations.
> Add a dedicated atomic pool to support this.
> 
> Change-Id: I46c8fdffe5e0687403d42b37643137c8cf344259
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
> 
> v2: Various bug fixes pointed out by David and Ritesh (CMA dependency, swapping
> coherent, noncoherent). I'm still not sure how to address the devicetree
> suggestion by Will [1][2]. I added the devicetree mailing list this time around
> to get more input on this.
> 
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249180.html
> [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249528.html

Perhaps that can be done later then, since from what you're saying, we need
the command-line option either way? Have you looked at how this fits in with
the iommu-helper work from Ritesh? We could put the parameter parsing in
there too.

Will

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
  2014-06-03 13:28     ` Will Deacon
@ 2014-06-04  0:30         ` Laura Abbott
  -1 siblings, 0 replies; 17+ messages in thread
From: Laura Abbott @ 2014-06-04  0:30 UTC (permalink / raw)
  To: Will Deacon
  Cc: Catalin Marinas, devicetree@vger.kernel.org, David Riley,
	linux-arm-kernel@lists.infradead.org,
	Ritesh Harjani

On 6/3/2014 6:28 AM, Will Deacon wrote:
> Hi Laura,
> 
> On Mon, Jun 02, 2014 at 09:03:52PM +0100, Laura Abbott wrote:
>> Neither CMA nor noncoherent allocations support atomic allocations.
>> Add a dedicated atomic pool to support this.
>>
>> Change-Id: I46c8fdffe5e0687403d42b37643137c8cf344259
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>> ---
>>
>> v2: Various bug fixes pointed out by David and Ritesh (CMA dependency, swapping
>> coherent, noncoherent). I'm still not sure how to address the devicetree
>> suggestion by Will [1][2]. I added the devicetree mailing list this time around
>> to get more input on this.
>>
>> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249180.html
>> [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249528.html
> 
> Perhaps that can be done later then, since from what you're saying, we need
> the command-line option either way? Have you looked at how this fits in with
> the iommu-helper work from Ritesh? We could put the parameter parsing in
> there too.
> 

This doesn't seem to overlap with Ritesh's work. The atomic mapping is still
handled in the arm-specific code, so I assume it would be handled in the
arm64-specific code as well. Another question is whether it would be useful
to make the atomic code common between arm and arm64.

> Will
> 

Thanks,
Laura

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
  2014-06-04  0:30         ` Laura Abbott
@ 2014-06-04 17:59             ` Will Deacon
  -1 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2014-06-04 17:59 UTC (permalink / raw)
  To: Laura Abbott
  Cc: Catalin Marinas, devicetree@vger.kernel.org, David Riley,
	linux-arm-kernel@lists.infradead.org,
	Ritesh Harjani

On Wed, Jun 04, 2014 at 01:30:18AM +0100, Laura Abbott wrote:
> On 6/3/2014 6:28 AM, Will Deacon wrote:
> > Hi Laura,
> > 
> > On Mon, Jun 02, 2014 at 09:03:52PM +0100, Laura Abbott wrote:
> >> Neither CMA nor noncoherent allocations support atomic allocations.
> >> Add a dedicated atomic pool to support this.
> >>
> >> Change-Id: I46c8fdffe5e0687403d42b37643137c8cf344259
> >> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> >> ---
> >>
> >> v2: Various bug fixes pointed out by David and Ritesh (CMA dependency, swapping
> >> coherent, noncoherent). I'm still not sure how to address the devicetree
> >> suggestion by Will [1][2]. I added the devicetree mailing list this time around
> >> to get more input on this.
> >>
> >> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249180.html
> >> [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249528.html
> > 
> > Perhaps that can be done later then, since from what you're saying, we need
> > the command-line option either way? Have you looked at how this fits in with
> > the iommu-helper work from Ritesh? We could put the parameter parsing in
> > there too.
> > 
> 
> This doesn't seem to overlap with Ritesh's work. The atomic mapping is still
> handled in the arm-specific code, so I assume it would be handled in the
> arm64-specific code as well. Another question is whether it would be useful
> to make the atomic code common between arm and arm64.

Yeah, that's what I was alluding to. The more of this code that can be
shared between architectures, the better.

Will

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
  2014-06-02 20:03 ` Laura Abbott
@ 2014-06-05 17:05   ` Catalin Marinas
  -1 siblings, 0 replies; 17+ messages in thread
From: Catalin Marinas @ 2014-06-05 17:05 UTC (permalink / raw)
  To: Laura Abbott
  Cc: David Riley, devicetree, Will Deacon, linux-arm-kernel, Ritesh Harjani

Hi Laura,

On Mon, Jun 02, 2014 at 09:03:52PM +0100, Laura Abbott wrote:
> Neither CMA nor noncoherent allocations support atomic allocations.
> Add a dedicated atomic pool to support this.

CMA indeed doesn't support atomic allocations but swiotlb does, the only
problem being the vmap() to create a non-cacheable mapping. Could we not
use the atomic pool only for non-coherent allocations?

> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
[...]
>  static void *__dma_alloc_coherent(struct device *dev, size_t size,
>  				  dma_addr_t *dma_handle, gfp_t flags,
>  				  struct dma_attrs *attrs)
> @@ -53,7 +157,16 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
>  	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
>  	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
>  		flags |= GFP_DMA;
> -	if (IS_ENABLED(CONFIG_DMA_CMA)) {

So here just check for:

	if ((flags & __GFP_WAIT) && IS_ENABLED(CONFIG_DMA_CMA)) {

> +
> +	if (!(flags & __GFP_WAIT)) {
> +		struct page *page = NULL;
> +		void *addr = __alloc_from_pool(size, &page, true);
> +
> +		if (addr)
> +			*dma_handle = phys_to_dma(dev, page_to_phys(page));
> +
> +		return addr;

and ignore the __alloc_from_pool() call.
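
That is, something along these lines (untested sketch; the swiotlb
fall-through at the end mirrors what the current code already does):

	static void *__dma_alloc_coherent(struct device *dev, size_t size,
					  dma_addr_t *dma_handle, gfp_t flags,
					  struct dma_attrs *attrs)
	{
		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
		    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
			flags |= GFP_DMA;

		if ((flags & __GFP_WAIT) && IS_ENABLED(CONFIG_DMA_CMA)) {
			struct page *page;

			size = PAGE_ALIGN(size);
			page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
							 get_order(size));
			if (!page)
				return NULL;

			*dma_handle = phys_to_dma(dev, page_to_phys(page));
			return page_address(page);
		}

		/* swiotlb copes with atomic allocations on its own */
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}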

> @@ -78,7 +191,9 @@ static void __dma_free_coherent(struct device *dev, size_t size,
>  		return;
>  	}
>  
> -	if (IS_ENABLED(CONFIG_DMA_CMA)) {
> +	if (__free_from_pool(vaddr, size, true)) {
> +		return;
> +	} else if (IS_ENABLED(CONFIG_DMA_CMA)) {
>  		phys_addr_t paddr = dma_to_phys(dev, dma_handle);
>  
>  		dma_release_from_contiguous(dev,

Here you could check the return value of dma_release_from_contiguous() and,
if it is false, fall back to the swiotlb release.

I guess we don't even need the IS_ENABLED(DMA_CMA) check since when
disabled those functions return NULL/false anyway.
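
i.e. roughly (again an untested sketch):

	static void __dma_free_coherent(struct device *dev, size_t size,
					void *vaddr, dma_addr_t dma_handle,
					struct dma_attrs *attrs)
	{
		phys_addr_t paddr = dma_to_phys(dev, dma_handle);

		if (__free_from_pool(vaddr, size, true))
			return;

		/* returns false when the pages did not come from CMA */
		if (dma_release_from_contiguous(dev, phys_to_page(paddr),
						size >> PAGE_SHIFT))
			return;

		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
	}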

> @@ -100,9 +215,21 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
>  	size = PAGE_ALIGN(size);
>  	order = get_order(size);
>  
> +	if (!(flags & __GFP_WAIT)) {
> +		struct page *page = NULL;
> +		void *addr = __alloc_from_pool(size, &page, false);
> +
> +		if (addr)
> +			*dma_handle = phys_to_dma(dev, page_to_phys(page));
> +
> +		return addr;
> +
> +	}

Here we need the atomic pool as we can't remap the memory as uncacheable
in atomic context.

> @@ -332,6 +461,65 @@ static struct notifier_block amba_bus_nb = {
>  
>  extern int swiotlb_late_init_with_default_size(size_t default_size);
>  
> +static int __init atomic_pool_init(void)
> +{
> +	struct dma_pool *pool = &atomic_pool;
> +	pgprot_t prot = pgprot_writecombine(pgprot_default);

In linux-next I got rid of pgprot_default entirely, just use
__pgprot(PROT_NORMAL_NC).

> +	unsigned long nr_pages = pool->size >> PAGE_SHIFT;
> +	unsigned long *bitmap;
> +	struct page *page;
> +	struct page **pages;
> +	int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
> +
> +	bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +	if (!bitmap)
> +		goto no_bitmap;
> +
> +	pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
> +	if (!pages)
> +		goto no_pages;
> +
> +	if (IS_ENABLED(CONFIG_CMA))
> +		page = dma_alloc_from_contiguous(NULL, nr_pages,
> +					get_order(pool->size));
> +	else
> +		page = alloc_pages(GFP_KERNEL, get_order(pool->size));

I think the safest is to use GFP_DMA as well. Without knowing exactly
what devices will do, what their dma masks are, I think that's a safer
bet. I plan to limit the CMA buffer to ZONE_DMA as well for lack of a
better option.
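
For the alloc_pages() fallback that would simply be (sketch):

	page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(pool->size));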

BTW, most of this code could be turned into a library, especially if we
don't need to separate coherent/non-coherent pools. Also, a lot of code
is similar to the dma_alloc_from_coherent() implementation (apart from
the ioremap() call in dma_declare_coherent_memory() and per-device pool
rather than global one).

-- 
Catalin

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocations.
  2014-06-05 17:05   ` Catalin Marinas
@ 2014-06-07  0:55       ` Laura Abbott
  -1 siblings, 0 replies; 17+ messages in thread
From: Laura Abbott @ 2014-06-07  0:55 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Will Deacon, Ritesh Harjani, David Riley,
	linux-arm-kernel@lists.infradead.org,
	devicetree@vger.kernel.org

On 6/5/2014 10:05 AM, Catalin Marinas wrote:
> Hi Laura,
> 
> On Mon, Jun 02, 2014 at 09:03:52PM +0100, Laura Abbott wrote:
>> Neither CMA nor noncoherent allocations support atomic allocations.
>> Add a dedicated atomic pool to support this.
> 
> CMA indeed doesn't support atomic allocations but swiotlb does, the only
> problem being the vmap() to create a non-cacheable mapping. Could we not
> use the atomic pool only for non-coherent allocations?
>

CMA needs the atomic pool for both non-coherent and coherent allocations.
Perhaps I should update the code so we only create the coherent atomic
pool if CMA is used.
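
Something along these lines in atomic_pool_init() (just an untested
sketch of the idea):

	/*
	 * Coherent atomic allocations can fall back to swiotlb when CMA
	 * is not used, so only set up the coherent side of the pool for
	 * the CMA case.
	 */
	if (IS_ENABLED(CONFIG_DMA_CMA))
		pool->coherent_vaddr = addr;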

.... 

> 
> I think the safest is to use GFP_DMA as well. Without knowing exactly
> what devices will do, what their dma masks are, I think that's a safer
> bet. I plan to limit the CMA buffer to ZONE_DMA as well for lack of a
> better option.
> 
> BTW, most of this code could be turned into a library, especially if we
> don't need to separate coherent/non-coherent pools. Also, a lot of code
> is similar to the dma_alloc_from_coherent() implementation (apart from
> the ioremap() call in dma_declare_coherent_memory() and per-device pool
> rather than global one).
> 

I'm looking into whether lib/genalloc.c can be extended for this
purpose, which should at least eliminate some of the duplicated bitmap
management code. If that doesn't work out, I'll pull what we have into
a library.
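
Roughly what I have in mind (untested; pool_size below just stands for
the total pool size):

	#include <linux/genalloc.h>

	static struct gen_pool *atomic_pool;

	/* one pool with page granularity covering the atomic region */
	atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
	gen_pool_add_virt(atomic_pool, (unsigned long)vaddr,
			  page_to_phys(page), pool_size, -1);

	/* allocation and free then become one-liners */
	addr = (void *)gen_pool_alloc(atomic_pool, size);
	gen_pool_free(atomic_pool, (unsigned long)addr, size);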

Thanks,
Laura
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocaitons.
  2014-06-07  0:55       ` Laura Abbott
@ 2014-06-09  9:27         ` Catalin Marinas
  -1 siblings, 0 replies; 17+ messages in thread
From: Catalin Marinas @ 2014-06-09  9:27 UTC (permalink / raw)
  To: Laura Abbott
  Cc: David Riley, devicetree, Will Deacon, linux-arm-kernel, Ritesh Harjani

On Sat, Jun 07, 2014 at 01:55:22AM +0100, Laura Abbott wrote:
> On 6/5/2014 10:05 AM, Catalin Marinas wrote:
> > On Mon, Jun 02, 2014 at 09:03:52PM +0100, Laura Abbott wrote:
> >> Neither CMA nor noncoherent allocations support atomic allocations.
> >> Add a dedicated atomic pool to support this.
> > 
> > CMA indeed doesn't support atomic allocations but swiotlb does, the only
> > problem being the vmap() to create a non-cacheable mapping. Could we not
> > use the atomic pool only for non-coherent allocations?
> 
> CMA needs the atomic pool for both non-coherent and coherent allocations.
> Perhaps I should update the code so we only create the coherent atomic
> pool if CMA is used.

It's also needed for non-coherent swiotlb allocations because of the
vmap() (coherent allocations are fine).

-- 
Catalin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocaitons.
@ 2014-06-04 16:17 Ritesh Harjani
  0 siblings, 0 replies; 17+ messages in thread
From: Ritesh Harjani @ 2014-06-04 16:17 UTC (permalink / raw)
  To: Laura Abbott
  Cc: Will Deacon, Catalin Marinas, David Riley,
	devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	Ritesh Harjani, mp.vikram-Re5JQEeQqe8AvxtiuMwx3w

Hi Laura,

I think you addressed all my previous comments. Just two more minor
comments below. Also, the subject line misspells "allocations" as
"allocaitons".

On Tue, Jun 3, 2014 at 1:33 AM, Laura Abbott <lauraa-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org> wrote:
> Neither CMA nor noncoherent allocations support atomic allocations.
> Add a dedicated atomic pool to support this.
>
> Change-Id: I46c8fdffe5e0687403d42b37643137c8cf344259
> Signed-off-by: Laura Abbott <lauraa-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
> ---
>
> v2: Various bug fixes pointed out by David and Ritesh (CMA dependency, swapping
> coherent, noncoherent). I'm still not sure how to address the devicetree
> suggestion by Will [1][2]. I added the devicetree mailing list this time around
> to get more input on this.
>
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249180.html
> [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/249528.html
>
> ---
>  arch/arm64/mm/dma-mapping.c | 192 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 190 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index c851eb4..792d43c 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -41,6 +41,110 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
>         return prot;
>  }
>
> +#define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
> +
> +struct dma_pool {
> +       size_t size;
> +       spinlock_t lock;
> +       void *coherent_vaddr;
> +       void *noncoherent_vaddr;
> +       unsigned long *bitmap;
> +       unsigned long nr_pages;
> +       struct page **pages;
> +};
> +
> +static struct dma_pool atomic_pool = {
> +       .size = DEFAULT_DMA_COHERENT_POOL_SIZE,
> +};
> +
> +static int __init early_coherent_pool(char *p)
> +{
> +       atomic_pool.size = memparse(p, &p);
> +       return 0;
> +}
> +early_param("coherent_pool", early_coherent_pool);
> +
> +static void *__alloc_from_pool(size_t size, struct page **ret_page,
> +                                       bool coherent)
> +{
> +       struct dma_pool *pool = &atomic_pool;
> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +       unsigned int pageno;
> +       unsigned long flags;
> +       void *ptr = NULL;
> +       unsigned long align_mask;
> +       void *pool_start = coherent ? pool->coherent_vaddr :
> +                                     pool->noncoherent_vaddr;
> +
> +       if (!pool->coherent_vaddr || !pool->noncoherent_vaddr) {
> +               WARN(1, "Atomic pool not initialised!\n");
> +               return NULL;
> +       }
> +
> +       /*
> +        * Align the region allocation - allocations from pool are rather
> +        * small, so align them to their order in pages, minimum is a page
> +        * size. This helps reduce fragmentation of the DMA space.
> +        */
> +       align_mask = (1 << get_order(size)) - 1;
> +
> +       spin_lock_irqsave(&pool->lock, flags);
> +       pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
> +                                           0, count, align_mask);
> +       if (pageno < pool->nr_pages) {
> +               bitmap_set(pool->bitmap, pageno, count);
> +               ptr = pool_start + PAGE_SIZE * pageno;
> +               *ret_page = pool->pages[pageno];
> +       } else {
> +               pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
> +                           "Please increase it with coherent_pool= kernel parameter!\n",
> +                               (unsigned)pool->size / 1024);
> +       }
> +       spin_unlock_irqrestore(&pool->lock, flags);
> +
> +       return ptr;
> +}
> +
> +static bool __in_atomic_pool(void *start, size_t size, void *pool_start)
> +{
> +       struct dma_pool *pool = &atomic_pool;
> +       void *end = start + size;
> +       void *pool_end = pool_start + pool->size;
> +
> +       if (start < pool_start || start >= pool_end)
> +               return false;
> +
> +       if (end <= pool_end)
> +               return true;
> +
> +       WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
> +               start, end - 1, pool_start, pool_end - 1);
> +
> +       return false;
> +}
> +
> +static int __free_from_pool(void *start, size_t size, bool coherent)
> +{
> +       struct dma_pool *pool = &atomic_pool;
> +       unsigned long pageno, count;
> +       unsigned long flags;
> +       void *pool_start = coherent ? pool->coherent_vaddr :
> +                                     pool->noncoherent_vaddr;
> +
> +       if (!__in_atomic_pool(start, size, pool_start))
> +               return 0;
> +
> +       pageno = (start - pool_start) >> PAGE_SHIFT;
> +       count = size >> PAGE_SHIFT;
> +
> +       spin_lock_irqsave(&pool->lock, flags);
> +       bitmap_clear(pool->bitmap, pageno, count);
> +       spin_unlock_irqrestore(&pool->lock, flags);
> +
> +       return 1;
> +}
> +
> +
>  static void *__dma_alloc_coherent(struct device *dev, size_t size,
>                                   dma_addr_t *dma_handle, gfp_t flags,
>                                   struct dma_attrs *attrs)
> @@ -53,7 +157,16 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
>         if (IS_ENABLED(CONFIG_ZONE_DMA) &&
>             dev->coherent_dma_mask <= DMA_BIT_MASK(32))
>                 flags |= GFP_DMA;
> -       if (IS_ENABLED(CONFIG_DMA_CMA)) {
> +
> +       if (!(flags & __GFP_WAIT)) {
> +               struct page *page = NULL;
> +               void *addr = __alloc_from_pool(size, &page, true);
> +
> +               if (addr)
> +                       *dma_handle = phys_to_dma(dev, page_to_phys(page));
> +
> +               return addr;
> +       } else if (IS_ENABLED(CONFIG_DMA_CMA)) {
>                 struct page *page;
>
>                 size = PAGE_ALIGN(size);
> @@ -78,7 +191,9 @@ static void __dma_free_coherent(struct device *dev, size_t size,
>                 return;
>         }
>
> -       if (IS_ENABLED(CONFIG_DMA_CMA)) {
> +       if (__free_from_pool(vaddr, size, true)) {
> +               return;
> +       } else if (IS_ENABLED(CONFIG_DMA_CMA)) {
>                 phys_addr_t paddr = dma_to_phys(dev, dma_handle);
>
>                 dma_release_from_contiguous(dev,
> @@ -100,9 +215,21 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
>         size = PAGE_ALIGN(size);
>         order = get_order(size);
>
> +       if (!(flags & __GFP_WAIT)) {
> +               struct page *page = NULL;
> +               void *addr = __alloc_from_pool(size, &page, false);
> +
> +               if (addr)
> +                       *dma_handle = phys_to_dma(dev, page_to_phys(page));
> +
> +               return addr;
> +
> +       }
> +
>         ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
>         if (!ptr)
>                 goto no_mem;
> +
>         map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
>         if (!map)
>                 goto no_map;
> @@ -135,6 +262,8 @@ static void __dma_free_noncoherent(struct device *dev, size_t size,
>  {
>         void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
>
> +       if (__free_from_pool(vaddr, size, false))
> +               return;
>         vunmap(vaddr);
>         __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
>  }
> @@ -332,6 +461,65 @@ static struct notifier_block amba_bus_nb = {
>
>  extern int swiotlb_late_init_with_default_size(size_t default_size);
>
> +static int __init atomic_pool_init(void)
> +{
> +       struct dma_pool *pool = &atomic_pool;
> +       pgprot_t prot = pgprot_writecombine(pgprot_default);

I thought pgprot_default had been removed. Maybe that change isn't
merged yet, but I can see it is removed on Catalin's branch.
Also, shouldn't you be using __get_dma_pgprot() here?

> +       unsigned long nr_pages = pool->size >> PAGE_SHIFT;
> +       unsigned long *bitmap;
> +       struct page *page;
> +       struct page **pages;
> +       int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
> +
> +       bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +       if (!bitmap)
> +               goto no_bitmap;
> +
> +       pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
> +       if (!pages)
> +               goto no_pages;
> +
> +       if (IS_ENABLED(CONFIG_CMA))
> +               page = dma_alloc_from_contiguous(NULL, nr_pages,
> +                                       get_order(pool->size));
> +       else
> +               page = alloc_pages(GFP_KERNEL, get_order(pool->size));

Should this be GFP_KERNEL | GFP_DMA? I am not sure.

> +
> +
> +       if (page) {
> +               int i;
> +               void *addr = page_address(page);
> +
> +               memset(addr, 0, pool->size);
> +               __dma_flush_range(addr, addr + pool->size);
> +
> +               for (i = 0; i < nr_pages; i++)
> +                       pages[i] = page + i;
> +
> +               spin_lock_init(&pool->lock);
> +               pool->pages = pages;
> +               pool->noncoherent_vaddr = vmap(pages, nr_pages, VM_MAP, prot);
> +               if (pool->noncoherent_vaddr == NULL)
> +                       goto out;
> +               pool->coherent_vaddr = addr;
> +               pool->bitmap = bitmap;
> +               pool->nr_pages = nr_pages;
> +               pr_info("DMA: preallocated %u KiB pool for atomic allocations\n",
> +                       (unsigned)pool->size / 1024);
> +               return 0;
> +       }
> +
> +out:
> +       kfree(pages);
> +no_pages:
> +       kfree(bitmap);
> +no_bitmap:
> +       pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
> +               (unsigned)pool->size / 1024);
> +       return -ENOMEM;
> +}
> +postcore_initcall(atomic_pool_init);
> +
>  static int __init swiotlb_late_init(void)
>  {
>         size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


Thanks
Ritesh

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2014-06-09  9:27 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-02 20:03 [PATCHv2] arm64: Add atomic pool for non-coherent and CMA allocaitons Laura Abbott
2014-06-02 20:03 ` Laura Abbott
     [not found] ` <1401739432-5358-1-git-send-email-lauraa-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
2014-06-03  0:23   ` David Riley
2014-06-03  0:23     ` David Riley
2014-06-03 13:28   ` Will Deacon
2014-06-03 13:28     ` Will Deacon
     [not found]     ` <20140603132842.GI23149-5wv7dgnIgG8@public.gmane.org>
2014-06-04  0:30       ` Laura Abbott
2014-06-04  0:30         ` Laura Abbott
     [not found]         ` <538E689A.3050109-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
2014-06-04 17:59           ` Will Deacon
2014-06-04 17:59             ` Will Deacon
2014-06-05 17:05 ` Catalin Marinas
2014-06-05 17:05   ` Catalin Marinas
     [not found]   ` <20140605170500.GC27946-5wv7dgnIgG8@public.gmane.org>
2014-06-07  0:55     ` Laura Abbott
2014-06-07  0:55       ` Laura Abbott
2014-06-09  9:27       ` Catalin Marinas
2014-06-09  9:27         ` Catalin Marinas
2014-06-04 16:17 Ritesh Harjani
