All of lore.kernel.org
 help / color / mirror / Atom feed
* An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13  7:00 ` Jichao Zou
  0 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13  7:00 UTC (permalink / raw)
  To: akpm, linux-kernel, linux-mm, minchan, david, song.bao.hua, hch,
	m.szyprowski, robin.murphy, iommu, JianQi Yang, Yanjune Tian

[-- Attachment #1: Type: text/plain, Size: 399 bytes --]

Pre-allocate CMA memory that is configured in the
device tree. This greatly improves CMA memory
allocation efficiency: cma_[alloc|free] takes less
than 1 ms, whereas the old way took from a few ms
to tens or hundreds of ms.

Thanks.

Best Regards,

Zou Jichao 邹纪超
Advisory Engineer, SW BSP
MBG ROW SW BJ PF BSP (CN)
Motorola Mobility, A Lenovo Company
motorola.com 





[-- Attachment #2: 0001-cma-optimize-cma-allocation.patch --]
[-- Type: application/octet-stream, Size: 4988 bytes --]

From a82dbc46e2343c394d3edcabcbc73ea6e9f403da Mon Sep 17 00:00:00 2001
From: Jichao Zou <zoujc@motorola.com>
Date: Thu, 12 Aug 2021 18:20:21 +0800
Subject: [PATCH] cma:optimize cma allocation.

Pre-allocate CMA memory that is configured in the
device tree. This greatly improves CMA memory
allocation efficiency: cma_[alloc|free] takes less
than 1 ms, whereas the old way took from a few ms
to tens or hundreds of ms.

Signed-off-by: Jichao Zou <zoujc@motorola.com>
---
 include/linux/cma.h     |  3 ++-
 kernel/dma/contiguous.c |  2 +-
 mm/cma.c                | 35 ++++++++++++++++++++++++++++++++---
 mm/cma.h                |  1 +
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 53fd8c3cdbd0..68bc147a82a7 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -43,7 +43,8 @@ static inline int __init cma_declare_contiguous(phys_addr_t base,
 extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					unsigned int order_per_bit,
 					const char *name,
-					struct cma **res_cma);
+					struct cma **res_cma,
+					unsigned long node);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 3d63d91cba5c..d77c2745244c 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -421,7 +421,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
 		return -EINVAL;
 	}
 
-	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma, node);
 	if (err) {
 		pr_err("Reserved memory: unable to setup CMA region\n");
 		return err;
diff --git a/mm/cma.c b/mm/cma.c
index 995e15480937..c5682d03c5e9 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/kmemleak.h>
 #include <trace/events/cma.h>
+#include <linux/of_fdt.h>
 
 #include "cma.h"
 
@@ -124,6 +125,17 @@ static void __init cma_activate_area(struct cma *cma)
 	INIT_HLIST_HEAD(&cma->mem_head);
 	spin_lock_init(&cma->mem_head_lock);
 #endif
+	if (cma->preallocated_cma) {
+		struct acr_info info = {0};
+
+		pfn = base_pfn;
+		if (!alloc_contig_range(pfn, pfn + cma->count, MIGRATE_CMA, GFP_KERNEL, &info)) {
+			pr_info("CMA area %s be pre-allocated successfully\n", cma->name);
+		} else {
+			cma->preallocated_cma = false;
+			pr_err("CMA area %s be pre-allocated failure\n", cma->name);
+		}
+	}
 
 	return;
 
@@ -159,13 +171,15 @@ core_initcall(cma_init_reserved_areas);
  *        the area will be set to "cmaN", where N is a running counter of
  *        used areas.
  * @res_cma: Pointer to store the created cma region.
+ * @node: CMA memory dtsi node.
  *
  * This function creates custom contiguous area from already reserved memory.
  */
 int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 				 unsigned int order_per_bit,
 				 const char *name,
-				 struct cma **res_cma)
+				 struct cma **res_cma,
+				 unsigned long node)
 {
 	struct cma *cma;
 	phys_addr_t alignment;
@@ -204,6 +218,9 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 	cma->base_pfn = PFN_DOWN(base);
 	cma->count = size >> PAGE_SHIFT;
 	cma->order_per_bit = order_per_bit;
+	if (node)
+		cma->preallocated_cma = of_get_flat_dt_prop(node, "linux,preallocated-cma", NULL);
+
 	*res_cma = cma;
 	cma_area_count++;
 	totalcma_pages += (size / PAGE_SIZE);
@@ -369,7 +386,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
 		base = addr;
 	}
 
-	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
+	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma, 0);
 	if (ret)
 		goto free_mem;
 
@@ -471,6 +488,16 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
 		spin_unlock_irq(&cma->lock);
 
 		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+
+		/*
+		 * cma bitmap should ensure that pfn is in the cma.
+		 */
+		if (cma->preallocated_cma) {
+			BUG_ON(pfn + count > cma->base_pfn + cma->count);
+			page = pfn_to_page(pfn);
+			ret = 0;
+			break;
+		}
 		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
 				     GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
 
@@ -551,7 +578,9 @@ bool cma_release(struct cma *cma, const struct page *pages,
 
 	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
 
-	free_contig_range(pfn, count);
+	if (!cma->preallocated_cma)
+		free_contig_range(pfn, count);
+
 	cma_clear_bitmap(cma, pfn, count);
 	trace_cma_release(cma->name, pfn, pages, count);
 
diff --git a/mm/cma.h b/mm/cma.h
index 2c775877eae2..1778cb0e68c4 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -30,6 +30,7 @@ struct cma {
 	/* kobject requires dynamic object */
 	struct cma_kobject *cma_kobj;
 #endif
+	bool preallocated_cma;
 };
 
 extern struct cma cma_areas[MAX_CMA_AREAS];
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13  7:00 ` Jichao Zou
  0 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13  7:00 UTC (permalink / raw)
  To: akpm, linux-kernel, linux-mm, minchan, david, song.bao.hua, hch,
	m.szyprowski, robin.murphy, iommu, JianQi Yang, Yanjune Tian

[-- Attachment #1: Type: text/plain, Size: 399 bytes --]

Pre-allocate CMA memory that is configured in the
device tree. This greatly improves CMA memory
allocation efficiency: cma_[alloc|free] takes less
than 1 ms, whereas the old way took from a few ms
to tens or hundreds of ms.

Thanks.

Best Regards,

Zou Jichao 邹纪超
Advisory Engineer, SW BSP
MBG ROW SW BJ PF BSP (CN)
Motorola Mobility, A Lenovo Company
motorola.com 





[-- Attachment #2: 0001-cma-optimize-cma-allocation.patch --]
[-- Type: application/octet-stream, Size: 4988 bytes --]

From a82dbc46e2343c394d3edcabcbc73ea6e9f403da Mon Sep 17 00:00:00 2001
From: Jichao Zou <zoujc@motorola.com>
Date: Thu, 12 Aug 2021 18:20:21 +0800
Subject: [PATCH] cma:optimize cma allocation.

Pre-allocate CMA memory that is configured in the
device tree. This greatly improves CMA memory
allocation efficiency: cma_[alloc|free] takes less
than 1 ms, whereas the old way took from a few ms
to tens or hundreds of ms.

Signed-off-by: Jichao Zou <zoujc@motorola.com>
---
 include/linux/cma.h     |  3 ++-
 kernel/dma/contiguous.c |  2 +-
 mm/cma.c                | 35 ++++++++++++++++++++++++++++++++---
 mm/cma.h                |  1 +
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 53fd8c3cdbd0..68bc147a82a7 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -43,7 +43,8 @@ static inline int __init cma_declare_contiguous(phys_addr_t base,
 extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					unsigned int order_per_bit,
 					const char *name,
-					struct cma **res_cma);
+					struct cma **res_cma,
+					unsigned long node);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 3d63d91cba5c..d77c2745244c 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -421,7 +421,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
 		return -EINVAL;
 	}
 
-	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma, node);
 	if (err) {
 		pr_err("Reserved memory: unable to setup CMA region\n");
 		return err;
diff --git a/mm/cma.c b/mm/cma.c
index 995e15480937..c5682d03c5e9 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/kmemleak.h>
 #include <trace/events/cma.h>
+#include <linux/of_fdt.h>
 
 #include "cma.h"
 
@@ -124,6 +125,17 @@ static void __init cma_activate_area(struct cma *cma)
 	INIT_HLIST_HEAD(&cma->mem_head);
 	spin_lock_init(&cma->mem_head_lock);
 #endif
+	if (cma->preallocated_cma) {
+		struct acr_info info = {0};
+
+		pfn = base_pfn;
+		if (!alloc_contig_range(pfn, pfn + cma->count, MIGRATE_CMA, GFP_KERNEL, &info)) {
+			pr_info("CMA area %s be pre-allocated successfully\n", cma->name);
+		} else {
+			cma->preallocated_cma = false;
+			pr_err("CMA area %s be pre-allocated failure\n", cma->name);
+		}
+	}
 
 	return;
 
@@ -159,13 +171,15 @@ core_initcall(cma_init_reserved_areas);
  *        the area will be set to "cmaN", where N is a running counter of
  *        used areas.
  * @res_cma: Pointer to store the created cma region.
+ * @node: CMA memory dtsi node.
  *
  * This function creates custom contiguous area from already reserved memory.
  */
 int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 				 unsigned int order_per_bit,
 				 const char *name,
-				 struct cma **res_cma)
+				 struct cma **res_cma,
+				 unsigned long node)
 {
 	struct cma *cma;
 	phys_addr_t alignment;
@@ -204,6 +218,9 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 	cma->base_pfn = PFN_DOWN(base);
 	cma->count = size >> PAGE_SHIFT;
 	cma->order_per_bit = order_per_bit;
+	if (node)
+		cma->preallocated_cma = of_get_flat_dt_prop(node, "linux,preallocated-cma", NULL);
+
 	*res_cma = cma;
 	cma_area_count++;
 	totalcma_pages += (size / PAGE_SIZE);
@@ -369,7 +386,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
 		base = addr;
 	}
 
-	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
+	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma, 0);
 	if (ret)
 		goto free_mem;
 
@@ -471,6 +488,16 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
 		spin_unlock_irq(&cma->lock);
 
 		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+
+		/*
+		 * cma bitmap should ensure that pfn is in the cma.
+		 */
+		if (cma->preallocated_cma) {
+			BUG_ON(pfn + count > cma->base_pfn + cma->count);
+			page = pfn_to_page(pfn);
+			ret = 0;
+			break;
+		}
 		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
 				     GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
 
@@ -551,7 +578,9 @@ bool cma_release(struct cma *cma, const struct page *pages,
 
 	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
 
-	free_contig_range(pfn, count);
+	if (!cma->preallocated_cma)
+		free_contig_range(pfn, count);
+
 	cma_clear_bitmap(cma, pfn, count);
 	trace_cma_release(cma->name, pfn, pages, count);
 
diff --git a/mm/cma.h b/mm/cma.h
index 2c775877eae2..1778cb0e68c4 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -30,6 +30,7 @@ struct cma {
 	/* kobject requires dynamic object */
 	struct cma_kobject *cma_kobj;
 #endif
+	bool preallocated_cma;
 };
 
 extern struct cma cma_areas[MAX_CMA_AREAS];
-- 
2.25.1


[-- Attachment #3: Type: text/plain, Size: 156 bytes --]

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: An cma optimization patch is used for cma_[alloc|free].
  2021-08-13  7:00 ` Jichao Zou
@ 2021-08-13  7:45   ` David Hildenbrand
  -1 siblings, 0 replies; 14+ messages in thread
From: David Hildenbrand @ 2021-08-13  7:45 UTC (permalink / raw)
  To: Jichao Zou, akpm, linux-kernel, linux-mm, minchan, song.bao.hua,
	hch, m.szyprowski, robin.murphy, iommu, JianQi Yang,
	Yanjune Tian

On 13.08.21 09:00, Jichao Zou wrote:
> Pre-allocate CMA memory that configured in device
> tree, this greatly improves the CMA memory
> allocation efficiency, cma_[alloc|free] is less
> than 1ms, old way is took a few ms to tens or
> hundreds ms.
> 

Please send patches as proper emails (man git-format-patch; man 
git-send-email).

What you propose is turning cma reservations into something comparable 
to permanent boottime allocations. From the POV of the buddy, the pages 
are always allocated and cannot be repurposed for e.g., movable 
allocations until *actually* allocated via CMA.

I don't think we want this behavior upstream.

-- 
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13  7:45   ` David Hildenbrand
  0 siblings, 0 replies; 14+ messages in thread
From: David Hildenbrand @ 2021-08-13  7:45 UTC (permalink / raw)
  To: Jichao Zou, akpm, linux-kernel, linux-mm, minchan, song.bao.hua,
	hch, m.szyprowski, robin.murphy, iommu, JianQi Yang,
	Yanjune Tian

On 13.08.21 09:00, Jichao Zou wrote:
> Pre-allocate CMA memory that configured in device
> tree, this greatly improves the CMA memory
> allocation efficiency, cma_[alloc|free] is less
> than 1ms, old way is took a few ms to tens or
> hundreds ms.
> 

Please send patches as proper emails (man git-format-patch; man 
git-send-email).

What you propose is turning cma reservations into something comparable 
to permanent boottime allocations. From the POV of the buddy, the pages 
are always allocated and cannot be repurposed for e.g., movable 
allocations until *actually* allocated via CMA.

I don't think we want this behavior upstream.

-- 
Thanks,

David / dhildenb

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
  2021-08-13  7:45   ` David Hildenbrand
@ 2021-08-13  8:27     ` Jichao Zou
  -1 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13  8:27 UTC (permalink / raw)
  To: David Hildenbrand, akpm, linux-kernel, linux-mm, minchan,
	song.bao.hua, hch, m.szyprowski, robin.murphy, iommu,
	JianQi Yang, Yanjune Tian

Hi David,
	I'll resend the patch with git-send-email.
	Your understanding is exactly right.
	Let me explain the background of the patch. We are developing an Android phone on kernel 5.10.43 LTS, and we encountered cma_alloc failures during kernel startup, at a point where the buddy system was already ready:
01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
 
	The CMA bitmap showed the memory as free, but alloc_contig_range failed. We found that some drivers call cma_alloc as if its prototype were
	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
	whereas in 5.10.43 cma_alloc is
	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
	After changing the cma_alloc argument to GFP_KERNEL, the issue was fixed. At the same time, we found that preallocating a portion of CMA memory for audio and video gave better performance and guaranteed that AV functions work even under memory pressure, so we decided to submit this patch.

Thanks.

Best Regards,

Zou Jichao 邹纪超
Advisory Engineer, SW BSP
MBG ROW SW BJ PF BSP (CN)
Motorola Mobility, A Lenovo Company
motorola.com 
M +86 18910860212
E zoujc@lenovo.com
twitter | facebook | instagram | blog | forums 




-----邮件原件-----
发件人: David Hildenbrand <david@redhat.com> 
发送时间: 2021年8月13日 15:45
收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].

On 13.08.21 09:00, Jichao Zou wrote:
> Pre-allocate CMA memory that configured in device tree, this greatly 
> improves the CMA memory allocation efficiency, cma_[alloc|free] is 
> less than 1ms, old way is took a few ms to tens or hundreds ms.
> 

Please send patches as proper emails (man git-format-patch; man git-send-email).

What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.

I don't think we want this behavior upstream.

--
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 14+ messages in thread

* 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13  8:27     ` Jichao Zou
  0 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13  8:27 UTC (permalink / raw)
  To: David Hildenbrand, akpm, linux-kernel, linux-mm, minchan,
	song.bao.hua, hch, m.szyprowski, robin.murphy, iommu,
	JianQi Yang, Yanjune Tian

Hi David,
	I'll resend the patch with git-send-email.
	Your understanding is exactly right.
	Let me explain the background of the patch. We are developing an Android phone on kernel 5.10.43 LTS, and we encountered cma_alloc failures during kernel startup, at a point where the buddy system was already ready:
01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
 
	The CMA bitmap showed the memory as free, but alloc_contig_range failed. We found that some drivers call cma_alloc as if its prototype were
	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
	whereas in 5.10.43 cma_alloc is
	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
	After changing the cma_alloc argument to GFP_KERNEL, the issue was fixed. At the same time, we found that preallocating a portion of CMA memory for audio and video gave better performance and guaranteed that AV functions work even under memory pressure, so we decided to submit this patch.

Thanks.

Best Regards,

Zou Jichao 邹纪超
Advisory Engineer, SW BSP
MBG ROW SW BJ PF BSP (CN)
Motorola Mobility, A Lenovo Company
motorola.com 
M +86 18910860212
E zoujc@lenovo.com
twitter | facebook | instagram | blog | forums 




-----邮件原件-----
发件人: David Hildenbrand <david@redhat.com> 
发送时间: 2021年8月13日 15:45
收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].

On 13.08.21 09:00, Jichao Zou wrote:
> Pre-allocate CMA memory that configured in device tree, this greatly 
> improves the CMA memory allocation efficiency, cma_[alloc|free] is 
> less than 1ms, old way is took a few ms to tens or hundreds ms.
> 

Please send patches as proper emails (man git-format-patch; man git-send-email).

What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.

I don't think we want this behavior upstream.

--
Thanks,

David / dhildenb

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
  2021-08-13  8:27     ` Jichao Zou
@ 2021-08-13  9:15       ` Robin Murphy
  -1 siblings, 0 replies; 14+ messages in thread
From: Robin Murphy @ 2021-08-13  9:15 UTC (permalink / raw)
  To: Jichao Zou, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

On 2021-08-13 09:27, Jichao Zou wrote:
> Hi David,
> 	I'll git-send-email patch again.
> 	Your understanding is exactly right.
> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>   
> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
> 	In 5.10.43, cma_alloc is
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>   	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.

The whole point of CMA is that the memory can be shared by moveable 
pages while it's not being used for DMA. If you want a dedicated DMA 
carveout, there are already mechanisms for that.

Robin.

> 
> Thanks.
> 
> Best Regards,
> 
> Zou Jichao 邹纪超
> Advisory Engineer, SW BSP
> MBG ROW SW BJ PF BSP (CN)
> Motorola Mobility, A Lenovo Company
> motorola.com
> M +86 18910860212
> E zoujc@lenovo.com
> twitter | facebook | instagram | blog | forums
> 
> 
> 
> 
> -----邮件原件-----
> 发件人: David Hildenbrand <david@redhat.com>
> 发送时间: 2021年8月13日 15:45
> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 13.08.21 09:00, Jichao Zou wrote:
>> Pre-allocate CMA memory that configured in device tree, this greatly
>> improves the CMA memory allocation efficiency, cma_[alloc|free] is
>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>
> 
> Please send patches as proper emails (man git-format-patch; man git-send-email).
> 
> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
> 
> I don't think we want this behavior upstream.
> 
> --
> Thanks,
> 
> David / dhildenb
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13  9:15       ` Robin Murphy
  0 siblings, 0 replies; 14+ messages in thread
From: Robin Murphy @ 2021-08-13  9:15 UTC (permalink / raw)
  To: Jichao Zou, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

On 2021-08-13 09:27, Jichao Zou wrote:
> Hi David,
> 	I'll git-send-email patch again.
> 	Your understanding is exactly right.
> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>   
> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
> 	In 5.10.43, cma_alloc is
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>   	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.

The whole point of CMA is that the memory can be shared by moveable 
pages while it's not being used for DMA. If you want a dedicated DMA 
carveout, there are already mechanisms for that.

Robin.

> 
> Thanks.
> 
> Best Regards,
> 
> Zou Jichao 邹纪超
> Advisory Engineer, SW BSP
> MBG ROW SW BJ PF BSP (CN)
> Motorola Mobility, A Lenovo Company
> motorola.com
> M +86 18910860212
> E zoujc@lenovo.com
> twitter | facebook | instagram | blog | forums
> 
> 
> 
> 
> -----邮件原件-----
> 发件人: David Hildenbrand <david@redhat.com>
> 发送时间: 2021年8月13日 15:45
> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 13.08.21 09:00, Jichao Zou wrote:
>> Pre-allocate CMA memory that configured in device tree, this greatly
>> improves the CMA memory allocation efficiency, cma_[alloc|free] is
>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>
> 
> Please send patches as proper emails (man git-format-patch; man git-send-email).
> 
> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
> 
> I don't think we want this behavior upstream.
> 
> --
> Thanks,
> 
> David / dhildenb
> 
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
  2021-08-13  9:15       ` Robin Murphy
@ 2021-08-13  9:46         ` Jichao Zou
  -1 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13  9:46 UTC (permalink / raw)
  To: Robin Murphy, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

I understand, but in the kernel version we use, many of the heap drivers in drivers/dma-buf/ are based on CMA, not on a DMA carveout.
If this patch is not acceptable, we will withdraw it.

Thank you all.

Best Regards,

-----邮件原件-----
发件人: Robin Murphy <robin.murphy@arm.com> 
发送时间: 2021年8月13日 17:16
收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
主题: Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].

On 2021-08-13 09:27, Jichao Zou wrote:
> Hi David,
> 	I'll git-send-email patch again.
> 	Your understanding is exactly right.
> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>   
> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
> 	In 5.10.43, cma_alloc is
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>   	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.

The whole point of CMA is that the memory can be shared by moveable pages while it's not being used for DMA. If you want a dedicated DMA carveout, there are already mechanisms for that.

Robin.

> 
> Thanks.
> 
> Best Regards,
> 
> Zou Jichao 邹纪超
> Advisory Engineer, SW BSP
> MBG ROW SW BJ PF BSP (CN)
> Motorola Mobility, A Lenovo Company
> motorola.com
> M +86 18910860212
> E zoujc@lenovo.com
> twitter | facebook | instagram | blog | forums
> 
> 
> 
> 
> -----邮件原件-----
> 发件人: David Hildenbrand <david@redhat.com>
> 发送时间: 2021年8月13日 15:45
> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; 
> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; 
> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; 
> robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang 
> <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 13.08.21 09:00, Jichao Zou wrote:
>> Pre-allocate CMA memory that configured in device tree, this greatly 
>> improves the CMA memory allocation efficiency, cma_[alloc|free] is 
>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>
> 
> Please send patches as proper emails (man git-format-patch; man git-send-email).
> 
> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
> 
> I don't think we want this behavior upstream.
> 
> --
> Thanks,
> 
> David / dhildenb
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13  9:46         ` Jichao Zou
  0 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13  9:46 UTC (permalink / raw)
  To: Robin Murphy, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

I got it, but in the kernel version that we use, many heap drivers in drivers/dma-buf/ are based on CMA, not on a DMA carveout!
If this patch is not accepted, we will withdraw it!

Thank you all.

Best Regards,

-----邮件原件-----
发件人: Robin Murphy <robin.murphy@arm.com> 
发送时间: 2021年8月13日 17:16
收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
主题: Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].

On 2021-08-13 09:27, Jichao Zou wrote:
> Hi David,
> 	I'll git-send-email patch again.
> 	Your understanding is exactly right.
> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>   
> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
> 	In 5.10.43, cma_alloc is
> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>   	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.

The whole point of CMA is that the memory can be shared by moveable pages while it's not being used for DMA. If you want a dedicated DMA carveout, there are already mechanisms for that.

Robin.

> 
> Thanks.
> 
> Best Regards,
> 
> Zou Jichao 邹纪超
> Advisory Engineer, SW BSP
> MBG ROW SW BJ PF BSP (CN)
> Motorola Mobility, A Lenovo Company
> motorola.com
> M +86 18910860212
> E zoujc@lenovo.com
> twitter | facebook | instagram | blog | forums
> 
> 
> 
> 
> -----邮件原件-----
> 发件人: David Hildenbrand <david@redhat.com>
> 发送时间: 2021年8月13日 15:45
> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; 
> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; 
> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; 
> robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang 
> <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 13.08.21 09:00, Jichao Zou wrote:
>> Pre-allocate CMA memory that configured in device tree, this greatly 
>> improves the CMA memory allocation efficiency, cma_[alloc|free] is 
>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>
> 
> Please send patches as proper emails (man git-format-patch; man git-send-email).
> 
> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
> 
> I don't think we want this behavior upstream.
> 
> --
> Thanks,
> 
> David / dhildenb
> 
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
  2021-08-13  9:46         ` Jichao Zou
@ 2021-08-13 10:08           ` Robin Murphy
  -1 siblings, 0 replies; 14+ messages in thread
From: Robin Murphy @ 2021-08-13 10:08 UTC (permalink / raw)
  To: Jichao Zou, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

On 2021-08-13 10:46, Jichao Zou wrote:
> I got it, but in kernel that we used version, many heap drivers that in drivers/dma-buf/ are based on CMA, not DMA carveout!
> If this patch is not accepted, we cancel it!!!

If you just want dma_alloc_coherent() to work automatically from a 
carveout in the same manner as CMA, without having to stick 
of_reserved_mem_device_init() calls everywhere to make drivers aware of 
per-device carveouts, then [1] is probably what you want.

If it's specifically dma-buf heaps that you're interested in, then 
hacking the common CMA code to make the CMA heap behave like a carveout 
heap is definitely the wrong approach - just implement a carveout heap 
properly. It seems the only reason that hasn't been ported over from ION is 
that nobody's needed it yet[2].

Robin.

[1] https://lore.kernel.org/linux-iommu/20210712061704.4162464-1-hch@lst.de/
[2] https://lwn.net/Articles/801230/

> 
> Thank you all.
> 
> Best Regards,
> 
> -----邮件原件-----
> 发件人: Robin Murphy <robin.murphy@arm.com>
> 发送时间: 2021年8月13日 17:16
> 收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
> 主题: Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 2021-08-13 09:27, Jichao Zou wrote:
>> Hi David,
>> 	I'll git-send-email patch again.
>> 	Your understanding is exactly right.
>> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>>    
>> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
>> 	In 5.10.43, cma_alloc is
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>>    	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.
> 
> The whole point of CMA is that the memory can be shared by moveable pages while it's not being used for DMA. If you want a dedicated DMA carveout, there are already mechanisms for that.
> 
> Robin.
> 
>>
>> Thanks.
>>
>> Best Regards,
>>
>> Zou Jichao 邹纪超
>> Advisory Engineer, SW BSP
>> MBG ROW SW BJ PF BSP (CN)
>> Motorola Mobility, A Lenovo Company
>> motorola.com
>> M +86 18910860212
>> E zoujc@lenovo.com
>> twitter | facebook | instagram | blog | forums
>>
>>
>>
>>
>> -----邮件原件-----
>> 发件人: David Hildenbrand <david@redhat.com>
>> 发送时间: 2021年8月13日 15:45
>> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org;
>> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org;
>> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com;
>> robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang
>> <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
>> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
>>
>> On 13.08.21 09:00, Jichao Zou wrote:
>>> Pre-allocate CMA memory that configured in device tree, this greatly
>>> improves the CMA memory allocation efficiency, cma_[alloc|free] is
>>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>>
>>
>> Please send patches as proper emails (man git-format-patch; man git-send-email).
>>
>> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
>>
>> I don't think we want this behavior upstream.
>>
>> --
>> Thanks,
>>
>> David / dhildenb
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13 10:08           ` Robin Murphy
  0 siblings, 0 replies; 14+ messages in thread
From: Robin Murphy @ 2021-08-13 10:08 UTC (permalink / raw)
  To: Jichao Zou, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

On 2021-08-13 10:46, Jichao Zou wrote:
> I got it, but in kernel that we used version, many heap drivers that in drivers/dma-buf/ are based on CMA, not DMA carveout!
> If this patch is not accepted, we cancel it!!!

If you just want dma_alloc_coherent() to work automatically from a 
carveout in the same manner as CMA, without having to stick 
of_reserved_mem_device_init() calls everywhere to make drivers aware of 
per-device carveouts, then [1] is probably what you want.

If it's specifically dma-buf heaps that you're interested in, then 
hacking the common CMA code to make the CMA heap behave like a carveout 
heap is definitely the wrong approach - just implement a carveout heap 
properly. It seems the only reason that hasn't been ported over from ION is 
that nobody's needed it yet[2].

Robin.

[1] https://lore.kernel.org/linux-iommu/20210712061704.4162464-1-hch@lst.de/
[2] https://lwn.net/Articles/801230/

> 
> Thank you all.
> 
> Best Regards,
> 
> -----邮件原件-----
> 发件人: Robin Murphy <robin.murphy@arm.com>
> 发送时间: 2021年8月13日 17:16
> 收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
> 主题: Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 2021-08-13 09:27, Jichao Zou wrote:
>> Hi David,
>> 	I'll git-send-email patch again.
>> 	Your understanding is exactly right.
>> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>>    
>> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
>> 	In 5.10.43, cma_alloc is
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>>    	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.
> 
> The whole point of CMA is that the memory can be shared by moveable pages while it's not being used for DMA. If you want a dedicated DMA carveout, there are already mechanisms for that.
> 
> Robin.
> 
>>
>> Thanks.
>>
>> Best Regards,
>>
>> Zou Jichao 邹纪超
>> Advisory Engineer, SW BSP
>> MBG ROW SW BJ PF BSP (CN)
>> Motorola Mobility, A Lenovo Company
>> motorola.com
>> M +86 18910860212
>> E zoujc@lenovo.com
>> twitter | facebook | instagram | blog | forums
>>
>>
>>
>>
>> -----邮件原件-----
>> 发件人: David Hildenbrand <david@redhat.com>
>> 发送时间: 2021年8月13日 15:45
>> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org;
>> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org;
>> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com;
>> robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang
>> <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
>> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
>>
>> On 13.08.21 09:00, Jichao Zou wrote:
>>> Pre-allocate CMA memory that configured in device tree, this greatly
>>> improves the CMA memory allocation efficiency, cma_[alloc|free] is
>>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>>
>>
>> Please send patches as proper emails (man git-format-patch; man git-send-email).
>>
>> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
>>
>> I don't think we want this behavior upstream.
>>
>> --
>> Thanks,
>>
>> David / dhildenb
>>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* 回复: 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
  2021-08-13 10:08           ` Robin Murphy
@ 2021-08-13 11:26             ` Jichao Zou
  -1 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13 11:26 UTC (permalink / raw)
  To: Robin Murphy, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

Hi Robin, David
	Thank you very much for your guidance; we'll go ahead and see whether some heap drivers can use a carveout heap instead.

Thanks.

Best Regards,

-----邮件原件-----
发件人: Robin Murphy <robin.murphy@arm.com> 
发送时间: 2021年8月13日 18:08
收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
主题: Re: 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].

On 2021-08-13 10:46, Jichao Zou wrote:
> I got it, but in kernel that we used version, many heap drivers that in drivers/dma-buf/ are based on CMA, not DMA carveout!
> If this patch is not accepted, we cancel it!!!

If you just want dma_alloc_coherent() to work automatically from a carveout in the same manner as CMA, without having to stick
of_reserved_mem_device_init() calls everywhere to make drivers aware of per-device carveouts, then [1] is probably what you want.

If it's specifically dma-buf heaps that you're interested in, then hacking the common CMA code to make the CMA heap behave like a carveout heap is definitely the wrong approach - just implement a carveout heap properly. It seems the only reason that hasn't ported over from ION is that nobody's needed it yet[2].

Robin.

[1] https://apc01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Flinux-iommu%2F20210712061704.4162464-1-hch%40lst.de%2F&amp;data=04%7C01%7Czoujc%40motorola.com%7C9fca069e1915449946cb08d95e424546%7C5c7d0b28bdf8410caa934df372b16203%7C1%7C0%7C637644461066004509%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=OXFP48WvObcBZX%2FwKWiPtQPNwzcWHxaKaIr5MAvaq5g%3D&amp;reserved=0
[2] https://apc01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flwn.net%2FArticles%2F801230%2F&amp;data=04%7C01%7Czoujc%40motorola.com%7C9fca069e1915449946cb08d95e424546%7C5c7d0b28bdf8410caa934df372b16203%7C1%7C0%7C637644461066004509%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=N7GnFf4zGMDPzfg1GGOoUvddBI4U4bp7Kib34arwkBI%3D&amp;reserved=0

> 
> Thank you all.
> 
> Best Regards,
> 
> -----邮件原件-----
> 发件人: Robin Murphy <robin.murphy@arm.com>
> 发送时间: 2021年8月13日 17:16
> 收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand 
> <david@redhat.com>; akpm@linux-foundation.org; 
> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; 
> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; 
> iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; 
> Yanjune Tian <tianyje@motorola.com>
> 主题: Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 2021-08-13 09:27, Jichao Zou wrote:
>> Hi David,
>> 	I'll git-send-email patch again.
>> 	Your understanding is exactly right.
>> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>>    
>> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
>> 	In 5.10.43, cma_alloc is
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>>    	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.
> 
> The whole point of CMA is that the memory can be shared by moveable pages while it's not being used for DMA. If you want a dedicated DMA carveout, there are already mechanisms for that.
> 
> Robin.
> 
>>
>> Thanks.
>>
>> Best Regards,
>>
>> Zou Jichao 邹纪超
>> Advisory Engineer, SW BSP
>> MBG ROW SW BJ PF BSP (CN)
>> Motorola Mobility, A Lenovo Company
>> motorola.com
>> M +86 18910860212
>> E zoujc@lenovo.com
>> twitter | facebook | instagram | blog | forums
>>
>>
>>
>>
>> -----邮件原件-----
>> 发件人: David Hildenbrand <david@redhat.com>
>> 发送时间: 2021年8月13日 15:45
>> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; 
>> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; 
>> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; 
>> robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang 
>> <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
>> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
>>
>> On 13.08.21 09:00, Jichao Zou wrote:
>>> Pre-allocate CMA memory that configured in device tree, this greatly 
>>> improves the CMA memory allocation efficiency, cma_[alloc|free] is 
>>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>>
>>
>> Please send patches as proper emails (man git-format-patch; man git-send-email).
>>
>> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
>>
>> I don't think we want this behavior upstream.
>>
>> --
>> Thanks,
>>
>> David / dhildenb
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* 回复: 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
@ 2021-08-13 11:26             ` Jichao Zou
  0 siblings, 0 replies; 14+ messages in thread
From: Jichao Zou @ 2021-08-13 11:26 UTC (permalink / raw)
  To: Robin Murphy, David Hildenbrand, akpm, linux-kernel, linux-mm,
	minchan, song.bao.hua, hch, m.szyprowski, iommu, JianQi Yang,
	Yanjune Tian

Hi Robin, David
	Thank you very much for your guidance; we'll go ahead and see whether some heap drivers can use a carveout heap instead.

Thanks.

Best Regards,

-----邮件原件-----
发件人: Robin Murphy <robin.murphy@arm.com> 
发送时间: 2021年8月13日 18:08
收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
主题: Re: 回复: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].

On 2021-08-13 10:46, Jichao Zou wrote:
> I got it, but in kernel that we used version, many heap drivers that in drivers/dma-buf/ are based on CMA, not DMA carveout!
> If this patch is not accepted, we cancel it!!!

If you just want dma_alloc_coherent() to work automatically from a carveout in the same manner as CMA, without having to stick
of_reserved_mem_device_init() calls everywhere to make drivers aware of per-device carveouts, then [1] is probably what you want.

If it's specifically dma-buf heaps that you're interested in, then hacking the common CMA code to make the CMA heap behave like a carveout heap is definitely the wrong approach - just implement a carveout heap properly. It seems the only reason that hasn't ported over from ION is that nobody's needed it yet[2].

Robin.

[1] https://apc01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Flinux-iommu%2F20210712061704.4162464-1-hch%40lst.de%2F&amp;data=04%7C01%7Czoujc%40motorola.com%7C9fca069e1915449946cb08d95e424546%7C5c7d0b28bdf8410caa934df372b16203%7C1%7C0%7C637644461066004509%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=OXFP48WvObcBZX%2FwKWiPtQPNwzcWHxaKaIr5MAvaq5g%3D&amp;reserved=0
[2] https://apc01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flwn.net%2FArticles%2F801230%2F&amp;data=04%7C01%7Czoujc%40motorola.com%7C9fca069e1915449946cb08d95e424546%7C5c7d0b28bdf8410caa934df372b16203%7C1%7C0%7C637644461066004509%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=N7GnFf4zGMDPzfg1GGOoUvddBI4U4bp7Kib34arwkBI%3D&amp;reserved=0

> 
> Thank you all.
> 
> Best Regards,
> 
> -----邮件原件-----
> 发件人: Robin Murphy <robin.murphy@arm.com>
> 发送时间: 2021年8月13日 17:16
> 收件人: Jichao Zou <zoujc@motorola.com>; David Hildenbrand 
> <david@redhat.com>; akpm@linux-foundation.org; 
> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; 
> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; 
> iommu@lists.linux-foundation.org; JianQi Yang <yangj@motorola.com>; 
> Yanjune Tian <tianyje@motorola.com>
> 主题: Re: 回复: [External]Re: An cma optimization patch is used for cma_[alloc|free].
> 
> On 2021-08-13 09:27, Jichao Zou wrote:
>> Hi David,
>> 	I'll git-send-email patch again.
>> 	Your understanding is exactly right.
>> 	Let me explain the background of Patch, we are developing Android phone, kernel is 5.10.43 LTS, we encounter cma_alloc failed during kernel startup, buddy system is ready,
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc([216][init]:cma(ffffffff00b50000:total 8192) linux,cma(ffffffe89d084cf0), count 2, align 1 gfp_mask 0xcc0)
>> 01-11 14:22:08.650   216   216 E cma     : cma_alloc(): memory range at ffffffff00b62880 is busy, retrying
>>    
>> 	cma bitmap show memory is free, but alloc_contig_range failed, we checked it out that some drivers cma_alloc are
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, bool no_warn)"
>> 	In 5.10.43, cma_alloc is
>> 	"struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask)"
>>    	After change cma_alloc parameter with GFP_KERNEL, issue is fixed, at the same time, we found that preallocate a portion of cma memory for audio&video resulted in better performance and guarantee AV function even under memory pressure, so we try to submit this patch.
> 
> The whole point of CMA is that the memory can be shared by moveable pages while it's not being used for DMA. If you want a dedicated DMA carveout, there are already mechanisms for that.
> 
> Robin.
> 
>>
>> Thanks.
>>
>> Best Regards,
>>
>> Zou Jichao 邹纪超
>> Advisory Engineer, SW BSP
>> MBG ROW SW BJ PF BSP (CN)
>> Motorola Mobility, A Lenovo Company
>> motorola.com
>> M +86 18910860212
>> E zoujc@lenovo.com
>> twitter | facebook | instagram | blog | forums
>>
>>
>>
>>
>> -----邮件原件-----
>> 发件人: David Hildenbrand <david@redhat.com>
>> 发送时间: 2021年8月13日 15:45
>> 收件人: Jichao Zou <zoujc@motorola.com>; akpm@linux-foundation.org; 
>> linux-kernel@vger.kernel.org; linux-mm@kvack.org; minchan@kernel.org; 
>> song.bao.hua@hisilicon.com; hch@lst.de; m.szyprowski@samsung.com; 
>> robin.murphy@arm.com; iommu@lists.linux-foundation.org; JianQi Yang 
>> <yangj@motorola.com>; Yanjune Tian <tianyje@motorola.com>
>> 主题: [External]Re: An cma optimization patch is used for cma_[alloc|free].
>>
>> On 13.08.21 09:00, Jichao Zou wrote:
>>> Pre-allocate CMA memory that configured in device tree, this greatly 
>>> improves the CMA memory allocation efficiency, cma_[alloc|free] is 
>>> less than 1ms, old way is took a few ms to tens or hundreds ms.
>>>
>>
>> Please send patches as proper emails (man git-format-patch; man git-send-email).
>>
>> What you propose is turning cma reservations into something comparable to permanent boottime allocations. From the POV of the buddy, the pages are always allocated and cannot be repurposed for e.g., movable allocations until *actually* allocated via CMA.
>>
>> I don't think we want this behavior upstream.
>>
>> --
>> Thanks,
>>
>> David / dhildenb
>>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-08-13 11:26 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-13  7:00 An cma optimization patch is used for cma_[alloc|free] Jichao Zou
2021-08-13  7:00 ` Jichao Zou
2021-08-13  7:45 ` David Hildenbrand
2021-08-13  7:45   ` David Hildenbrand
2021-08-13  8:27   ` 回复: [External]Re: " Jichao Zou
2021-08-13  8:27     ` Jichao Zou
2021-08-13  9:15     ` Robin Murphy
2021-08-13  9:15       ` Robin Murphy
2021-08-13  9:46       ` 回复: " Jichao Zou
2021-08-13  9:46         ` Jichao Zou
2021-08-13 10:08         ` Robin Murphy
2021-08-13 10:08           ` Robin Murphy
2021-08-13 11:26           ` 回复: " Jichao Zou
2021-08-13 11:26             ` Jichao Zou

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.