All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-26  7:34 ` Kwangwoo Lee
  0 siblings, 0 replies; 20+ messages in thread
From: Kwangwoo Lee @ 2016-07-26  7:34 UTC (permalink / raw)
  To: Russell King - ARM Linux, Catalin Marinas, Will Deacon,
	Mark Rutland, linux-arm-kernel
  Cc: Kwangwoo Lee, Woosuk Chung, Hyunchul Kim, linux-kernel

v2)
change __dma_* routine names using the terminology guidance:
    area: takes a start and size
    range: takes a start and end
use __dma_flush_area() instead of __dma_flush_range() in dma-mapping.c

v1)
__dma_* routines have been converted to use start and size instead of
start and end addresses. The patch was originally for adding
__clean_dcache_area_poc() which will be used in pmem driver to clean
dcache to the PoC (Point of Coherency) in arch_wb_cache_pmem().

The functionality of __clean_dcache_area_poc() was equivalent to
__dma_clean_range(). The difference was that __dma_clean_range() uses the end
address, but __clean_dcache_area_poc() uses the size to clean.

Thus, __clean_dcache_area_poc() has been revised as a fall-through
function of __dma_clean_range() after the change that __dma_* routines
use start and size instead of using start and end.

Signed-off-by: Kwangwoo Lee <kwangwoo.lee@sk.com>
---
 arch/arm64/include/asm/cacheflush.h |  3 +-
 arch/arm64/mm/cache.S               | 71 +++++++++++++++++++------------------
 arch/arm64/mm/dma-mapping.c         |  6 ++--
 3 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c64268d..2e5fb97 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_poc(void *addr, size_t len);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
@@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
  */
 extern void __dma_map_area(const void *, size_t, int);
 extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_range(const void *, const void *);
+extern void __dma_flush_area(const void *, size_t);
 
 /*
  * Copy user data from/to a page which is mapped into a different
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 50ff9ba..4415c1b 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -110,14 +110,16 @@ ENDPROC(__clean_dcache_area_pou)
  *	- end     - end address of region
  */
 ENTRY(__inval_cache_range)
+	sub	x1, x1, x0
 	/* FALLTHROUGH */
 
 /*
- *	__dma_inv_range(start, end)
+ *	__dma_inv_area(start, size)
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-__dma_inv_range:
+__dma_inv_area:
+	add	x1, x1, x0
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -136,46 +138,47 @@ __dma_inv_range:
 	dsb	sy
 	ret
 ENDPIPROC(__inval_cache_range)
-ENDPROC(__dma_inv_range)
+ENDPROC(__dma_inv_area)
+
+/*
+ *	__clean_dcache_area_poc(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are cleaned to the PoC.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__clean_dcache_area_poc)
+	/* FALLTHROUGH */
 
 /*
- *	__dma_clean_range(start, end)
+ *	__dma_clean_area(start, size)
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-__dma_clean_range:
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:
+__dma_clean_area:
 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-	dc	cvac, x0
+	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 alternative_else
-	dc	civac, x0
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 alternative_endif
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
 	ret
-ENDPROC(__dma_clean_range)
+ENDPIPROC(__clean_dcache_area_poc)
+ENDPROC(__dma_clean_area)
 
 /*
- *	__dma_flush_range(start, end)
+ *	__dma_flush_area(start, size)
+ *
+ *	clean & invalidate D / U line
+ *
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-ENTRY(__dma_flush_range)
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:	dc	civac, x0			// clean & invalidate D / U line
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
+ENTRY(__dma_flush_area)
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_area)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -184,10 +187,9 @@ ENDPIPROC(__dma_flush_range)
  *	- dir	- DMA direction
  */
 ENTRY(__dma_map_area)
-	add	x1, x1, x0
 	cmp	w2, #DMA_FROM_DEVICE
-	b.eq	__dma_inv_range
-	b	__dma_clean_range
+	b.eq	__dma_inv_area
+	b	__dma_clean_area
 ENDPIPROC(__dma_map_area)
 
 /*
@@ -197,8 +199,7 @@ ENDPIPROC(__dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(__dma_unmap_area)
-	add	x1, x1, x0
 	cmp	w2, #DMA_TO_DEVICE
-	b.ne	__dma_inv_range
+	b.ne	__dma_inv_area
 	ret
 ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c566ec8..e0b0712 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -165,7 +165,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 		return ptr;
 
 	/* remove any dirty cache lines on the kernel alias */
-	__dma_flush_range(ptr, ptr + size);
+	__dma_flush_area(ptr, size);
 
 	/* create a coherent mapping */
 	page = virt_to_page(ptr);
@@ -377,7 +377,7 @@ static int __init atomic_pool_init(void)
 		void *page_addr = page_address(page);
 
 		memset(page_addr, 0, atomic_pool_size);
-		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
+		__dma_flush_area(page_addr, atomic_pool_size);
 
 		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
 		if (!atomic_pool)
@@ -535,7 +535,7 @@ fs_initcall(dma_debug_do_init);
 /* Thankfully, all cache ops are by VA so we can ignore phys here */
 static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
 {
-	__dma_flush_range(virt, virt + PAGE_SIZE);
+	__dma_flush_area(virt, PAGE_SIZE);
 }
 
 static void *__iommu_alloc_attrs(struct device *dev, size_t size,
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-26  7:34 ` Kwangwoo Lee
  0 siblings, 0 replies; 20+ messages in thread
From: Kwangwoo Lee @ 2016-07-26  7:34 UTC (permalink / raw)
  To: linux-arm-kernel

v2)
change __dma_* routine names using the terminology guidance:
    area: takes a start and size
    range: takes a start and end
use __dma_flush_area() instead of __dma_flush_range() in dma-mapping.c

v1)
__dma_* routines have been converted to use start and size instead of
start and end addresses. The patch was originally for adding
__clean_dcache_area_poc() which will be used in pmem driver to clean
dcache to the PoC (Point of Coherency) in arch_wb_cache_pmem().

The functionality of __clean_dcache_area_poc() was equivalent to
__dma_clean_range(). The difference was that __dma_clean_range() uses the end
address, but __clean_dcache_area_poc() uses the size to clean.

Thus, __clean_dcache_area_poc() has been revised as a fall-through
function of __dma_clean_range() after the change that __dma_* routines
use start and size instead of using start and end.

Signed-off-by: Kwangwoo Lee <kwangwoo.lee@sk.com>
---
 arch/arm64/include/asm/cacheflush.h |  3 +-
 arch/arm64/mm/cache.S               | 71 +++++++++++++++++++------------------
 arch/arm64/mm/dma-mapping.c         |  6 ++--
 3 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c64268d..2e5fb97 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_poc(void *addr, size_t len);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
@@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
  */
 extern void __dma_map_area(const void *, size_t, int);
 extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_range(const void *, const void *);
+extern void __dma_flush_area(const void *, size_t);
 
 /*
  * Copy user data from/to a page which is mapped into a different
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 50ff9ba..4415c1b 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -110,14 +110,16 @@ ENDPROC(__clean_dcache_area_pou)
  *	- end     - end address of region
  */
 ENTRY(__inval_cache_range)
+	sub	x1, x1, x0
 	/* FALLTHROUGH */
 
 /*
- *	__dma_inv_range(start, end)
+ *	__dma_inv_area(start, size)
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-__dma_inv_range:
+__dma_inv_area:
+	add	x1, x1, x0
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -136,46 +138,47 @@ __dma_inv_range:
 	dsb	sy
 	ret
 ENDPIPROC(__inval_cache_range)
-ENDPROC(__dma_inv_range)
+ENDPROC(__dma_inv_area)
+
+/*
+ *	__clean_dcache_area_poc(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are cleaned to the PoC.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__clean_dcache_area_poc)
+	/* FALLTHROUGH */
 
 /*
- *	__dma_clean_range(start, end)
+ *	__dma_clean_area(start, size)
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-__dma_clean_range:
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:
+__dma_clean_area:
 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-	dc	cvac, x0
+	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 alternative_else
-	dc	civac, x0
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 alternative_endif
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
 	ret
-ENDPROC(__dma_clean_range)
+ENDPIPROC(__clean_dcache_area_poc)
+ENDPROC(__dma_clean_area)
 
 /*
- *	__dma_flush_range(start, end)
+ *	__dma_flush_area(start, size)
+ *
+ *	clean & invalidate D / U line
+ *
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-ENTRY(__dma_flush_range)
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:	dc	civac, x0			// clean & invalidate D / U line
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
+ENTRY(__dma_flush_area)
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_area)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -184,10 +187,9 @@ ENDPIPROC(__dma_flush_range)
  *	- dir	- DMA direction
  */
 ENTRY(__dma_map_area)
-	add	x1, x1, x0
 	cmp	w2, #DMA_FROM_DEVICE
-	b.eq	__dma_inv_range
-	b	__dma_clean_range
+	b.eq	__dma_inv_area
+	b	__dma_clean_area
 ENDPIPROC(__dma_map_area)
 
 /*
@@ -197,8 +199,7 @@ ENDPIPROC(__dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(__dma_unmap_area)
-	add	x1, x1, x0
 	cmp	w2, #DMA_TO_DEVICE
-	b.ne	__dma_inv_range
+	b.ne	__dma_inv_area
 	ret
 ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c566ec8..e0b0712 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -165,7 +165,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 		return ptr;
 
 	/* remove any dirty cache lines on the kernel alias */
-	__dma_flush_range(ptr, ptr + size);
+	__dma_flush_area(ptr, size);
 
 	/* create a coherent mapping */
 	page = virt_to_page(ptr);
@@ -377,7 +377,7 @@ static int __init atomic_pool_init(void)
 		void *page_addr = page_address(page);
 
 		memset(page_addr, 0, atomic_pool_size);
-		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
+		__dma_flush_area(page_addr, atomic_pool_size);
 
 		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
 		if (!atomic_pool)
@@ -535,7 +535,7 @@ fs_initcall(dma_debug_do_init);
 /* Thankfully, all cache ops are by VA so we can ignore phys here */
 static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
 {
-	__dma_flush_range(virt, virt + PAGE_SIZE);
+	__dma_flush_area(virt, PAGE_SIZE);
 }
 
 static void *__iommu_alloc_attrs(struct device *dev, size_t size,
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-26  7:34 ` Kwangwoo Lee
@ 2016-07-26 10:43   ` Robin Murphy
  -1 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-07-26 10:43 UTC (permalink / raw)
  To: Kwangwoo Lee, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: Hyunchul Kim, linux-kernel, Woosuk Chung

On 26/07/16 08:34, Kwangwoo Lee wrote:
> v2)
> change __dma_* routine names using the terminoloy guidance:
>     area: takes a start and size
>     range: takes a start and end
> use __dma_flush_area() instead of __dma_flush_range() in dma-mapping.c
> 
> v1)
> __dma_* routines have been converted to use start and size instread of
> start and end addresses. The patch was origianlly for adding
> __clean_dcache_area_poc() which will be used in pmem driver to clean
> dcache to the PoC(Point of Coherency) in arch_wb_cache_pmem().
> 
> The functionality of __clean_dcache_area_poc()  was equivalent to
> __dma_clean_range(). The difference was __dma_clean_range() uses the end
> address, but __clean_dcache_area_poc() uses the size to clean.
> 
> Thus, __clean_dcache_area_poc() has been revised with a fall through
> function of __dma_clean_range() after the change that __dma_* routines
> use start and size instead of using start and end.
> 
> Signed-off-by: Kwangwoo Lee <kwangwoo.lee@sk.com>
> ---

Nit: the changelog relative to the previous posting wants to be here,
under the "---" separator; the commit message above should describe the
_current_ state of the patch, as that's all we'll really care about once
it's in the Git history.

>  arch/arm64/include/asm/cacheflush.h |  3 +-
>  arch/arm64/mm/cache.S               | 71 +++++++++++++++++++------------------
>  arch/arm64/mm/dma-mapping.c         |  6 ++--
>  3 files changed, 41 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index c64268d..2e5fb97 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -68,6 +68,7 @@
>  extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
>  extern void flush_icache_range(unsigned long start, unsigned long end);
>  extern void __flush_dcache_area(void *addr, size_t len);
> +extern void __clean_dcache_area_poc(void *addr, size_t len);
>  extern void __clean_dcache_area_pou(void *addr, size_t len);
>  extern long __flush_cache_user_range(unsigned long start, unsigned long end);
>  
> @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
>   */
>  extern void __dma_map_area(const void *, size_t, int);
>  extern void __dma_unmap_area(const void *, size_t, int);
> -extern void __dma_flush_range(const void *, const void *);
> +extern void __dma_flush_area(const void *, size_t);
>  
>  /*
>   * Copy user data from/to a page which is mapped into a different
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 50ff9ba..4415c1b 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -110,14 +110,16 @@ ENDPROC(__clean_dcache_area_pou)
>   *	- end     - end address of region
>   */
>  ENTRY(__inval_cache_range)
> +	sub	x1, x1, x0

Rather than doing this, I think it would be more sensible to simply swap
the entry points.

>  	/* FALLTHROUGH */
>  
>  /*
> - *	__dma_inv_range(start, end)
> + *	__dma_inv_area(start, size)
>   *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- size    - size in question
>   */
> -__dma_inv_range:
> +__dma_inv_area:
> +	add	x1, x1, x0
>  	dcache_line_size x2, x3
>  	sub	x3, x2, #1
>  	tst	x1, x3				// end cache line aligned?
> @@ -136,46 +138,47 @@ __dma_inv_range:
>  	dsb	sy
>  	ret
>  ENDPIPROC(__inval_cache_range)
> -ENDPROC(__dma_inv_range)
> +ENDPROC(__dma_inv_area)
> +
> +/*
> + *	__clean_dcache_area_poc(kaddr, size)
> + *
> + * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
> + * 	are cleaned to the PoC.
> + *
> + *	- kaddr   - kernel address
> + *	- size    - size in question
> + */
> +ENTRY(__clean_dcache_area_poc)
> +	/* FALLTHROUGH */
>  
>  /*
> - *	__dma_clean_range(start, end)
> + *	__dma_clean_area(start, size)
>   *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- size    - size in question
>   */
> -__dma_clean_range:
> -	dcache_line_size x2, x3
> -	sub	x3, x2, #1
> -	bic	x0, x0, x3
> -1:
> +__dma_clean_area:
>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> -	dc	cvac, x0
> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
>  alternative_else
> -	dc	civac, x0
> +	dcache_by_line_op civac, sy, x0, x1, x2, x3

dcache_by_line_op is a relatively large macro - is there any way we can
still apply the alternative to just the one instruction which needs it,
as opposed to having to patch the entire mostly-identical routine?

Robin.

>  alternative_endif
> -	add	x0, x0, x2
> -	cmp	x0, x1
> -	b.lo	1b
> -	dsb	sy
>  	ret
> -ENDPROC(__dma_clean_range)
> +ENDPIPROC(__clean_dcache_area_poc)
> +ENDPROC(__dma_clean_area)
>  
>  /*
> - *	__dma_flush_range(start, end)
> + *	__dma_flush_area(start, size)
> + *
> + *	clean & invalidate D / U line
> + *
>   *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- size    - size in question
>   */
> -ENTRY(__dma_flush_range)
> -	dcache_line_size x2, x3
> -	sub	x3, x2, #1
> -	bic	x0, x0, x3
> -1:	dc	civac, x0			// clean & invalidate D / U line
> -	add	x0, x0, x2
> -	cmp	x0, x1
> -	b.lo	1b
> -	dsb	sy
> +ENTRY(__dma_flush_area)
> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>  	ret
> -ENDPIPROC(__dma_flush_range)
> +ENDPIPROC(__dma_flush_area)
>  
>  /*
>   *	__dma_map_area(start, size, dir)
> @@ -184,10 +187,9 @@ ENDPIPROC(__dma_flush_range)
>   *	- dir	- DMA direction
>   */
>  ENTRY(__dma_map_area)
> -	add	x1, x1, x0
>  	cmp	w2, #DMA_FROM_DEVICE
> -	b.eq	__dma_inv_range
> -	b	__dma_clean_range
> +	b.eq	__dma_inv_area
> +	b	__dma_clean_area
>  ENDPIPROC(__dma_map_area)
>  
>  /*
> @@ -197,8 +199,7 @@ ENDPIPROC(__dma_map_area)
>   *	- dir	- DMA direction
>   */
>  ENTRY(__dma_unmap_area)
> -	add	x1, x1, x0
>  	cmp	w2, #DMA_TO_DEVICE
> -	b.ne	__dma_inv_range
> +	b.ne	__dma_inv_area
>  	ret
>  ENDPIPROC(__dma_unmap_area)
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index c566ec8..e0b0712 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -165,7 +165,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
>  		return ptr;
>  
>  	/* remove any dirty cache lines on the kernel alias */
> -	__dma_flush_range(ptr, ptr + size);
> +	__dma_flush_area(ptr, size);
>  
>  	/* create a coherent mapping */
>  	page = virt_to_page(ptr);
> @@ -377,7 +377,7 @@ static int __init atomic_pool_init(void)
>  		void *page_addr = page_address(page);
>  
>  		memset(page_addr, 0, atomic_pool_size);
> -		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
> +		__dma_flush_area(page_addr, atomic_pool_size);
>  
>  		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
>  		if (!atomic_pool)
> @@ -535,7 +535,7 @@ fs_initcall(dma_debug_do_init);
>  /* Thankfully, all cache ops are by VA so we can ignore phys here */
>  static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
>  {
> -	__dma_flush_range(virt, virt + PAGE_SIZE);
> +	__dma_flush_area(virt, PAGE_SIZE);
>  }
>  
>  static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-26 10:43   ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-07-26 10:43 UTC (permalink / raw)
  To: linux-arm-kernel

On 26/07/16 08:34, Kwangwoo Lee wrote:
> v2)
> change __dma_* routine names using the terminoloy guidance:
>     area: takes a start and size
>     range: takes a start and end
> use __dma_flush_area() instead of __dma_flush_range() in dma-mapping.c
> 
> v1)
> __dma_* routines have been converted to use start and size instread of
> start and end addresses. The patch was origianlly for adding
> __clean_dcache_area_poc() which will be used in pmem driver to clean
> dcache to the PoC(Point of Coherency) in arch_wb_cache_pmem().
> 
> The functionality of __clean_dcache_area_poc()  was equivalent to
> __dma_clean_range(). The difference was __dma_clean_range() uses the end
> address, but __clean_dcache_area_poc() uses the size to clean.
> 
> Thus, __clean_dcache_area_poc() has been revised with a fall through
> function of __dma_clean_range() after the change that __dma_* routines
> use start and size instead of using start and end.
> 
> Signed-off-by: Kwangwoo Lee <kwangwoo.lee@sk.com>
> ---

Nit: the changelog relative to the previous posting wants to be here,
under the "---" separator; the commit message above should describe the
_current_ state of the patch, as that's all we'll really care about once
it's in the Git history.

>  arch/arm64/include/asm/cacheflush.h |  3 +-
>  arch/arm64/mm/cache.S               | 71 +++++++++++++++++++------------------
>  arch/arm64/mm/dma-mapping.c         |  6 ++--
>  3 files changed, 41 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index c64268d..2e5fb97 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -68,6 +68,7 @@
>  extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
>  extern void flush_icache_range(unsigned long start, unsigned long end);
>  extern void __flush_dcache_area(void *addr, size_t len);
> +extern void __clean_dcache_area_poc(void *addr, size_t len);
>  extern void __clean_dcache_area_pou(void *addr, size_t len);
>  extern long __flush_cache_user_range(unsigned long start, unsigned long end);
>  
> @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
>   */
>  extern void __dma_map_area(const void *, size_t, int);
>  extern void __dma_unmap_area(const void *, size_t, int);
> -extern void __dma_flush_range(const void *, const void *);
> +extern void __dma_flush_area(const void *, size_t);
>  
>  /*
>   * Copy user data from/to a page which is mapped into a different
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 50ff9ba..4415c1b 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -110,14 +110,16 @@ ENDPROC(__clean_dcache_area_pou)
>   *	- end     - end address of region
>   */
>  ENTRY(__inval_cache_range)
> +	sub	x1, x1, x0

Rather than doing this, I think it would be more sensible to simply swap
the entry points.

>  	/* FALLTHROUGH */
>  
>  /*
> - *	__dma_inv_range(start, end)
> + *	__dma_inv_area(start, size)
>   *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- size    - size in question
>   */
> -__dma_inv_range:
> +__dma_inv_area:
> +	add	x1, x1, x0
>  	dcache_line_size x2, x3
>  	sub	x3, x2, #1
>  	tst	x1, x3				// end cache line aligned?
> @@ -136,46 +138,47 @@ __dma_inv_range:
>  	dsb	sy
>  	ret
>  ENDPIPROC(__inval_cache_range)
> -ENDPROC(__dma_inv_range)
> +ENDPROC(__dma_inv_area)
> +
> +/*
> + *	__clean_dcache_area_poc(kaddr, size)
> + *
> + * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
> + * 	are cleaned to the PoC.
> + *
> + *	- kaddr   - kernel address
> + *	- size    - size in question
> + */
> +ENTRY(__clean_dcache_area_poc)
> +	/* FALLTHROUGH */
>  
>  /*
> - *	__dma_clean_range(start, end)
> + *	__dma_clean_area(start, size)
>   *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- size    - size in question
>   */
> -__dma_clean_range:
> -	dcache_line_size x2, x3
> -	sub	x3, x2, #1
> -	bic	x0, x0, x3
> -1:
> +__dma_clean_area:
>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> -	dc	cvac, x0
> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
>  alternative_else
> -	dc	civac, x0
> +	dcache_by_line_op civac, sy, x0, x1, x2, x3

dcache_by_line_op is a relatively large macro - is there any way we can
still apply the alternative to just the one instruction which needs it,
as opposed to having to patch the entire mostly-identical routine?

Robin.

>  alternative_endif
> -	add	x0, x0, x2
> -	cmp	x0, x1
> -	b.lo	1b
> -	dsb	sy
>  	ret
> -ENDPROC(__dma_clean_range)
> +ENDPIPROC(__clean_dcache_area_poc)
> +ENDPROC(__dma_clean_area)
>  
>  /*
> - *	__dma_flush_range(start, end)
> + *	__dma_flush_area(start, size)
> + *
> + *	clean & invalidate D / U line
> + *
>   *	- start   - virtual start address of region
> - *	- end     - virtual end address of region
> + *	- size    - size in question
>   */
> -ENTRY(__dma_flush_range)
> -	dcache_line_size x2, x3
> -	sub	x3, x2, #1
> -	bic	x0, x0, x3
> -1:	dc	civac, x0			// clean & invalidate D / U line
> -	add	x0, x0, x2
> -	cmp	x0, x1
> -	b.lo	1b
> -	dsb	sy
> +ENTRY(__dma_flush_area)
> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>  	ret
> -ENDPIPROC(__dma_flush_range)
> +ENDPIPROC(__dma_flush_area)
>  
>  /*
>   *	__dma_map_area(start, size, dir)
> @@ -184,10 +187,9 @@ ENDPIPROC(__dma_flush_range)
>   *	- dir	- DMA direction
>   */
>  ENTRY(__dma_map_area)
> -	add	x1, x1, x0
>  	cmp	w2, #DMA_FROM_DEVICE
> -	b.eq	__dma_inv_range
> -	b	__dma_clean_range
> +	b.eq	__dma_inv_area
> +	b	__dma_clean_area
>  ENDPIPROC(__dma_map_area)
>  
>  /*
> @@ -197,8 +199,7 @@ ENDPIPROC(__dma_map_area)
>   *	- dir	- DMA direction
>   */
>  ENTRY(__dma_unmap_area)
> -	add	x1, x1, x0
>  	cmp	w2, #DMA_TO_DEVICE
> -	b.ne	__dma_inv_range
> +	b.ne	__dma_inv_area
>  	ret
>  ENDPIPROC(__dma_unmap_area)
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index c566ec8..e0b0712 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -165,7 +165,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
>  		return ptr;
>  
>  	/* remove any dirty cache lines on the kernel alias */
> -	__dma_flush_range(ptr, ptr + size);
> +	__dma_flush_area(ptr, size);
>  
>  	/* create a coherent mapping */
>  	page = virt_to_page(ptr);
> @@ -377,7 +377,7 @@ static int __init atomic_pool_init(void)
>  		void *page_addr = page_address(page);
>  
>  		memset(page_addr, 0, atomic_pool_size);
> -		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
> +		__dma_flush_area(page_addr, atomic_pool_size);
>  
>  		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
>  		if (!atomic_pool)
> @@ -535,7 +535,7 @@ fs_initcall(dma_debug_do_init);
>  /* Thankfully, all cache ops are by VA so we can ignore phys here */
>  static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
>  {
> -	__dma_flush_range(virt, virt + PAGE_SIZE);
> +	__dma_flush_area(virt, PAGE_SIZE);
>  }
>  
>  static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-26 10:43   ` Robin Murphy
@ 2016-07-27  1:55     ` kwangwoo.lee at sk.com
  -1 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee @ 2016-07-27  1:55 UTC (permalink / raw)
  To: Robin Murphy, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

Hi Robin,

Thanks a lot for your comments! Please, find my comments below.

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy@arm.com]
> Sent: Tuesday, July 26, 2016 7:43 PM
> To: 이광우(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel@lists.infradead.org
> Cc: 김현철(KIM HYUNCHUL) MS SW; linux-kernel@vger.kernel.org; 정우석(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 26/07/16 08:34, Kwangwoo Lee wrote:
> > v2)
> > change __dma_* routine names using the terminology guidance:
> >     area: takes a start and size
> >     range: takes a start and end
> > use __dma_flush_area() instead of __dma_flush_range() in dma-mapping.c
> >
> > v1)
> > __dma_* routines have been converted to use start and size instead of
> > start and end addresses. The patch was originally for adding
> > __clean_dcache_area_poc() which will be used in pmem driver to clean
> > dcache to the PoC(Point of Coherency) in arch_wb_cache_pmem().
> >
> > The functionality of __clean_dcache_area_poc() was equivalent to
> > __dma_clean_range(). The difference was __dma_clean_range() uses the end
> > address, but __clean_dcache_area_poc() uses the size to clean.
> >
> > Thus, __clean_dcache_area_poc() has been revised with a fall through
> > function of __dma_clean_range() after the change that __dma_* routines
> > use start and size instead of using start and end.
> >
> > Signed-off-by: Kwangwoo Lee <kwangwoo.lee@sk.com>
> > ---
> 
> Nit: the changelog relative to the previous posting wants to be here,
> under the "---" separator; the commit message above should describe the
> _current_ state of the patch, as that's all we'll really care about once
> it's in the Git history.

OK. I'll follow the convention and use the feature.
Thank you very much for letting me know!

> >  arch/arm64/include/asm/cacheflush.h |  3 +-
> >  arch/arm64/mm/cache.S               | 71 +++++++++++++++++++------------------
> >  arch/arm64/mm/dma-mapping.c         |  6 ++--
> >  3 files changed, 41 insertions(+), 39 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> > index c64268d..2e5fb97 100644
> > --- a/arch/arm64/include/asm/cacheflush.h
> > +++ b/arch/arm64/include/asm/cacheflush.h
> > @@ -68,6 +68,7 @@
> >  extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
> >  extern void flush_icache_range(unsigned long start, unsigned long end);
> >  extern void __flush_dcache_area(void *addr, size_t len);
> > +extern void __clean_dcache_area_poc(void *addr, size_t len);
> >  extern void __clean_dcache_area_pou(void *addr, size_t len);
> >  extern long __flush_cache_user_range(unsigned long start, unsigned long end);
> >
> > @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
> >   */
> >  extern void __dma_map_area(const void *, size_t, int);
> >  extern void __dma_unmap_area(const void *, size_t, int);
> > -extern void __dma_flush_range(const void *, const void *);
> > +extern void __dma_flush_area(const void *, size_t);
> >
> >  /*
> >   * Copy user data from/to a page which is mapped into a different
> > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> > index 50ff9ba..4415c1b 100644
> > --- a/arch/arm64/mm/cache.S
> > +++ b/arch/arm64/mm/cache.S
> > @@ -110,14 +110,16 @@ ENDPROC(__clean_dcache_area_pou)
> >   *	- end     - end address of region
> >   */
> >  ENTRY(__inval_cache_range)
> > +	sub	x1, x1, x0
> 
> Rather than doing this, I think it would be more sensible to simply swap
> the entry points.

This is much better idea instead of adding sub instruction! :) Thanks!

> >  	/* FALLTHROUGH */
> >
> >  /*
> > - *	__dma_inv_range(start, end)
> > + *	__dma_inv_area(start, size)
> >   *	- start   - virtual start address of region
> > - *	- end     - virtual end address of region
> > + *	- size    - size in question
> >   */
> > -__dma_inv_range:
> > +__dma_inv_area:
> > +	add	x1, x1, x0
> >  	dcache_line_size x2, x3
> >  	sub	x3, x2, #1
> >  	tst	x1, x3				// end cache line aligned?
> > @@ -136,46 +138,47 @@ __dma_inv_range:
> >  	dsb	sy
> >  	ret
> >  ENDPIPROC(__inval_cache_range)
> > -ENDPROC(__dma_inv_range)
> > +ENDPROC(__dma_inv_area)
> > +
> > +/*
> > + *	__clean_dcache_area_poc(kaddr, size)
> > + *
> > + * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
> > + * 	are cleaned to the PoC.
> > + *
> > + *	- kaddr   - kernel address
> > + *	- size    - size in question
> > + */
> > +ENTRY(__clean_dcache_area_poc)
> > +	/* FALLTHROUGH */
> >
> >  /*
> > - *	__dma_clean_range(start, end)
> > + *	__dma_clean_area(start, size)
> >   *	- start   - virtual start address of region
> > - *	- end     - virtual end address of region
> > + *	- size    - size in question
> >   */
> > -__dma_clean_range:
> > -	dcache_line_size x2, x3
> > -	sub	x3, x2, #1
> > -	bic	x0, x0, x3
> > -1:
> > +__dma_clean_area:
> >  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> > -	dc	cvac, x0
> > +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >  alternative_else
> > -	dc	civac, x0
> > +	dcache_by_line_op civac, sy, x0, x1, x2, x3
> 
> dcache_by_line_op is a relatively large macro - is there any way we can
> still apply the alternative to just the one instruction which needs it,
> as opposed to having to patch the entire mostly-identical routine?

I agree with your opinion. Then, what do you think about using CONFIG_* options
like below? I think that the alternative_* macros seem to keep the space for
the unused instruction. Is it necessary? Please share your thoughts about the
space. Thanks!

+__dma_clean_area:
+#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
+       defined(CONFIG_ARM64_ERRATUM_827319) || \
+       defined(CONFIG_ARM64_ERRATUM_824069) || \
+       defined(CONFIG_ARM64_ERRATUM_819472)
+       dcache_by_line_op civac, sy, x0, x1, x2, x3
+#else
+       dcache_by_line_op cvac, sy, x0, x1, x2, x3
+#endif

> Robin.

Best Regards,
Kwangwoo Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-27  1:55     ` kwangwoo.lee at sk.com
  0 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee at sk.com @ 2016-07-27  1:55 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Robin,

Thanks a lot for your comments! Please, find my comments below.

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy at arm.com]
> Sent: Tuesday, July 26, 2016 7:43 PM
> To: ???(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel at lists.infradead.org
> Cc: ???(KIM HYUNCHUL) MS SW; linux-kernel at vger.kernel.org; ???(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 26/07/16 08:34, Kwangwoo Lee wrote:
> > v2)
> > change __dma_* routine names using the terminology guidance:
> >     area: takes a start and size
> >     range: takes a start and end
> > use __dma_flush_area() instead of __dma_flush_range() in dma-mapping.c
> >
> > v1)
> > __dma_* routines have been converted to use start and size instead of
> > start and end addresses. The patch was originally for adding
> > __clean_dcache_area_poc() which will be used in pmem driver to clean
> > dcache to the PoC(Point of Coherency) in arch_wb_cache_pmem().
> >
> > The functionality of __clean_dcache_area_poc() was equivalent to
> > __dma_clean_range(). The difference was __dma_clean_range() uses the end
> > address, but __clean_dcache_area_poc() uses the size to clean.
> >
> > Thus, __clean_dcache_area_poc() has been revised with a fall through
> > function of __dma_clean_range() after the change that __dma_* routines
> > use start and size instead of using start and end.
> >
> > Signed-off-by: Kwangwoo Lee <kwangwoo.lee@sk.com>
> > ---
> 
> Nit: the changelog relative to the previous posting wants to be here,
> under the "---" separator; the commit message above should describe the
> _current_ state of the patch, as that's all we'll really care about once
> it's in the Git history.

OK. I'll follow the convention and use the feature.
Thank you very much for letting me know!

> >  arch/arm64/include/asm/cacheflush.h |  3 +-
> >  arch/arm64/mm/cache.S               | 71 +++++++++++++++++++------------------
> >  arch/arm64/mm/dma-mapping.c         |  6 ++--
> >  3 files changed, 41 insertions(+), 39 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> > index c64268d..2e5fb97 100644
> > --- a/arch/arm64/include/asm/cacheflush.h
> > +++ b/arch/arm64/include/asm/cacheflush.h
> > @@ -68,6 +68,7 @@
> >  extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
> >  extern void flush_icache_range(unsigned long start, unsigned long end);
> >  extern void __flush_dcache_area(void *addr, size_t len);
> > +extern void __clean_dcache_area_poc(void *addr, size_t len);
> >  extern void __clean_dcache_area_pou(void *addr, size_t len);
> >  extern long __flush_cache_user_range(unsigned long start, unsigned long end);
> >
> > @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
> >   */
> >  extern void __dma_map_area(const void *, size_t, int);
> >  extern void __dma_unmap_area(const void *, size_t, int);
> > -extern void __dma_flush_range(const void *, const void *);
> > +extern void __dma_flush_area(const void *, size_t);
> >
> >  /*
> >   * Copy user data from/to a page which is mapped into a different
> > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> > index 50ff9ba..4415c1b 100644
> > --- a/arch/arm64/mm/cache.S
> > +++ b/arch/arm64/mm/cache.S
> > @@ -110,14 +110,16 @@ ENDPROC(__clean_dcache_area_pou)
> >   *	- end     - end address of region
> >   */
> >  ENTRY(__inval_cache_range)
> > +	sub	x1, x1, x0
> 
> Rather than doing this, I think it would be more sensible to simply swap
> the entry points.

This is much better idea instead of adding sub instruction! :) Thanks!

> >  	/* FALLTHROUGH */
> >
> >  /*
> > - *	__dma_inv_range(start, end)
> > + *	__dma_inv_area(start, size)
> >   *	- start   - virtual start address of region
> > - *	- end     - virtual end address of region
> > + *	- size    - size in question
> >   */
> > -__dma_inv_range:
> > +__dma_inv_area:
> > +	add	x1, x1, x0
> >  	dcache_line_size x2, x3
> >  	sub	x3, x2, #1
> >  	tst	x1, x3				// end cache line aligned?
> > @@ -136,46 +138,47 @@ __dma_inv_range:
> >  	dsb	sy
> >  	ret
> >  ENDPIPROC(__inval_cache_range)
> > -ENDPROC(__dma_inv_range)
> > +ENDPROC(__dma_inv_area)
> > +
> > +/*
> > + *	__clean_dcache_area_poc(kaddr, size)
> > + *
> > + * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
> > + * 	are cleaned to the PoC.
> > + *
> > + *	- kaddr   - kernel address
> > + *	- size    - size in question
> > + */
> > +ENTRY(__clean_dcache_area_poc)
> > +	/* FALLTHROUGH */
> >
> >  /*
> > - *	__dma_clean_range(start, end)
> > + *	__dma_clean_area(start, size)
> >   *	- start   - virtual start address of region
> > - *	- end     - virtual end address of region
> > + *	- size    - size in question
> >   */
> > -__dma_clean_range:
> > -	dcache_line_size x2, x3
> > -	sub	x3, x2, #1
> > -	bic	x0, x0, x3
> > -1:
> > +__dma_clean_area:
> >  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> > -	dc	cvac, x0
> > +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >  alternative_else
> > -	dc	civac, x0
> > +	dcache_by_line_op civac, sy, x0, x1, x2, x3
> 
> dcache_by_line_op is a relatively large macro - is there any way we can
> still apply the alternative to just the one instruction which needs it,
> as opposed to having to patch the entire mostly-identical routine?

I agree with your opinion. Then, what do you think about using CONFIG_* options
like below? I think that the alternative_* macros seem to keep the space for
the unused instruction. Is it necessary? Please share your thoughts about the
space. Thanks!

+__dma_clean_area:
+#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
+       defined(CONFIG_ARM64_ERRATUM_827319) || \
+       defined(CONFIG_ARM64_ERRATUM_824069) || \
+       defined(CONFIG_ARM64_ERRATUM_819472)
+       dcache_by_line_op civac, sy, x0, x1, x2, x3
+#else
+       dcache_by_line_op cvac, sy, x0, x1, x2, x3
+#endif

> Robin.

Best Regards,
Kwangwoo Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-27  1:55     ` kwangwoo.lee at sk.com
@ 2016-07-27  9:56       ` Robin Murphy
  -1 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-07-27  9:56 UTC (permalink / raw)
  To: kwangwoo.lee, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

On 27/07/16 02:55, kwangwoo.lee@sk.com wrote:
[...]
>>>  /*
>>> - *	__dma_clean_range(start, end)
>>> + *	__dma_clean_area(start, size)
>>>   *	- start   - virtual start address of region
>>> - *	- end     - virtual end address of region
>>> + *	- size    - size in question
>>>   */
>>> -__dma_clean_range:
>>> -	dcache_line_size x2, x3
>>> -	sub	x3, x2, #1
>>> -	bic	x0, x0, x3
>>> -1:
>>> +__dma_clean_area:
>>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>> -	dc	cvac, x0
>>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
>>>  alternative_else
>>> -	dc	civac, x0
>>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>>
>> dcache_by_line_op is a relatively large macro - is there any way we can
>> still apply the alternative to just the one instruction which needs it,
>> as opposed to having to patch the entire mostly-identical routine?
> 
> I agree with your opinion. Then, how do you think about using CONFIG_* options
> like below? I think that alternative_* macros seems to keep the space for
> unused instruction. Is it necessary? Please, share your thought about the
> space. Thanks!
> 
> +__dma_clean_area:
> +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
> +       defined(CONFIG_ARM64_ERRATUM_827319) || \
> +       defined(CONFIG_ARM64_ERRATUM_824069) || \
> +       defined(CONFIG_ARM64_ERRATUM_819472)
> +       dcache_by_line_op civac, sy, x0, x1, x2, x3
> +#else
> +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
> +#endif

That's not ideal, because we still only really want to use the
workaround if we detect a CPU which needs it, rather than baking it in
at compile time. I was thinking more along the lines of pushing the
alternative down into dcache_by_line_op, something like the idea below
(compile-tested only, may not actually be viable).

Robin.

-----8<-----
diff --git a/arch/arm64/include/asm/assembler.h
b/arch/arm64/include/asm/assembler.h
index 10b017c4bdd8..1c005c90387e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -261,7 +261,16 @@ lr	.req	x30		// link register
 	add	\size, \kaddr, \size
 	sub	\tmp2, \tmp1, #1
 	bic	\kaddr, \kaddr, \tmp2
-9998:	dc	\op, \kaddr
+9998:
+	.ifeqs "\op", "cvac"
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	dc	cvac, \kaddr
+alternative_else
+	dc	civac, \kaddr
+alternative_endif
+	.else
+	dc	\op, \kaddr
+	.endif
 	add	\kaddr, \kaddr, \tmp1
 	cmp	\kaddr, \size
 	b.lo	9998b

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-27  9:56       ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-07-27  9:56 UTC (permalink / raw)
  To: linux-arm-kernel

On 27/07/16 02:55, kwangwoo.lee at sk.com wrote:
[...]
>>>  /*
>>> - *	__dma_clean_range(start, end)
>>> + *	__dma_clean_area(start, size)
>>>   *	- start   - virtual start address of region
>>> - *	- end     - virtual end address of region
>>> + *	- size    - size in question
>>>   */
>>> -__dma_clean_range:
>>> -	dcache_line_size x2, x3
>>> -	sub	x3, x2, #1
>>> -	bic	x0, x0, x3
>>> -1:
>>> +__dma_clean_area:
>>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>> -	dc	cvac, x0
>>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
>>>  alternative_else
>>> -	dc	civac, x0
>>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>>
>> dcache_by_line_op is a relatively large macro - is there any way we can
>> still apply the alternative to just the one instruction which needs it,
>> as opposed to having to patch the entire mostly-identical routine?
> 
> I agree with your opinion. Then, how do you think about using CONFIG_* options
> like below? I think that alternative_* macros seems to keep the space for
> unused instruction. Is it necessary? Please, share your thought about the
> space. Thanks!
> 
> +__dma_clean_area:
> +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
> +       defined(CONFIG_ARM64_ERRATUM_827319) || \
> +       defined(CONFIG_ARM64_ERRATUM_824069) || \
> +       defined(CONFIG_ARM64_ERRATUM_819472)
> +       dcache_by_line_op civac, sy, x0, x1, x2, x3
> +#else
> +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
> +#endif

That's not ideal, because we still only really want to use the
workaround if we detect a CPU which needs it, rather than baking it in
at compile time. I was thinking more along the lines of pushing the
alternative down into dcache_by_line_op, something like the idea below
(compile-tested only, may not actually be viable).

Robin.

-----8<-----
diff --git a/arch/arm64/include/asm/assembler.h
b/arch/arm64/include/asm/assembler.h
index 10b017c4bdd8..1c005c90387e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -261,7 +261,16 @@ lr	.req	x30		// link register
 	add	\size, \kaddr, \size
 	sub	\tmp2, \tmp1, #1
 	bic	\kaddr, \kaddr, \tmp2
-9998:	dc	\op, \kaddr
+9998:
+	.ifeqs "\op", "cvac"
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	dc	cvac, \kaddr
+alternative_else
+	dc	civac, \kaddr
+alternative_endif
+	.else
+	dc	\op, \kaddr
+	.endif
 	add	\kaddr, \kaddr, \tmp1
 	cmp	\kaddr, \size
 	b.lo	9998b

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-27  9:56       ` Robin Murphy
@ 2016-07-28  0:08         ` kwangwoo.lee at sk.com
  -1 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee @ 2016-07-28  0:08 UTC (permalink / raw)
  To: Robin Murphy, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy@arm.com]
> Sent: Wednesday, July 27, 2016 6:56 PM
> To: 이광우(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel@lists.infradead.org
> Cc: 김현철(KIM HYUNCHUL) MS SW; linux-kernel@vger.kernel.org; 정우석(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 27/07/16 02:55, kwangwoo.lee@sk.com wrote:
> [...]
> >>>  /*
> >>> - *	__dma_clean_range(start, end)
> >>> + *	__dma_clean_area(start, size)
> >>>   *	- start   - virtual start address of region
> >>> - *	- end     - virtual end address of region
> >>> + *	- size    - size in question
> >>>   */
> >>> -__dma_clean_range:
> >>> -	dcache_line_size x2, x3
> >>> -	sub	x3, x2, #1
> >>> -	bic	x0, x0, x3
> >>> -1:
> >>> +__dma_clean_area:
> >>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >>> -	dc	cvac, x0
> >>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >>>  alternative_else
> >>> -	dc	civac, x0
> >>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
> >>
> >> dcache_by_line_op is a relatively large macro - is there any way we can
> >> still apply the alternative to just the one instruction which needs it,
> >> as opposed to having to patch the entire mostly-identical routine?
> >
> > I agree with your opinion. Then, how do you think about using CONFIG_* options
> > like below? I think that alternative_* macros seems to keep the space for
> > unused instruction. Is it necessary? Please, share your thought about the
> > space. Thanks!
> >
> > +__dma_clean_area:
> > +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
> > +       defined(CONFIG_ARM64_ERRATUM_827319) || \
> > +       defined(CONFIG_ARM64_ERRATUM_824069) || \
> > +       defined(CONFIG_ARM64_ERRATUM_819472)
> > +       dcache_by_line_op civac, sy, x0, x1, x2, x3
> > +#else
> > +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
> > +#endif
> 
> That's not ideal, because we still only really want to use the
> workaround if we detect a CPU which needs it, rather than baking it in
> at compile time. I was thinking more along the lines of pushing the
> alternative down into dcache_by_line_op, something like the idea below
> (compile-tested only, may not actually be viable).

OK. Using the capability of CPU features seems to be preferred.

> Robin.
> 
> -----8<-----
> diff --git a/arch/arm64/include/asm/assembler.h
> b/arch/arm64/include/asm/assembler.h
> index 10b017c4bdd8..1c005c90387e 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>  	add	\size, \kaddr, \size
>  	sub	\tmp2, \tmp1, #1
>  	bic	\kaddr, \kaddr, \tmp2
> -9998:	dc	\op, \kaddr
> +9998:
> +	.ifeqs "\op", "cvac"
> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> +	dc	cvac, \kaddr
> +alternative_else
> +	dc	civac, \kaddr
> +alternative_endif
> +	.else
> +	dc	\op, \kaddr
> +	.endif
>  	add	\kaddr, \kaddr, \tmp1
>  	cmp	\kaddr, \size
>  	b.lo	9998b

I agree that it looks not viable because it makes the macro bigger and
conditional specifically with CVAC op.

Then.. if the number of the usage of alternative_* macros for erratum is
few (just one in this case for cache clean), I think only a small change like
below seems to be optimal and there is no need to create a variant macro of
dcache_by_line_op. What do you think about it?

/*
- *     __dma_clean_range(start, end)
+ *     __clean_dcache_area_poc(kaddr, size)
+ *
+ *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     are cleaned to the PoC.
+ *
+ *     - kaddr   - kernel address
+ *     - size    - size in question
+ */
+ENTRY(__clean_dcache_area_poc)
+       /* FALLTHROUGH */
+
+/*
+ *     __dma_clean_area(start, size)
  *     - start   - virtual start address of region
- *     - end     - virtual end address of region
+ *     - size    - size in question
  */
-__dma_clean_range:
+__dma_clean_area:
+       add     x1, x1, x0
        dcache_line_size x2, x3
        sub     x3, x2, #1
        bic     x0, x0, x3
@@ -158,24 +172,21 @@ alternative_endif
        b.lo    1b
        dsb     sy
        ret
-ENDPROC(__dma_clean_range)
+ENDPIPROC(__clean_dcache_area_poc)
+ENDPROC(__dma_clean_area)

Regards,
Kwangwoo Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-28  0:08         ` kwangwoo.lee at sk.com
  0 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee at sk.com @ 2016-07-28  0:08 UTC (permalink / raw)
  To: linux-arm-kernel

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy at arm.com]
> Sent: Wednesday, July 27, 2016 6:56 PM
> To: ???(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel at lists.infradead.org
> Cc: ???(KIM HYUNCHUL) MS SW; linux-kernel at vger.kernel.org; ???(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 27/07/16 02:55, kwangwoo.lee at sk.com wrote:
> [...]
> >>>  /*
> >>> - *	__dma_clean_range(start, end)
> >>> + *	__dma_clean_area(start, size)
> >>>   *	- start   - virtual start address of region
> >>> - *	- end     - virtual end address of region
> >>> + *	- size    - size in question
> >>>   */
> >>> -__dma_clean_range:
> >>> -	dcache_line_size x2, x3
> >>> -	sub	x3, x2, #1
> >>> -	bic	x0, x0, x3
> >>> -1:
> >>> +__dma_clean_area:
> >>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >>> -	dc	cvac, x0
> >>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >>>  alternative_else
> >>> -	dc	civac, x0
> >>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
> >>
> >> dcache_by_line_op is a relatively large macro - is there any way we can
> >> still apply the alternative to just the one instruction which needs it,
> >> as opposed to having to patch the entire mostly-identical routine?
> >
> > I agree with your opinion. Then, how do you think about using CONFIG_* options
> > like below? I think that alternative_* macros seems to keep the space for
> > unused instruction. Is it necessary? Please, share your thought about the
> > space. Thanks!
> >
> > +__dma_clean_area:
> > +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
> > +       defined(CONFIG_ARM64_ERRATUM_827319) || \
> > +       defined(CONFIG_ARM64_ERRATUM_824069) || \
> > +       defined(CONFIG_ARM64_ERRATUM_819472)
> > +       dcache_by_line_op civac, sy, x0, x1, x2, x3
> > +#else
> > +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
> > +#endif
> 
> That's not ideal, because we still only really want to use the
> workaround if we detect a CPU which needs it, rather than baking it in
> at compile time. I was thinking more along the lines of pushing the
> alternative down into dcache_by_line_op, something like the idea below
> (compile-tested only, may not actually be viable).

OK. Using the capability of CPU features seems to be preferred.

> Robin.
> 
> -----8<-----
> diff --git a/arch/arm64/include/asm/assembler.h
> b/arch/arm64/include/asm/assembler.h
> index 10b017c4bdd8..1c005c90387e 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>  	add	\size, \kaddr, \size
>  	sub	\tmp2, \tmp1, #1
>  	bic	\kaddr, \kaddr, \tmp2
> -9998:	dc	\op, \kaddr
> +9998:
> +	.ifeqs "\op", "cvac"
> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> +	dc	cvac, \kaddr
> +alternative_else
> +	dc	civac, \kaddr
> +alternative_endif
> +	.else
> +	dc	\op, \kaddr
> +	.endif
>  	add	\kaddr, \kaddr, \tmp1
>  	cmp	\kaddr, \size
>  	b.lo	9998b

I agree that it looks not viable because it makes the macro bigger and
conditional specifically with CVAC op.

Then.. if the number of the usage of alternative_* macros for erratum is
few (just one in this case for cache clean), I think only a small change like
below seems to be optimal and there is no need to create a variant macro of
dcache_by_line_op. What do you think about it?

/*
- *     __dma_clean_range(start, end)
+ *     __clean_dcache_area_poc(kaddr, size)
+ *
+ *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     are cleaned to the PoC.
+ *
+ *     - kaddr   - kernel address
+ *     - size    - size in question
+ */
+ENTRY(__clean_dcache_area_poc)
+       /* FALLTHROUGH */
+
+/*
+ *     __dma_clean_area(start, size)
  *     - start   - virtual start address of region
- *     - end     - virtual end address of region
+ *     - size    - size in question
  */
-__dma_clean_range:
+__dma_clean_area:
+       add     x1, x1, x0
        dcache_line_size x2, x3
        sub     x3, x2, #1
        bic     x0, x0, x3
@@ -158,24 +172,21 @@ alternative_endif
        b.lo    1b
        dsb     sy
        ret
-ENDPROC(__dma_clean_range)
+ENDPIPROC(__clean_dcache_area_poc)
+ENDPROC(__dma_clean_area)

Regards,
Kwangwoo Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-28  0:08         ` kwangwoo.lee at sk.com
@ 2016-07-29 17:06           ` Robin Murphy
  -1 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-07-29 17:06 UTC (permalink / raw)
  To: kwangwoo.lee, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

On 28/07/16 01:08, kwangwoo.lee@sk.com wrote:
>> -----Original Message-----
>> From: Robin Murphy [mailto:robin.murphy@arm.com]
>> Sent: Wednesday, July 27, 2016 6:56 PM
>> To: 이광우(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
>> linux-arm-kernel@lists.infradead.org
>> Cc: 김현철(KIM HYUNCHUL) MS SW; linux-kernel@vger.kernel.org; 정우석(CHUNG WOO SUK) MS SW
>> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
>>
>> On 27/07/16 02:55, kwangwoo.lee@sk.com wrote:
>> [...]
>>>>>  /*
>>>>> - *	__dma_clean_range(start, end)
>>>>> + *	__dma_clean_area(start, size)
>>>>>   *	- start   - virtual start address of region
>>>>> - *	- end     - virtual end address of region
>>>>> + *	- size    - size in question
>>>>>   */
>>>>> -__dma_clean_range:
>>>>> -	dcache_line_size x2, x3
>>>>> -	sub	x3, x2, #1
>>>>> -	bic	x0, x0, x3
>>>>> -1:
>>>>> +__dma_clean_area:
>>>>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>>>> -	dc	cvac, x0
>>>>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
>>>>>  alternative_else
>>>>> -	dc	civac, x0
>>>>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>>>>
>>>> dcache_by_line_op is a relatively large macro - is there any way we can
>>>> still apply the alternative to just the one instruction which needs it,
>>>> as opposed to having to patch the entire mostly-identical routine?
>>>
>>> I agree with your opinion. Then, how do you think about using CONFIG_* options
>>> like below? I think that alternative_* macros seems to keep the space for
>>> unused instruction. Is it necessary? Please, share your thought about the
>>> space. Thanks!
>>>
>>> +__dma_clean_area:
>>> +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
>>> +       defined(CONFIG_ARM64_ERRATUM_827319) || \
>>> +       defined(CONFIG_ARM64_ERRATUM_824069) || \
>>> +       defined(CONFIG_ARM64_ERRATUM_819472)
>>> +       dcache_by_line_op civac, sy, x0, x1, x2, x3
>>> +#else
>>> +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
>>> +#endif
>>
>> That's not ideal, because we still only really want to use the
>> workaround if we detect a CPU which needs it, rather than baking it in
>> at compile time. I was thinking more along the lines of pushing the
>> alternative down into dcache_by_line_op, something like the idea below
>> (compile-tested only, may not actually be viable).
> 
> OK. Using the capability of CPU features seems to be preferred.
> 
>> Robin.
>>
>> -----8<-----
>> diff --git a/arch/arm64/include/asm/assembler.h
>> b/arch/arm64/include/asm/assembler.h
>> index 10b017c4bdd8..1c005c90387e 100644
>> --- a/arch/arm64/include/asm/assembler.h
>> +++ b/arch/arm64/include/asm/assembler.h
>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>>  	add	\size, \kaddr, \size
>>  	sub	\tmp2, \tmp1, #1
>>  	bic	\kaddr, \kaddr, \tmp2
>> -9998:	dc	\op, \kaddr
>> +9998:
>> +	.ifeqs "\op", "cvac"
>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>> +	dc	cvac, \kaddr
>> +alternative_else
>> +	dc	civac, \kaddr
>> +alternative_endif
>> +	.else
>> +	dc	\op, \kaddr
>> +	.endif
>>  	add	\kaddr, \kaddr, \tmp1
>>  	cmp	\kaddr, \size
>>  	b.lo	9998b
> 
> I agree that it looks not viable because it makes the macro bigger and
> conditional specifically with CVAC op.

Actually, having had a poke around in the resulting disassembly, it
looks like this does work correctly. I can't think of a viable reason
for the whole dcache_by_line_op to ever be wrapped in yet another
alternative (which almost certainly would go horribly wrong), and it
would mean that any other future users are automatically covered for
free. It's just horrible to look at at the source level.

Robin.

> 
> Then.. if the number of the usage of alternative_* macros for erratum is
> few (just one in this case for cache clean), I think only small change like
> below seems to be optimal and there is no need to create a variant macro of
> dcache_cache_by_line_op. How do you think about it?
> 
> /*
> - *     __dma_clean_range(start, end)
> + *     __clean_dcache_area_poc(kaddr, size)
> + *
> + *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
> + *     are cleaned to the PoC.
> + *
> + *     - kaddr   - kernel address
> + *     - size    - size in question
> + */
> +ENTRY(__clean_dcache_area_poc)
> +       /* FALLTHROUGH */
> +
> +/*
> + *     __dma_clean_area(start, size)
>   *     - start   - virtual start address of region
> - *     - end     - virtual end address of region
> + *     - size    - size in question
>   */
> -__dma_clean_range:
> +__dma_clean_area:
> +       add     x1, x1, x0
>         dcache_line_size x2, x3
>         sub     x3, x2, #1
>         bic     x0, x0, x3
> @@ -158,24 +172,21 @@ alternative_endif
>         b.lo    1b
>         dsb     sy
>         ret
> -ENDPROC(__dma_clean_range)
> +ENDPIPROC(__clean_dcache_area_poc)
> +ENDPROC(__dma_clean_area)
> 
> Regards,
> Kwangwoo Lee
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-29 17:06           ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-07-29 17:06 UTC (permalink / raw)
  To: linux-arm-kernel

On 28/07/16 01:08, kwangwoo.lee at sk.com wrote:
>> -----Original Message-----
>> From: Robin Murphy [mailto:robin.murphy at arm.com]
>> Sent: Wednesday, July 27, 2016 6:56 PM
>> To: ???(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
>> linux-arm-kernel at lists.infradead.org
>> Cc: ???(KIM HYUNCHUL) MS SW; linux-kernel at vger.kernel.org; ???(CHUNG WOO SUK) MS SW
>> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
>>
>> On 27/07/16 02:55, kwangwoo.lee at sk.com wrote:
>> [...]
>>>>>  /*
>>>>> - *	__dma_clean_range(start, end)
>>>>> + *	__dma_clean_area(start, size)
>>>>>   *	- start   - virtual start address of region
>>>>> - *	- end     - virtual end address of region
>>>>> + *	- size    - size in question
>>>>>   */
>>>>> -__dma_clean_range:
>>>>> -	dcache_line_size x2, x3
>>>>> -	sub	x3, x2, #1
>>>>> -	bic	x0, x0, x3
>>>>> -1:
>>>>> +__dma_clean_area:
>>>>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>>>> -	dc	cvac, x0
>>>>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
>>>>>  alternative_else
>>>>> -	dc	civac, x0
>>>>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>>>>
>>>> dcache_by_line_op is a relatively large macro - is there any way we can
>>>> still apply the alternative to just the one instruction which needs it,
>>>> as opposed to having to patch the entire mostly-identical routine?
>>>
>>> I agree with your opinion. Then, how do you think about using CONFIG_* options
>>> like below? I think that alternative_* macros seems to keep the space for
>>> unused instruction. Is it necessary? Please, share your thought about the
>>> space. Thanks!
>>>
>>> +__dma_clean_area:
>>> +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
>>> +       defined(CONFIG_ARM64_ERRATUM_827319) || \
>>> +       defined(CONFIG_ARM64_ERRATUM_824069) || \
>>> +       defined(CONFIG_ARM64_ERRATUM_819472)
>>> +       dcache_by_line_op civac, sy, x0, x1, x2, x3
>>> +#else
>>> +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
>>> +#endif
>>
>> That's not ideal, because we still only really want to use the
>> workaround if we detect a CPU which needs it, rather than baking it in
>> at compile time. I was thinking more along the lines of pushing the
>> alternative down into dcache_by_line_op, something like the idea below
>> (compile-tested only, may not actually be viable).
> 
> OK. Using the capability of CPU features seems to be preferred.
> 
>> Robin.
>>
>> -----8<-----
>> diff --git a/arch/arm64/include/asm/assembler.h
>> b/arch/arm64/include/asm/assembler.h
>> index 10b017c4bdd8..1c005c90387e 100644
>> --- a/arch/arm64/include/asm/assembler.h
>> +++ b/arch/arm64/include/asm/assembler.h
>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>>  	add	\size, \kaddr, \size
>>  	sub	\tmp2, \tmp1, #1
>>  	bic	\kaddr, \kaddr, \tmp2
>> -9998:	dc	\op, \kaddr
>> +9998:
>> +	.ifeqs "\op", "cvac"
>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>> +	dc	cvac, \kaddr
>> +alternative_else
>> +	dc	civac, \kaddr
>> +alternative_endif
>> +	.else
>> +	dc	\op, \kaddr
>> +	.endif
>>  	add	\kaddr, \kaddr, \tmp1
>>  	cmp	\kaddr, \size
>>  	b.lo	9998b
> 
> I agree that it looks not viable because it makes the macro bigger and
> conditional specifically with CVAC op.

Actually, having had a poke around in the resulting disassembly, it
looks like this does work correctly. I can't think of a viable reason
for the whole dcache_by_line_op to ever be wrapped in yet another
alternative (which almost certainly would go horribly wrong), and it
would mean that any other future users are automatically covered for
free. It's just horrible to look at at the source level.

Robin.

> 
> Then.. if the number of the usage of alternative_* macros for erratum is
> few (just one in this case for cache clean), I think only small change like
> below seems to be optimal and there is no need to create a variant macro of
> dcache_by_line_op. What do you think about it?
> 
> /*
> - *     __dma_clean_range(start, end)
> + *     __clean_dcache_area_poc(kaddr, size)
> + *
> + *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
> + *     are cleaned to the PoC.
> + *
> + *     - kaddr   - kernel address
> + *     - size    - size in question
> + */
> +ENTRY(__clean_dcache_area_poc)
> +       /* FALLTHROUGH */
> +
> +/*
> + *     __dma_clean_area(start, size)
>   *     - start   - virtual start address of region
> - *     - end     - virtual end address of region
> + *     - size    - size in question
>   */
> -__dma_clean_range:
> +__dma_clean_area:
> +       add     x1, x1, x0
>         dcache_line_size x2, x3
>         sub     x3, x2, #1
>         bic     x0, x0, x3
> @@ -158,24 +172,21 @@ alternative_endif
>         b.lo    1b
>         dsb     sy
>         ret
> -ENDPROC(__dma_clean_range)
> +ENDPIPROC(__clean_dcache_area_poc)
> +ENDPROC(__dma_clean_area)
> 
> Regards,
> Kwangwoo Lee
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-29 17:06           ` Robin Murphy
@ 2016-07-31 23:45             ` kwangwoo.lee at sk.com
  -1 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee @ 2016-07-31 23:45 UTC (permalink / raw)
  To: Robin Murphy, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

Hi Robin,

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy@arm.com]
> Sent: Saturday, July 30, 2016 2:06 AM
> To: 이광우(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel@lists.infradead.org
> Cc: 김현철(KIM HYUNCHUL) MS SW; linux-kernel@vger.kernel.org; 정우석(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 28/07/16 01:08, kwangwoo.lee@sk.com wrote:
> >> -----Original Message-----
> >> From: Robin Murphy [mailto:robin.murphy@arm.com]
> >> Sent: Wednesday, July 27, 2016 6:56 PM
> >> To: 이광우(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> >> linux-arm-kernel@lists.infradead.org
> >> Cc: 김현철(KIM HYUNCHUL) MS SW; linux-kernel@vger.kernel.org; 정우석(CHUNG WOO SUK) MS SW
> >> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> >>
> >> On 27/07/16 02:55, kwangwoo.lee@sk.com wrote:
> >> [...]
> >>>>>  /*
> >>>>> - *	__dma_clean_range(start, end)
> >>>>> + *	__dma_clean_area(start, size)
> >>>>>   *	- start   - virtual start address of region
> >>>>> - *	- end     - virtual end address of region
> >>>>> + *	- size    - size in question
> >>>>>   */
> >>>>> -__dma_clean_range:
> >>>>> -	dcache_line_size x2, x3
> >>>>> -	sub	x3, x2, #1
> >>>>> -	bic	x0, x0, x3
> >>>>> -1:
> >>>>> +__dma_clean_area:
> >>>>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >>>>> -	dc	cvac, x0
> >>>>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >>>>>  alternative_else
> >>>>> -	dc	civac, x0
> >>>>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
> >>>>
> >>>> dcache_by_line_op is a relatively large macro - is there any way we can
> >>>> still apply the alternative to just the one instruction which needs it,
> >>>> as opposed to having to patch the entire mostly-identical routine?
> >>>
> >>> I agree with your opinion. Then, how do you think about using CONFIG_* options
> >>> like below? I think that alternative_* macros seems to keep the space for
> >>> unused instruction. Is it necessary? Please, share your thought about the
> >>> space. Thanks!
> >>>
> >>> +__dma_clean_area:
> >>> +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
> >>> +       defined(CONFIG_ARM64_ERRATUM_827319) || \
> >>> +       defined(CONFIG_ARM64_ERRATUM_824069) || \
> >>> +       defined(CONFIG_ARM64_ERRATUM_819472)
> >>> +       dcache_by_line_op civac, sy, x0, x1, x2, x3
> >>> +#else
> >>> +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >>> +#endif
> >>
> >> That's not ideal, because we still only really want to use the
> >> workaround if we detect a CPU which needs it, rather than baking it in
> >> at compile time. I was thinking more along the lines of pushing the
> >> alternative down into dcache_by_line_op, something like the idea below
> >> (compile-tested only, may not actually be viable).
> >
> > OK. Using the capability of CPU features seems to be preferred.
> >
> >> Robin.
> >>
> >> -----8<-----
> >> diff --git a/arch/arm64/include/asm/assembler.h
> >> b/arch/arm64/include/asm/assembler.h
> >> index 10b017c4bdd8..1c005c90387e 100644
> >> --- a/arch/arm64/include/asm/assembler.h
> >> +++ b/arch/arm64/include/asm/assembler.h
> >> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
> >>  	add	\size, \kaddr, \size
> >>  	sub	\tmp2, \tmp1, #1
> >>  	bic	\kaddr, \kaddr, \tmp2
> >> -9998:	dc	\op, \kaddr
> >> +9998:
> >> +	.ifeqs "\op", "cvac"
> >> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >> +	dc	cvac, \kaddr
> >> +alternative_else
> >> +	dc	civac, \kaddr
> >> +alternative_endif
> >> +	.else
> >> +	dc	\op, \kaddr
> >> +	.endif
> >>  	add	\kaddr, \kaddr, \tmp1
> >>  	cmp	\kaddr, \size
> >>  	b.lo	9998b
> >
> > I agree that it looks not viable because it makes the macro bigger and
> > conditional specifically with CVAC op.
> 
> Actually, having had a poke around in the resulting disassembly, it
> looks like this does work correctly. I can't think of a viable reason
> for the whole dcache_by_line_op to ever be wrapped in yet another
> alternative (which almost certainly would go horribly wrong), and it
> would mean that any other future users are automatically covered for
> free. It's just horrible to look at at the source level.

Then, are you going to send a patch for this? Or should I include this change?

> Robin.
> 
> >
> > Then.. if the number of the usage of alternative_* macros for erratum is
> > few (just one in this case for cache clean), I think only small change like
> > below seems to be optimal and there is no need to create a variant macro of
> > dcache_by_line_op. What do you think about it?
[...]

Regards,
Kwangwoo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-07-31 23:45             ` kwangwoo.lee at sk.com
  0 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee at sk.com @ 2016-07-31 23:45 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Robin,

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy at arm.com]
> Sent: Saturday, July 30, 2016 2:06 AM
> To: ???(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel at lists.infradead.org
> Cc: ???(KIM HYUNCHUL) MS SW; linux-kernel at vger.kernel.org; ???(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 28/07/16 01:08, kwangwoo.lee at sk.com wrote:
> >> -----Original Message-----
> >> From: Robin Murphy [mailto:robin.murphy at arm.com]
> >> Sent: Wednesday, July 27, 2016 6:56 PM
> >> To: ???(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> >> linux-arm-kernel at lists.infradead.org
> >> Cc: ???(KIM HYUNCHUL) MS SW; linux-kernel at vger.kernel.org; ???(CHUNG WOO SUK) MS SW
> >> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> >>
> >> On 27/07/16 02:55, kwangwoo.lee at sk.com wrote:
> >> [...]
> >>>>>  /*
> >>>>> - *	__dma_clean_range(start, end)
> >>>>> + *	__dma_clean_area(start, size)
> >>>>>   *	- start   - virtual start address of region
> >>>>> - *	- end     - virtual end address of region
> >>>>> + *	- size    - size in question
> >>>>>   */
> >>>>> -__dma_clean_range:
> >>>>> -	dcache_line_size x2, x3
> >>>>> -	sub	x3, x2, #1
> >>>>> -	bic	x0, x0, x3
> >>>>> -1:
> >>>>> +__dma_clean_area:
> >>>>>  alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >>>>> -	dc	cvac, x0
> >>>>> +	dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >>>>>  alternative_else
> >>>>> -	dc	civac, x0
> >>>>> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
> >>>>
> >>>> dcache_by_line_op is a relatively large macro - is there any way we can
> >>>> still apply the alternative to just the one instruction which needs it,
> >>>> as opposed to having to patch the entire mostly-identical routine?
> >>>
> >>> I agree with your opinion. Then, how do you think about using CONFIG_* options
> >>> like below? I think that alternative_* macros seems to keep the space for
> >>> unused instruction. Is it necessary? Please, share your thought about the
> >>> space. Thanks!
> >>>
> >>> +__dma_clean_area:
> >>> +#if    defined(CONFIG_ARM64_ERRATUM_826319) || \
> >>> +       defined(CONFIG_ARM64_ERRATUM_827319) || \
> >>> +       defined(CONFIG_ARM64_ERRATUM_824069) || \
> >>> +       defined(CONFIG_ARM64_ERRATUM_819472)
> >>> +       dcache_by_line_op civac, sy, x0, x1, x2, x3
> >>> +#else
> >>> +       dcache_by_line_op cvac, sy, x0, x1, x2, x3
> >>> +#endif
> >>
> >> That's not ideal, because we still only really want to use the
> >> workaround if we detect a CPU which needs it, rather than baking it in
> >> at compile time. I was thinking more along the lines of pushing the
> >> alternative down into dcache_by_line_op, something like the idea below
> >> (compile-tested only, may not actually be viable).
> >
> > OK. Using the capability of CPU features seems to be preferred.
> >
> >> Robin.
> >>
> >> -----8<-----
> >> diff --git a/arch/arm64/include/asm/assembler.h
> >> b/arch/arm64/include/asm/assembler.h
> >> index 10b017c4bdd8..1c005c90387e 100644
> >> --- a/arch/arm64/include/asm/assembler.h
> >> +++ b/arch/arm64/include/asm/assembler.h
> >> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
> >>  	add	\size, \kaddr, \size
> >>  	sub	\tmp2, \tmp1, #1
> >>  	bic	\kaddr, \kaddr, \tmp2
> >> -9998:	dc	\op, \kaddr
> >> +9998:
> >> +	.ifeqs "\op", "cvac"
> >> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >> +	dc	cvac, \kaddr
> >> +alternative_else
> >> +	dc	civac, \kaddr
> >> +alternative_endif
> >> +	.else
> >> +	dc	\op, \kaddr
> >> +	.endif
> >>  	add	\kaddr, \kaddr, \tmp1
> >>  	cmp	\kaddr, \size
> >>  	b.lo	9998b
> >
> > I agree that it looks not viable because it makes the macro bigger and
> > conditional specifically with CVAC op.
> 
> Actually, having had a poke around in the resulting disassembly, it
> looks like this does work correctly. I can't think of a viable reason
> for the whole dcache_by_line_op to ever be wrapped in yet another
> alternative (which almost certainly would go horribly wrong), and it
> would mean that any other future users are automatically covered for
> free. It's just horrible to look at at the source level.

Then, are you going to send a patch for this? Or should I include this change?

> Robin.
> 
> >
> > Then.. if the number of the usage of alternative_* macros for erratum is
> > few (just one in this case for cache clean), I think only small change like
> > below seems to be optimal and there is no need to create a variant macro of
> > dcache_by_line_op. What do you think about it?
[...]

Regards,
Kwangwoo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-07-31 23:45             ` kwangwoo.lee at sk.com
@ 2016-08-01 13:36               ` Robin Murphy
  -1 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-08-01 13:36 UTC (permalink / raw)
  To: kwangwoo.lee, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

On 01/08/16 00:45, kwangwoo.lee@sk.com wrote:
[...]
>>>> -----8<-----
>>>> diff --git a/arch/arm64/include/asm/assembler.h
>>>> b/arch/arm64/include/asm/assembler.h
>>>> index 10b017c4bdd8..1c005c90387e 100644
>>>> --- a/arch/arm64/include/asm/assembler.h
>>>> +++ b/arch/arm64/include/asm/assembler.h
>>>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>>>>  	add	\size, \kaddr, \size
>>>>  	sub	\tmp2, \tmp1, #1
>>>>  	bic	\kaddr, \kaddr, \tmp2
>>>> -9998:	dc	\op, \kaddr
>>>> +9998:
>>>> +	.ifeqs "\op", "cvac"
>>>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>>> +	dc	cvac, \kaddr
>>>> +alternative_else
>>>> +	dc	civac, \kaddr
>>>> +alternative_endif
>>>> +	.else
>>>> +	dc	\op, \kaddr
>>>> +	.endif
>>>>  	add	\kaddr, \kaddr, \tmp1
>>>>  	cmp	\kaddr, \size
>>>>  	b.lo	9998b
>>>
>>> I agree that it looks not viable because it makes the macro bigger and
>>> conditional specifically with CVAC op.
>>
>> Actually, having had a poke around in the resulting disassembly, it
>> looks like this does work correctly. I can't think of a viable reason
>> for the whole dcache_by_line_op to ever be wrapped in yet another
>> alternative (which almost certainly would go horribly wrong), and it
>> would mean that any other future users are automatically covered for
>> free. It's just horrible to look at at the source level.
> 
> Then, Are you going to send a patch for this? Or should I include this change?

I'll do a bit more testing just to make sure, then spin a separate patch
(and try to remember to keep you on CC..)

Robin.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-08-01 13:36               ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-08-01 13:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/08/16 00:45, kwangwoo.lee at sk.com wrote:
[...]
>>>> -----8<-----
>>>> diff --git a/arch/arm64/include/asm/assembler.h
>>>> b/arch/arm64/include/asm/assembler.h
>>>> index 10b017c4bdd8..1c005c90387e 100644
>>>> --- a/arch/arm64/include/asm/assembler.h
>>>> +++ b/arch/arm64/include/asm/assembler.h
>>>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>>>>  	add	\size, \kaddr, \size
>>>>  	sub	\tmp2, \tmp1, #1
>>>>  	bic	\kaddr, \kaddr, \tmp2
>>>> -9998:	dc	\op, \kaddr
>>>> +9998:
>>>> +	.ifeqs "\op", "cvac"
>>>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>>> +	dc	cvac, \kaddr
>>>> +alternative_else
>>>> +	dc	civac, \kaddr
>>>> +alternative_endif
>>>> +	.else
>>>> +	dc	\op, \kaddr
>>>> +	.endif
>>>>  	add	\kaddr, \kaddr, \tmp1
>>>>  	cmp	\kaddr, \size
>>>>  	b.lo	9998b
>>>
>>> I agree that it looks not viable because it makes the macro bigger and
>>> conditional specifically with CVAC op.
>>
>> Actually, having had a poke around in the resulting disassembly, it
>> looks like this does work correctly. I can't think of a viable reason
>> for the whole dcache_by_line_op to ever be wrapped in yet another
>> alternative (which almost certainly would go horribly wrong), and it
>> would mean that any other future users are automatically covered for
>> free. It's just horrible to look at at the source level.
> 
> Then, Are you going to send a patch for this? Or should I include this change?

I'll do a bit more testing just to make sure, then spin a separate patch
(and try to remember to keep you on CC..)

Robin.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-08-01 13:36               ` Robin Murphy
@ 2016-08-01 13:53                 ` Robin Murphy
  -1 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-08-01 13:53 UTC (permalink / raw)
  To: kwangwoo.lee, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

On 01/08/16 14:36, Robin Murphy wrote:
> On 01/08/16 00:45, kwangwoo.lee@sk.com wrote:
> [...]
>>>>> -----8<-----
>>>>> diff --git a/arch/arm64/include/asm/assembler.h
>>>>> b/arch/arm64/include/asm/assembler.h
>>>>> index 10b017c4bdd8..1c005c90387e 100644
>>>>> --- a/arch/arm64/include/asm/assembler.h
>>>>> +++ b/arch/arm64/include/asm/assembler.h
>>>>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>>>>>  	add	\size, \kaddr, \size
>>>>>  	sub	\tmp2, \tmp1, #1
>>>>>  	bic	\kaddr, \kaddr, \tmp2
>>>>> -9998:	dc	\op, \kaddr
>>>>> +9998:
>>>>> +	.ifeqs "\op", "cvac"
>>>>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>>>> +	dc	cvac, \kaddr
>>>>> +alternative_else
>>>>> +	dc	civac, \kaddr
>>>>> +alternative_endif
>>>>> +	.else
>>>>> +	dc	\op, \kaddr
>>>>> +	.endif
>>>>>  	add	\kaddr, \kaddr, \tmp1
>>>>>  	cmp	\kaddr, \size
>>>>>  	b.lo	9998b
>>>>
>>>> I agree that it looks not viable because it makes the macro bigger and
>>>> conditional specifically with CVAC op.
>>>
>>> Actually, having had a poke around in the resulting disassembly, it
>>> looks like this does work correctly. I can't think of a viable reason
>>> for the whole dcache_by_line_op to ever be wrapped in yet another
>>> alternative (which almost certainly would go horribly wrong), and it
>>> would mean that any other future users are automatically covered for
>>> free. It's just horrible to look at at the source level.
>>
>> Then, Are you going to send a patch for this? Or should I include this change?
> 
> I'll do a bit more testing just to make sure, then spin a separate patch
> (and try to remember to keep you on CC..)

...and said patch turns out to conflict with 823066d9edcd, since I
hadn't realised it's already been fixed! So you can go ahead with the
dcache_by_line_op cleanup as well, just rebase onto arm64/for-next/core
(or linux/master, since it's been pulled already).

Robin.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-08-01 13:53                 ` Robin Murphy
  0 siblings, 0 replies; 20+ messages in thread
From: Robin Murphy @ 2016-08-01 13:53 UTC (permalink / raw)
  To: linux-arm-kernel

On 01/08/16 14:36, Robin Murphy wrote:
> On 01/08/16 00:45, kwangwoo.lee at sk.com wrote:
> [...]
>>>>> -----8<-----
>>>>> diff --git a/arch/arm64/include/asm/assembler.h
>>>>> b/arch/arm64/include/asm/assembler.h
>>>>> index 10b017c4bdd8..1c005c90387e 100644
>>>>> --- a/arch/arm64/include/asm/assembler.h
>>>>> +++ b/arch/arm64/include/asm/assembler.h
>>>>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
>>>>>  	add	\size, \kaddr, \size
>>>>>  	sub	\tmp2, \tmp1, #1
>>>>>  	bic	\kaddr, \kaddr, \tmp2
>>>>> -9998:	dc	\op, \kaddr
>>>>> +9998:
>>>>> +	.ifeqs "\op", "cvac"
>>>>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
>>>>> +	dc	cvac, \kaddr
>>>>> +alternative_else
>>>>> +	dc	civac, \kaddr
>>>>> +alternative_endif
>>>>> +	.else
>>>>> +	dc	\op, \kaddr
>>>>> +	.endif
>>>>>  	add	\kaddr, \kaddr, \tmp1
>>>>>  	cmp	\kaddr, \size
>>>>>  	b.lo	9998b
>>>>
>>>> I agree that it looks not viable because it makes the macro bigger and
>>>> conditional specifically with CVAC op.
>>>
>>> Actually, having had a poke around in the resulting disassembly, it
>>> looks like this does work correctly. I can't think of a viable reason
>>> for the whole dcache_by_line_op to ever be wrapped in yet another
>>> alternative (which almost certainly would go horribly wrong), and it
>>> would mean that any other future users are automatically covered for
>>> free. It's just horrible to look at at the source level.
>>
>> Then, Are you going to send a patch for this? Or should I include this change?
> 
> I'll do a bit more testing just to make sure, then spin a separate patch
> (and try to remember to keep you on CC..)

...and said patch turns out to conflict with 823066d9edcd, since I
hadn't realised it's already been fixed! So you can go ahead with the
dcache_by_line_op cleanup as well, just rebase onto arm64/for-next/core
(or linux/master, since it's been pulled already).

Robin.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
  2016-08-01 13:53                 ` Robin Murphy
@ 2016-08-01 23:24                   ` kwangwoo.lee at sk.com
  -1 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee @ 2016-08-01 23:24 UTC (permalink / raw)
  To: Robin Murphy, Russell King - ARM Linux, Catalin Marinas,
	Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: hyunchul3.kim, linux-kernel, woosuk.chung

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy@arm.com]
> Sent: Monday, August 01, 2016 10:53 PM
> To: 이광우(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel@lists.infradead.org
> Cc: 김현철(KIM HYUNCHUL) MS SW; linux-kernel@vger.kernel.org; 정우석(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 01/08/16 14:36, Robin Murphy wrote:
> > On 01/08/16 00:45, kwangwoo.lee@sk.com wrote:
> > [...]
> >>>>> -----8<-----
> >>>>> diff --git a/arch/arm64/include/asm/assembler.h
> >>>>> b/arch/arm64/include/asm/assembler.h
> >>>>> index 10b017c4bdd8..1c005c90387e 100644
> >>>>> --- a/arch/arm64/include/asm/assembler.h
> >>>>> +++ b/arch/arm64/include/asm/assembler.h
> >>>>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
> >>>>>  	add	\size, \kaddr, \size
> >>>>>  	sub	\tmp2, \tmp1, #1
> >>>>>  	bic	\kaddr, \kaddr, \tmp2
> >>>>> -9998:	dc	\op, \kaddr
> >>>>> +9998:
> >>>>> +	.ifeqs "\op", "cvac"
> >>>>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >>>>> +	dc	cvac, \kaddr
> >>>>> +alternative_else
> >>>>> +	dc	civac, \kaddr
> >>>>> +alternative_endif
> >>>>> +	.else
> >>>>> +	dc	\op, \kaddr
> >>>>> +	.endif
> >>>>>  	add	\kaddr, \kaddr, \tmp1
> >>>>>  	cmp	\kaddr, \size
> >>>>>  	b.lo	9998b
> >>>>
> >>>> I agree that it looks not viable because it makes the macro bigger and
> >>>> conditional specifically with CVAC op.
> >>>
> >>> Actually, having had a poke around in the resulting disassembly, it
> >>> looks like this does work correctly. I can't think of a viable reason
> >>> for the whole dcache_by_line_op to ever be wrapped in yet another
> >>> alternative (which almost certainly would go horribly wrong), and it
> >>> would mean that any other future users are automatically covered for
> >>> free. It's just horrible to look at at the source level.
> >>
> >> Then, Are you going to send a patch for this? Or should I include this change?
> >
> > I'll do a bit more testing just to make sure, then spin a separate patch
> > (and try to remember to keep you on CC..)
> 
> ...and said patch turns out to conflict with 823066d9edcd, since I
> hadn't realised it's already been fixed! So you can go ahead with the
> dcache_by_line_op cleanup as well, just rebase onto arm64/for-next/core
> (or linux/master, since it's been pulled already).

Thank you very much for the information! I'll rebase with it. 

> Robin.

Best Regards,
Kwangwoo Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
@ 2016-08-01 23:24                   ` kwangwoo.lee at sk.com
  0 siblings, 0 replies; 20+ messages in thread
From: kwangwoo.lee at sk.com @ 2016-08-01 23:24 UTC (permalink / raw)
  To: linux-arm-kernel

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy at arm.com]
> Sent: Monday, August 01, 2016 10:53 PM
> To: ???(LEE KWANGWOO) MS SW; Russell King - ARM Linux; Catalin Marinas; Will Deacon; Mark Rutland;
> linux-arm-kernel at lists.infradead.org
> Cc: ???(KIM HYUNCHUL) MS SW; linux-kernel at vger.kernel.org; ???(CHUNG WOO SUK) MS SW
> Subject: Re: [PATCH v2] arm64: mm: convert __dma_* routines to use start, size
> 
> On 01/08/16 14:36, Robin Murphy wrote:
> > On 01/08/16 00:45, kwangwoo.lee at sk.com wrote:
> > [...]
> >>>>> -----8<-----
> >>>>> diff --git a/arch/arm64/include/asm/assembler.h
> >>>>> b/arch/arm64/include/asm/assembler.h
> >>>>> index 10b017c4bdd8..1c005c90387e 100644
> >>>>> --- a/arch/arm64/include/asm/assembler.h
> >>>>> +++ b/arch/arm64/include/asm/assembler.h
> >>>>> @@ -261,7 +261,16 @@ lr	.req	x30		// link register
> >>>>>  	add	\size, \kaddr, \size
> >>>>>  	sub	\tmp2, \tmp1, #1
> >>>>>  	bic	\kaddr, \kaddr, \tmp2
> >>>>> -9998:	dc	\op, \kaddr
> >>>>> +9998:
> >>>>> +	.ifeqs "\op", "cvac"
> >>>>> +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
> >>>>> +	dc	cvac, \kaddr
> >>>>> +alternative_else
> >>>>> +	dc	civac, \kaddr
> >>>>> +alternative_endif
> >>>>> +	.else
> >>>>> +	dc	\op, \kaddr
> >>>>> +	.endif
> >>>>>  	add	\kaddr, \kaddr, \tmp1
> >>>>>  	cmp	\kaddr, \size
> >>>>>  	b.lo	9998b
> >>>>
> >>>> I agree that it looks not viable because it makes the macro bigger and
> >>>> conditional specifically with CVAC op.
> >>>
> >>> Actually, having had a poke around in the resulting disassembly, it
> >>> looks like this does work correctly. I can't think of a viable reason
> >>> for the whole dcache_by_line_op to ever be wrapped in yet another
> >>> alternative (which almost certainly would go horribly wrong), and it
> >>> would mean that any other future users are automatically covered for
> >>> free. It's just horrible to look at at the source level.
> >>
> >> Then, Are you going to send a patch for this? Or should I include this change?
> >
> > I'll do a bit more testing just to make sure, then spin a separate patch
> > (and try to remember to keep you on CC..)
> 
> ...and said patch turns out to conflict with 823066d9edcd, since I
> hadn't realised it's already been fixed! So you can go ahead with the
> dcache_by_line_op cleanup as well, just rebase onto arm64/for-next/core
> (or linux/master, since it's been pulled already).

Thank you very much for the information! I'll rebase with it. 

> Robin.

Best Regards,
Kwangwoo Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2016-08-01 23:25 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-26  7:34 [PATCH v2] arm64: mm: convert __dma_* routines to use start, size Kwangwoo Lee
2016-07-26  7:34 ` Kwangwoo Lee
2016-07-26 10:43 ` Robin Murphy
2016-07-26 10:43   ` Robin Murphy
2016-07-27  1:55   ` kwangwoo.lee
2016-07-27  1:55     ` kwangwoo.lee at sk.com
2016-07-27  9:56     ` Robin Murphy
2016-07-27  9:56       ` Robin Murphy
2016-07-28  0:08       ` kwangwoo.lee
2016-07-28  0:08         ` kwangwoo.lee at sk.com
2016-07-29 17:06         ` Robin Murphy
2016-07-29 17:06           ` Robin Murphy
2016-07-31 23:45           ` kwangwoo.lee
2016-07-31 23:45             ` kwangwoo.lee at sk.com
2016-08-01 13:36             ` Robin Murphy
2016-08-01 13:36               ` Robin Murphy
2016-08-01 13:53               ` Robin Murphy
2016-08-01 13:53                 ` Robin Murphy
2016-08-01 23:24                 ` kwangwoo.lee
2016-08-01 23:24                   ` kwangwoo.lee at sk.com

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.