linux-media.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [LINUX PATCH] dma-mapping: Control memset operation using gfp flags
@ 2019-09-17 20:21 Dylan Yip
  2019-09-17 20:39 ` Dylan Yip
  0 siblings, 1 reply; 2+ messages in thread
From: Dylan Yip @ 2019-09-17 20:21 UTC (permalink / raw)
  To: linux-media, satishna; +Cc: Dylan Yip

In the case of a 4K video buffer, allocation from reserved memory is
taking a long time, ~500ms. This was root-caused to the memset()
operations on the allocated memory, which consume a large number of
CPU cycles. Due to this delay, we see that initial frames are being
dropped.

To fix this, we have made the default memset, done when allocating
coherent memory, conditional on the __GFP_ZERO flag. That is, we only
clear the allocated memory if the __GFP_ZERO flag is set. We believe
this is safe because the video decoder always writes to the buffer
before reading from it. This optimizes decoder initialization, as we
do not set the __GFP_ZERO flag when allocating memory for the decoder.
With this optimization, we no longer see initial frame drops, and
decoder initialization time is ~100ms.

This patch adds plumbing through the dma_alloc functions to pass the
gfp flags supplied by the caller down to __dma_alloc_from_coherent().
There the gfp flags are checked for __GFP_ZERO: if the flag is set, we
memset the buffer to 0; otherwise we skip the memset.

Signed-off-by: Dylan Yip <dylan.yip@xilinx.com>
---
 arch/arm/mm/dma-mapping-nommu.c |  2 +-
 include/linux/dma-mapping.h     | 11 +++++++----
 kernel/dma/coherent.c           | 15 +++++++++------
 kernel/dma/mapping.c            |  2 +-
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 52b8255..242b2c3 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
 				 unsigned long attrs)
 
 {
-	void *ret = dma_alloc_from_global_coherent(size, dma_handle);
+	void *ret = dma_alloc_from_global_coherent(size, dma_handle, gfp);
 
 	/*
 	 * dma_alloc_from_global_coherent() may fail because:
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea..b715c9f 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -160,24 +160,27 @@ static inline int is_device_dma_capable(struct device *dev)
  * Don't use them in device drivers.
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
-				       dma_addr_t *dma_handle, void **ret);
+				       dma_addr_t *dma_handle, void **ret,
+				       gfp_t flag);
 int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
 
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
 			    void *cpu_addr, size_t size, int *ret);
 
-void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle);
+void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle,
+				     gfp_t flag);
 int dma_release_from_global_coherent(int order, void *vaddr);
 int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
 				  size_t size, int *ret);
 
 #else
-#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
+#define dma_alloc_from_dev_coherent(dev, size, handle, ret, flag) (0)
 #define dma_release_from_dev_coherent(dev, order, vaddr) (0)
 #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
 
 static inline void *dma_alloc_from_global_coherent(ssize_t size,
-						   dma_addr_t *dma_handle)
+						   dma_addr_t *dma_handle,
+						   gfp_t flag)
 {
 	return NULL;
 }
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 29fd659..d85fab5 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -136,7 +136,7 @@ void dma_release_declared_memory(struct device *dev)
 EXPORT_SYMBOL(dma_release_declared_memory);
 
 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
-		ssize_t size, dma_addr_t *dma_handle)
+		ssize_t size, dma_addr_t *dma_handle, gfp_t gfp_flag)
 {
 	int order = get_order(size);
 	unsigned long flags;
@@ -158,7 +158,8 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	ret = mem->virt_base + (pageno << PAGE_SHIFT);
 	spin_unlock_irqrestore(&mem->spinlock, flags);
-	memset(ret, 0, size);
+	if (gfp_flag & __GFP_ZERO)
+		memset(ret, 0, size);
 	return ret;
 err:
 	spin_unlock_irqrestore(&mem->spinlock, flags);
@@ -172,6 +173,7 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
  * @dma_handle:	This will be filled with the correct dma handle
  * @ret:	This pointer will be filled with the virtual address
  *		to allocated area.
+ * @flag:      gfp flag set by user
  *
  * This function should be only called from per-arch dma_alloc_coherent()
  * to support allocation from per-device coherent memory pools.
@@ -180,24 +182,25 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
  * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
-		dma_addr_t *dma_handle, void **ret)
+		dma_addr_t *dma_handle, void **ret, gfp_t flag)
 {
 	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 
 	if (!mem)
 		return 0;
 
-	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
+	*ret = __dma_alloc_from_coherent(mem, size, dma_handle, flag);
 	return 1;
 }
 
-void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
+void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle,
+				     gfp_t flag)
 {
 	if (!dma_coherent_default_memory)
 		return NULL;
 
 	return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
-			dma_handle);
+			dma_handle, flag);
 }
 
 static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b0038ca..bfea1d2 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -272,7 +272,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	WARN_ON_ONCE(!dev->coherent_dma_mask);
 
-	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
+	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr, flag))
 		return cpu_addr;
 
 	/* let the implementation decide on the zone to allocate from: */
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* RE: [LINUX PATCH] dma-mapping: Control memset operation using gfp flags
  2019-09-17 20:21 [LINUX PATCH] dma-mapping: Control memset operation using gfp flags Dylan Yip
@ 2019-09-17 20:39 ` Dylan Yip
  0 siblings, 0 replies; 2+ messages in thread
From: Dylan Yip @ 2019-09-17 20:39 UTC (permalink / raw)
  To: Dylan Yip, linux-media, Satish Kumar Nagireddy

Please ignore, sent to wrong list.

> -----Original Message-----
> From: Dylan Yip <dylan.yip@xilinx.com>
> Sent: Tuesday, September 17, 2019 1:21 PM
> To: linux-media@vger.kernel.org; Satish Kumar Nagireddy
> <SATISHNA@xilinx.com>
> Cc: Dylan Yip <dylany@xilinx.com>
> Subject: [LINUX PATCH] dma-mapping: Control memset operation using gfp
> flags
> 
> In case of 4k video buffer, the allocation from a reserved memory is taking a
> long time, ~500ms. This is root caused to the memset() operations on the
> allocated memory which is consuming more cpu cycles.
> Due to this delay, we see that initial frames are being dropped.
> 
> To fix this, we have wrapped the default memset, done when allocating
> coherent memory, under the __GFP_ZERO flag. So, we only clear allocated
> memory if __GFP_ZERO flag is enabled. We believe this should be safe as the
> video decoder always writes before reading.
> This optimizes decoder initialization as we do not set the __GFP_ZERO flag
> when allocating memory for decoder. With this optimization, we don't see
> initial frame drops and decoder initialization time is ~100ms.
> 
> This patch adds plumbing through dma_alloc functions to pass gfp flag set by
> user to __dma_alloc_from_coherent(). Here gfp flag is checked for
> __GFP_ZERO. If present, we memset the buffer to 0 otherwise we skip
> memset.
> 
> Signed-off-by: Dylan Yip <dylan.yip@xilinx.com>
> ---
>  arch/arm/mm/dma-mapping-nommu.c |  2 +-
>  include/linux/dma-mapping.h     | 11 +++++++----
>  kernel/dma/coherent.c           | 15 +++++++++------
>  kernel/dma/mapping.c            |  2 +-
>  4 files changed, 18 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-
> mapping-nommu.c index 52b8255..242b2c3 100644
> --- a/arch/arm/mm/dma-mapping-nommu.c
> +++ b/arch/arm/mm/dma-mapping-nommu.c
> @@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev,
> size_t size,
>  				 unsigned long attrs)
> 
>  {
> -	void *ret = dma_alloc_from_global_coherent(size, dma_handle);
> +	void *ret = dma_alloc_from_global_coherent(size, dma_handle,
> gfp);
> 
>  	/*
>  	 * dma_alloc_from_global_coherent() may fail because:
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index f7d1eea..b715c9f 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -160,24 +160,27 @@ static inline int is_device_dma_capable(struct
> device *dev)
>   * Don't use them in device drivers.
>   */
>  int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
> -				       dma_addr_t *dma_handle, void **ret);
> +				       dma_addr_t *dma_handle, void **ret,
> +				       gfp_t flag);
>  int dma_release_from_dev_coherent(struct device *dev, int order, void
> *vaddr);
> 
>  int dma_mmap_from_dev_coherent(struct device *dev, struct
> vm_area_struct *vma,
>  			    void *cpu_addr, size_t size, int *ret);
> 
> -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t
> *dma_handle);
> +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t
> *dma_handle,
> +				     gfp_t flag);
>  int dma_release_from_global_coherent(int order, void *vaddr);  int
> dma_mmap_from_global_coherent(struct vm_area_struct *vma, void
> *cpu_addr,
>  				  size_t size, int *ret);
> 
>  #else
> -#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
> +#define dma_alloc_from_dev_coherent(dev, size, handle, ret, flag) (0)
>  #define dma_release_from_dev_coherent(dev, order, vaddr) (0)  #define
> dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
> 
>  static inline void *dma_alloc_from_global_coherent(ssize_t size,
> -						   dma_addr_t *dma_handle)
> +						   dma_addr_t *dma_handle,
> +						   gfp_t flag)
>  {
>  	return NULL;
>  }
> diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index
> 29fd659..d85fab5 100644
> --- a/kernel/dma/coherent.c
> +++ b/kernel/dma/coherent.c
> @@ -136,7 +136,7 @@ void dma_release_declared_memory(struct device
> *dev)  EXPORT_SYMBOL(dma_release_declared_memory);
> 
>  static void *__dma_alloc_from_coherent(struct dma_coherent_mem
> *mem,
> -		ssize_t size, dma_addr_t *dma_handle)
> +		ssize_t size, dma_addr_t *dma_handle, gfp_t gfp_flag)
>  {
>  	int order = get_order(size);
>  	unsigned long flags;
> @@ -158,7 +158,8 @@ static void *__dma_alloc_from_coherent(struct
> dma_coherent_mem *mem,
>  	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
>  	ret = mem->virt_base + (pageno << PAGE_SHIFT);
>  	spin_unlock_irqrestore(&mem->spinlock, flags);
> -	memset(ret, 0, size);
> +	if (gfp_flag & __GFP_ZERO)
> +		memset(ret, 0, size);
>  	return ret;
>  err:
>  	spin_unlock_irqrestore(&mem->spinlock, flags); @@ -172,6 +173,7
> @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem
> *mem,
>   * @dma_handle:	This will be filled with the correct dma handle
>   * @ret:	This pointer will be filled with the virtual address
>   *		to allocated area.
> + * @flag:      gfp flag set by user
>   *
>   * This function should be only called from per-arch dma_alloc_coherent()
>   * to support allocation from per-device coherent memory pools.
> @@ -180,24 +182,25 @@ static void *__dma_alloc_from_coherent(struct
> dma_coherent_mem *mem,
>   * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
>   */
>  int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
> -		dma_addr_t *dma_handle, void **ret)
> +		dma_addr_t *dma_handle, void **ret, gfp_t flag)
>  {
>  	struct dma_coherent_mem *mem =
> dev_get_coherent_memory(dev);
> 
>  	if (!mem)
>  		return 0;
> 
> -	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
> +	*ret = __dma_alloc_from_coherent(mem, size, dma_handle, flag);
>  	return 1;
>  }
> 
> -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t
> *dma_handle)
> +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t
> *dma_handle,
> +				     gfp_t flag)
>  {
>  	if (!dma_coherent_default_memory)
>  		return NULL;
> 
>  	return
> __dma_alloc_from_coherent(dma_coherent_default_memory, size,
> -			dma_handle);
> +			dma_handle, flag);
>  }
> 
>  static int __dma_release_from_coherent(struct dma_coherent_mem
> *mem, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index
> b0038ca..bfea1d2 100644
> --- a/kernel/dma/mapping.c
> +++ b/kernel/dma/mapping.c
> @@ -272,7 +272,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size,
> dma_addr_t *dma_handle,
> 
>  	WARN_ON_ONCE(!dev->coherent_dma_mask);
> 
> -	if (dma_alloc_from_dev_coherent(dev, size, dma_handle,
> &cpu_addr))
> +	if (dma_alloc_from_dev_coherent(dev, size, dma_handle,
> &cpu_addr,
> +flag))
>  		return cpu_addr;
> 
>  	/* let the implementation decide on the zone to allocate from: */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-09-17 20:39 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-17 20:21 [LINUX PATCH] dma-mapping: Control memset operation using gfp flags Dylan Yip
2019-09-17 20:39 ` Dylan Yip

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).