All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
@ 2016-10-24  5:29 mzoran
  2016-10-24 13:24 ` Greg KH
  2016-10-24 17:31 ` Eric Anholt
  0 siblings, 2 replies; 9+ messages in thread
From: mzoran @ 2016-10-24  5:29 UTC (permalink / raw)
  To: gregkh; +Cc: mzoran, daniels, eric, noralf, popcornmix, devel, linux-kernel

From: Michael Zoran <mzoran@crowfest.net>

The original arm implementation uses dmac_map_area which is not
portable.  Replace it with an architecture neutral version
which uses dma_map_sg.

As you can see, for larger page sizes the dma_map_sg
implementation is faster than the original unportable
dmac_map_area implementation.

Test                       dmac_map_area   dma_map_page dma_map_sg
vchiq_test -b 4 10000      51us/iter       76us/iter    76us
vchiq_test -b 8 10000      70us/iter       82us/iter    91us
vchiq_test -b 16 10000     94us/iter       118us/iter   121us
vchiq_test -b 32 10000     146us/iter      173us/iter   187us
vchiq_test -b 64 10000     263us/iter      328us/iter   299us
vchiq_test -b 128 10000    529us/iter      631us/iter   595us
vchiq_test -b 256 10000    2285us/iter     2275us/iter  2001us
vchiq_test -b 512 10000    4372us/iter     4616us/iter  4123us

For message sizes >= 64KB, dma_map_sg is faster than dma_map_page.

For message sizes >= 256KB, dma_map_sg is the fastest
implementation.

Signed-off-by: Michael Zoran <mzoran@crowfest.net>
---
 .../interface/vchiq_arm/vchiq_2835_arm.c           | 153 ++++++++++++---------
 1 file changed, 91 insertions(+), 62 deletions(-)

diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
index 98c6819..5ca66ee 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
@@ -45,13 +45,8 @@
 #include <asm/pgtable.h>
 #include <soc/bcm2835/raspberrypi-firmware.h>
 
-#define dmac_map_area			__glue(_CACHE,_dma_map_area)
-#define dmac_unmap_area 		__glue(_CACHE,_dma_unmap_area)
-
 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
 
-#define VCHIQ_ARM_ADDRESS(x) ((void *)((char *)x + g_virt_to_bus_offset))
-
 #include "vchiq_arm.h"
 #include "vchiq_2835.h"
 #include "vchiq_connected.h"
@@ -73,7 +68,7 @@ static unsigned int g_fragments_size;
 static char *g_fragments_base;
 static char *g_free_fragments;
 static struct semaphore g_free_fragments_sema;
-static unsigned long g_virt_to_bus_offset;
+static struct device *g_dev;
 
 extern int vchiq_arm_log_level;
 
@@ -84,10 +79,11 @@ vchiq_doorbell_irq(int irq, void *dev_id);
 
 static int
 create_pagelist(char __user *buf, size_t count, unsigned short type,
-                struct task_struct *task, PAGELIST_T ** ppagelist);
+		struct task_struct *task, PAGELIST_T **ppagelist,
+		dma_addr_t *dma_addr);
 
 static void
-free_pagelist(PAGELIST_T *pagelist, int actual);
+free_pagelist(dma_addr_t dma_addr, PAGELIST_T *pagelist, int actual);
 
 int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
 {
@@ -101,8 +97,6 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
 	int slot_mem_size, frag_mem_size;
 	int err, irq, i;
 
-	g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
-
 	(void)of_property_read_u32(dev->of_node, "cache-line-size",
 				   &g_cache_line_size);
 	g_fragments_size = 2 * g_cache_line_size;
@@ -118,7 +112,7 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
 		return -ENOMEM;
 	}
 
-	WARN_ON(((int)slot_mem & (PAGE_SIZE - 1)) != 0);
+	WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0);
 
 	vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
 	if (!vchiq_slot_zero)
@@ -170,6 +164,7 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
 		return err ? : -ENXIO;
 	}
 
+	g_dev = dev;
 	vchiq_log_info(vchiq_arm_log_level,
 		"vchiq_init - done (slots %pK, phys %pad)",
 		vchiq_slot_zero, &slot_phys);
@@ -233,6 +228,7 @@ vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
 {
 	PAGELIST_T *pagelist;
 	int ret;
+	dma_addr_t dma_addr;
 
 	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
 
@@ -241,12 +237,14 @@ vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
 			? PAGELIST_READ
 			: PAGELIST_WRITE,
 			current,
-			&pagelist);
+			&pagelist,
+			&dma_addr);
+
 	if (ret != 0)
 		return VCHIQ_ERROR;
 
 	bulk->handle = memhandle;
-	bulk->data = VCHIQ_ARM_ADDRESS(pagelist);
+	bulk->data = (void *)(unsigned long)dma_addr;
 
 	/* Store the pagelist address in remote_data, which isn't used by the
 	   slave. */
@@ -259,7 +257,8 @@ void
 vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
 {
 	if (bulk && bulk->remote_data && bulk->actual)
-		free_pagelist((PAGELIST_T *)bulk->remote_data, bulk->actual);
+		free_pagelist((dma_addr_t)(unsigned long)bulk->data,
+			      (PAGELIST_T *)bulk->remote_data, bulk->actual);
 }
 
 void
@@ -353,38 +352,44 @@ vchiq_doorbell_irq(int irq, void *dev_id)
 ** obscuring the new data underneath. We can solve this by transferring the
 ** partial cache lines separately, and allowing the ARM to copy into the
 ** cached area.
-
-** N.B. This implementation plays slightly fast and loose with the Linux
-** driver programming rules, e.g. its use of dmac_map_area instead of
-** dma_map_single, but it isn't a multi-platform driver and it benefits
-** from increased speed as a result.
 */
 
 static int
 create_pagelist(char __user *buf, size_t count, unsigned short type,
-	struct task_struct *task, PAGELIST_T ** ppagelist)
+		struct task_struct *task, PAGELIST_T **ppagelist,
+		dma_addr_t *dma_addr)
 {
 	PAGELIST_T *pagelist;
 	struct page **pages;
-	unsigned long *addrs;
-	unsigned int num_pages, offset, i;
-	char *addr, *base_addr, *next_addr;
-	int run, addridx, actual_pages;
+	u32 *addrs;
+	unsigned int num_pages, offset, i, j, k;
+	int actual_pages;
         unsigned long *need_release;
+	size_t pagelist_size;
+	struct scatterlist *scatterlist;
+	int dma_buffers;
+	int dir;
 
-	offset = (unsigned int)buf & (PAGE_SIZE - 1);
+	offset = ((unsigned int)(unsigned long)buf & (PAGE_SIZE - 1));
 	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;
 
+	pagelist_size = sizeof(PAGELIST_T) +
+			(num_pages * sizeof(unsigned long)) +
+			sizeof(unsigned long) +
+			(num_pages * sizeof(pages[0]) +
+			(num_pages * sizeof(struct scatterlist)));
+
 	*ppagelist = NULL;
 
+	dir = (type == PAGELIST_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
 	/* Allocate enough storage to hold the page pointers and the page
 	** list
 	*/
-	pagelist = kmalloc(sizeof(PAGELIST_T) +
-                           (num_pages * sizeof(unsigned long)) +
-                           sizeof(unsigned long) +
-                           (num_pages * sizeof(pages[0])),
-                           GFP_KERNEL);
+	pagelist = dma_zalloc_coherent(g_dev,
+				       pagelist_size,
+				       dma_addr,
+				       GFP_KERNEL);
 
 	vchiq_log_trace(vchiq_arm_log_level, "create_pagelist - %pK",
 			pagelist);
@@ -394,10 +399,9 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 	addrs = pagelist->addrs;
         need_release = (unsigned long *)(addrs + num_pages);
 	pages = (struct page **)(addrs + num_pages + 1);
+	scatterlist = (struct scatterlist *)(pages + num_pages);
 
 	if (is_vmalloc_addr(buf)) {
-		int dir = (type == PAGELIST_WRITE) ?
-			DMA_TO_DEVICE : DMA_FROM_DEVICE;
 		unsigned long length = count;
 		unsigned int off = offset;
 
@@ -410,7 +414,6 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 			if (bytes > length)
 				bytes = length;
 			pages[actual_pages] = pg;
-			dmac_map_area(page_address(pg) + off, bytes, dir);
 			length -= bytes;
 			off = 0;
 		}
@@ -438,7 +441,8 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 				actual_pages--;
 				put_page(pages[actual_pages]);
 			}
-			kfree(pagelist);
+			dma_free_coherent(g_dev, pagelist_size,
+					  pagelist, *dma_addr);
 			if (actual_pages == 0)
 				actual_pages = -ENOMEM;
 			return actual_pages;
@@ -450,29 +454,38 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 	pagelist->type = type;
 	pagelist->offset = offset;
 
-	/* Group the pages into runs of contiguous pages */
-
-	base_addr = VCHIQ_ARM_ADDRESS(page_address(pages[0]));
-	next_addr = base_addr + PAGE_SIZE;
-	addridx = 0;
-	run = 0;
-
-	for (i = 1; i < num_pages; i++) {
-		addr = VCHIQ_ARM_ADDRESS(page_address(pages[i]));
-		if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
-			next_addr += PAGE_SIZE;
-			run++;
-		} else {
-			addrs[addridx] = (unsigned long)base_addr + run;
-			addridx++;
-			base_addr = addr;
-			next_addr = addr + PAGE_SIZE;
-			run = 0;
-		}
+	for (i = 0; i < num_pages; i++)
+		sg_set_page(scatterlist + i, pages[i], PAGE_SIZE, 0);
+
+	dma_buffers = dma_map_sg(g_dev,
+				 scatterlist,
+				 num_pages,
+				 dir);
+
+	if (dma_buffers == 0) {
+		dma_free_coherent(g_dev, pagelist_size,
+				  pagelist, *dma_addr);
+
+		return -EINTR;
 	}
 
-	addrs[addridx] = (unsigned long)base_addr + run;
-	addridx++;
+	k = 0;
+	for (i = 0; i < dma_buffers;) {
+		u32 section_length = 0;
+
+		for (j = i + 1; j < dma_buffers; j++) {
+			if (sg_dma_address(scatterlist + j) !=
+			    sg_dma_address(scatterlist + j - 1) + PAGE_SIZE) {
+				break;
+			}
+			section_length++;
+		}
+
+		addrs[k] = (u32)sg_dma_address(scatterlist + i) |
+				section_length;
+		i = j;
+		k++;
+	}
 
 	/* Partial cache lines (fragments) require special measures */
 	if ((type == PAGELIST_READ) &&
@@ -482,7 +495,10 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 		char *fragments;
 
 		if (down_interruptible(&g_free_fragments_sema) != 0) {
-			kfree(pagelist);
+			dma_unmap_sg(g_dev, scatterlist, num_pages,
+				     DMA_BIDIRECTIONAL);
+			dma_free_coherent(g_dev, pagelist_size,
+					  pagelist, *dma_addr);
 			return -EINTR;
 		}
 
@@ -497,29 +513,42 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 			(fragments - g_fragments_base) / g_fragments_size;
 	}
 
-	dmac_flush_range(pagelist, addrs + num_pages);
-
 	*ppagelist = pagelist;
 
 	return 0;
 }
 
 static void
-free_pagelist(PAGELIST_T *pagelist, int actual)
+free_pagelist(dma_addr_t dma_addr, PAGELIST_T *pagelist, int actual)
 {
         unsigned long *need_release;
 	struct page **pages;
 	unsigned int num_pages, i;
+	size_t pagelist_size;
+	struct scatterlist *scatterlist;
+	int dir;
 
 	vchiq_log_trace(vchiq_arm_log_level, "free_pagelist - %pK, %d",
 			pagelist, actual);
 
+	dir = (pagelist->type == PAGELIST_WRITE) ? DMA_TO_DEVICE :
+						   DMA_FROM_DEVICE;
+
 	num_pages =
 		(pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
 		PAGE_SIZE;
 
+	pagelist_size = sizeof(PAGELIST_T) +
+			(num_pages * sizeof(unsigned long)) +
+			sizeof(unsigned long) +
+			(num_pages * sizeof(pages[0]) +
+			(num_pages * sizeof(struct scatterlist)));
+
         need_release = (unsigned long *)(pagelist->addrs + num_pages);
 	pages = (struct page **)(pagelist->addrs + num_pages + 1);
+	scatterlist = (struct scatterlist *)(pages + num_pages);
+
+	dma_unmap_sg(g_dev, scatterlist, num_pages, dir);
 
 	/* Deal with any partial cache lines (fragments) */
 	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
@@ -569,8 +598,7 @@ free_pagelist(PAGELIST_T *pagelist, int actual)
 
 				if (bytes > length)
 					bytes = length;
-				dmac_unmap_area(page_address(pg) + offset,
-						bytes, DMA_FROM_DEVICE);
+
 				length -= bytes;
 				offset = 0;
 				set_page_dirty(pg);
@@ -579,5 +607,6 @@ free_pagelist(PAGELIST_T *pagelist, int actual)
 		}
 	}
 
-	kfree(pagelist);
+	dma_free_coherent(g_dev, pagelist_size,
+			  pagelist, dma_addr);
 }
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-24  5:29 [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg mzoran
@ 2016-10-24 13:24 ` Greg KH
  2016-10-24 14:01   ` Michael Zoran
  2016-10-24 16:56   ` Eric Anholt
  2016-10-24 17:31 ` Eric Anholt
  1 sibling, 2 replies; 9+ messages in thread
From: Greg KH @ 2016-10-24 13:24 UTC (permalink / raw)
  To: mzoran; +Cc: daniels, eric, noralf, popcornmix, devel, linux-kernel

On Sun, Oct 23, 2016 at 10:29:32PM -0700, mzoran@crowfest.net wrote:
> From: Michael Zoran <mzoran@crowfest.net>
> 
> The original arm implementation uses dmac_map_area which is not
> portable.  Replace it with an architecture neutral version
> which uses dma_map_sg.
> 
> As you can see that for larger page sizes, the dma_map_sg
> implementation is faster then the original unportable dma_map_area
> implementation.  
> 
> Test                       dmac_map_area   dma_map_page dma_map_sg
> vchiq_test -b 4 10000      51us/iter       76us/iter    76us
> vchiq_test -b 8 10000      70us/iter       82us/iter    91us
> vchiq_test -b 16 10000     94us/iter       118us/iter   121us
> vchiq_test -b 32 10000     146us/iter      173us/iter   187us
> vchiq_test -b 64 10000     263us/iter      328us/iter   299us
> vchiq_test -b 128 10000    529us/iter      631us/iter   595us
> vchiq_test -b 256 10000    2285us/iter     2275us/iter  2001us
> vchiq_test -b 512 10000    4372us/iter     4616us/iter  4123us
> 
> For message sizes >= 64KB, dma_map_sg is faster then dma_map_page.
> 
> For message size >= 256KB, the dma_map_sg is the fastest
> implementation.

What is the "normal" message size value when using this driver?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-24 13:24 ` Greg KH
@ 2016-10-24 14:01   ` Michael Zoran
  2016-10-24 16:56   ` Eric Anholt
  1 sibling, 0 replies; 9+ messages in thread
From: Michael Zoran @ 2016-10-24 14:01 UTC (permalink / raw)
  To: Greg KH; +Cc: daniels, eric, noralf, popcornmix, devel, linux-kernel

On Mon, 2016-10-24 at 15:24 +0200, Greg KH wrote:
> On Sun, Oct 23, 2016 at 10:29:32PM -0700, mzoran@crowfest.net wrote:
> > From: Michael Zoran <mzoran@crowfest.net>
> > 
> > The original arm implementation uses dmac_map_area which is not
> > portable.  Replace it with an architecture neutral version
> > which uses dma_map_sg.
> > 
> > As you can see that for larger page sizes, the dma_map_sg
> > implementation is faster then the original unportable dma_map_area
> > implementation.  
> > 
> > Test                       dmac_map_area   dma_map_page dma_map_sg
> > vchiq_test -b 4 10000      51us/iter       76us/iter    76us
> > vchiq_test -b 8 10000      70us/iter       82us/iter    91us
> > vchiq_test -b 16 10000     94us/iter       118us/iter   121us
> > vchiq_test -b 32 10000     146us/iter      173us/iter   187us
> > vchiq_test -b 64 10000     263us/iter      328us/iter   299us
> > vchiq_test -b 128 10000    529us/iter      631us/iter   595us
> > vchiq_test -b 256 10000    2285us/iter     2275us/iter  2001us
> > vchiq_test -b 512 10000    4372us/iter     4616us/iter  4123us
> > 
> > For message sizes >= 64KB, dma_map_sg is faster then dma_map_page.
> > 
> > For message size >= 256KB, the dma_map_sg is the fastest
> > implementation.
> 
> What is the "normal" message size value when using this driver?
> 
> thanks,
> 
> greg k-h

I honestly have no idea.  From what I understand, the only code that
actually uses this code path is the closed source multimedia drivers
which I know nothing about.

Obviously, one approach would be to have the kernel collect data on
what the typical size is after running some benchmarks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-24 13:24 ` Greg KH
  2016-10-24 14:01   ` Michael Zoran
@ 2016-10-24 16:56   ` Eric Anholt
  1 sibling, 0 replies; 9+ messages in thread
From: Eric Anholt @ 2016-10-24 16:56 UTC (permalink / raw)
  To: Greg KH, mzoran; +Cc: daniels, noralf, popcornmix, devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1475 bytes --]

Greg KH <gregkh@linuxfoundation.org> writes:

> On Sun, Oct 23, 2016 at 10:29:32PM -0700, mzoran@crowfest.net wrote:
>> From: Michael Zoran <mzoran@crowfest.net>
>> 
>> The original arm implementation uses dmac_map_area which is not
>> portable.  Replace it with an architecture neutral version
>> which uses dma_map_sg.
>> 
>> As you can see that for larger page sizes, the dma_map_sg
>> implementation is faster then the original unportable dma_map_area
>> implementation.  
>> 
>> Test                       dmac_map_area   dma_map_page dma_map_sg
>> vchiq_test -b 4 10000      51us/iter       76us/iter    76us
>> vchiq_test -b 8 10000      70us/iter       82us/iter    91us
>> vchiq_test -b 16 10000     94us/iter       118us/iter   121us
>> vchiq_test -b 32 10000     146us/iter      173us/iter   187us
>> vchiq_test -b 64 10000     263us/iter      328us/iter   299us
>> vchiq_test -b 128 10000    529us/iter      631us/iter   595us
>> vchiq_test -b 256 10000    2285us/iter     2275us/iter  2001us
>> vchiq_test -b 512 10000    4372us/iter     4616us/iter  4123us
>> 
>> For message sizes >= 64KB, dma_map_sg is faster then dma_map_page.
>> 
>> For message size >= 256KB, the dma_map_sg is the fastest
>> implementation.
>
> What is the "normal" message size value when using this driver?

For the contents of a bulk transfer, the performance case you care about
is moving an image (video decode, camera, etc.), so on the order of 1MB.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 800 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-24  5:29 [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg mzoran
  2016-10-24 13:24 ` Greg KH
@ 2016-10-24 17:31 ` Eric Anholt
  2016-10-25 15:00   ` Michael Zoran
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Anholt @ 2016-10-24 17:31 UTC (permalink / raw)
  To: mzoran, gregkh; +Cc: mzoran, daniels, noralf, popcornmix, devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 10759 bytes --]

mzoran@crowfest.net writes:

> From: Michael Zoran <mzoran@crowfest.net>
>
> The original arm implementation uses dmac_map_area which is not
> portable.  Replace it with an architecture neutral version
> which uses dma_map_sg.
>
> As you can see that for larger page sizes, the dma_map_sg
> implementation is faster then the original unportable dma_map_area
> implementation.  
>
> Test                       dmac_map_area   dma_map_page dma_map_sg
> vchiq_test -b 4 10000      51us/iter       76us/iter    76us
> vchiq_test -b 8 10000      70us/iter       82us/iter    91us
> vchiq_test -b 16 10000     94us/iter       118us/iter   121us
> vchiq_test -b 32 10000     146us/iter      173us/iter   187us
> vchiq_test -b 64 10000     263us/iter      328us/iter   299us
> vchiq_test -b 128 10000    529us/iter      631us/iter   595us
> vchiq_test -b 256 10000    2285us/iter     2275us/iter  2001us
> vchiq_test -b 512 10000    4372us/iter     4616us/iter  4123us
>
> For message sizes >= 64KB, dma_map_sg is faster then dma_map_page.
>
> For message size >= 256KB, the dma_map_sg is the fastest
> implementation.
>
> Signed-off-by: Michael Zoran <mzoran@crowfest.net>
> ---
>  .../interface/vchiq_arm/vchiq_2835_arm.c           | 153 ++++++++++++---------
>  1 file changed, 91 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> index 98c6819..5ca66ee 100644
> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> @@ -45,13 +45,8 @@
>  #include <asm/pgtable.h>
>  #include <soc/bcm2835/raspberrypi-firmware.h>
>  
> -#define dmac_map_area			__glue(_CACHE,_dma_map_area)
> -#define dmac_unmap_area 		__glue(_CACHE,_dma_unmap_area)
> -
>  #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
>  
> -#define VCHIQ_ARM_ADDRESS(x) ((void *)((char *)x + g_virt_to_bus_offset))
> -
>  #include "vchiq_arm.h"
>  #include "vchiq_2835.h"
>  #include "vchiq_connected.h"
> @@ -73,7 +68,7 @@ static unsigned int g_fragments_size;
>  static char *g_fragments_base;
>  static char *g_free_fragments;
>  static struct semaphore g_free_fragments_sema;
> -static unsigned long g_virt_to_bus_offset;
> +static struct device *g_dev;
>  
>  extern int vchiq_arm_log_level;
>  
> @@ -84,10 +79,11 @@ vchiq_doorbell_irq(int irq, void *dev_id);
>  
>  static int
>  create_pagelist(char __user *buf, size_t count, unsigned short type,
> -                struct task_struct *task, PAGELIST_T ** ppagelist);
> +		struct task_struct *task, PAGELIST_T **ppagelist,
> +		dma_addr_t *dma_addr);
>  
>  static void
> -free_pagelist(PAGELIST_T *pagelist, int actual);
> +free_pagelist(dma_addr_t dma_addr, PAGELIST_T *pagelist, int actual);
>  
>  int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
>  {
> @@ -101,8 +97,6 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
>  	int slot_mem_size, frag_mem_size;
>  	int err, irq, i;
>  
> -	g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
> -
>  	(void)of_property_read_u32(dev->of_node, "cache-line-size",
>  				   &g_cache_line_size);
>  	g_fragments_size = 2 * g_cache_line_size;
> @@ -118,7 +112,7 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
>  		return -ENOMEM;
>  	}
>  
> -	WARN_ON(((int)slot_mem & (PAGE_SIZE - 1)) != 0);
> +	WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0);
>  
>  	vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
>  	if (!vchiq_slot_zero)
> @@ -170,6 +164,7 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
>  		return err ? : -ENXIO;
>  	}
>  
> +	g_dev = dev;
>  	vchiq_log_info(vchiq_arm_log_level,
>  		"vchiq_init - done (slots %pK, phys %pad)",
>  		vchiq_slot_zero, &slot_phys);
> @@ -233,6 +228,7 @@ vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
>  {
>  	PAGELIST_T *pagelist;
>  	int ret;
> +	dma_addr_t dma_addr;
>  
>  	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
>  
> @@ -241,12 +237,14 @@ vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
>  			? PAGELIST_READ
>  			: PAGELIST_WRITE,
>  			current,
> -			&pagelist);
> +			&pagelist,
> +			&dma_addr);
> +
>  	if (ret != 0)
>  		return VCHIQ_ERROR;
>  
>  	bulk->handle = memhandle;
> -	bulk->data = VCHIQ_ARM_ADDRESS(pagelist);
> +	bulk->data = (void *)(unsigned long)dma_addr;
>  
>  	/* Store the pagelist address in remote_data, which isn't used by the
>  	   slave. */
> @@ -259,7 +257,8 @@ void
>  vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
>  {
>  	if (bulk && bulk->remote_data && bulk->actual)
> -		free_pagelist((PAGELIST_T *)bulk->remote_data, bulk->actual);
> +		free_pagelist((dma_addr_t)(unsigned long)bulk->data,
> +			      (PAGELIST_T *)bulk->remote_data, bulk->actual);
>  }
>  
>  void
> @@ -353,38 +352,44 @@ vchiq_doorbell_irq(int irq, void *dev_id)
>  ** obscuring the new data underneath. We can solve this by transferring the
>  ** partial cache lines separately, and allowing the ARM to copy into the
>  ** cached area.
> -
> -** N.B. This implementation plays slightly fast and loose with the Linux
> -** driver programming rules, e.g. its use of dmac_map_area instead of
> -** dma_map_single, but it isn't a multi-platform driver and it benefits
> -** from increased speed as a result.
>  */
>  
>  static int
>  create_pagelist(char __user *buf, size_t count, unsigned short type,
> -	struct task_struct *task, PAGELIST_T ** ppagelist)
> +		struct task_struct *task, PAGELIST_T **ppagelist,
> +		dma_addr_t *dma_addr)
>  {
>  	PAGELIST_T *pagelist;
>  	struct page **pages;
> -	unsigned long *addrs;
> -	unsigned int num_pages, offset, i;
> -	char *addr, *base_addr, *next_addr;
> -	int run, addridx, actual_pages;
> +	u32 *addrs;
> +	unsigned int num_pages, offset, i, j, k;
> +	int actual_pages;
>          unsigned long *need_release;
> +	size_t pagelist_size;
> +	struct scatterlist *scatterlist;
> +	int dma_buffers;
> +	int dir;
>  
> -	offset = (unsigned int)buf & (PAGE_SIZE - 1);
> +	offset = ((unsigned int)(unsigned long)buf & (PAGE_SIZE - 1));
>  	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;
>  
> +	pagelist_size = sizeof(PAGELIST_T) +
> +			(num_pages * sizeof(unsigned long)) +
> +			sizeof(unsigned long) +
> +			(num_pages * sizeof(pages[0]) +
> +			(num_pages * sizeof(struct scatterlist)));
> +
>  	*ppagelist = NULL;
>  
> +	dir = (type == PAGELIST_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> +
>  	/* Allocate enough storage to hold the page pointers and the page
>  	** list
>  	*/
> -	pagelist = kmalloc(sizeof(PAGELIST_T) +
> -                           (num_pages * sizeof(unsigned long)) +
> -                           sizeof(unsigned long) +
> -                           (num_pages * sizeof(pages[0])),
> -                           GFP_KERNEL);
> +	pagelist = dma_zalloc_coherent(g_dev,
> +				       pagelist_size,
> +				       dma_addr,
> +				       GFP_KERNEL);
>  
>  	vchiq_log_trace(vchiq_arm_log_level, "create_pagelist - %pK",
>  			pagelist);
> @@ -394,10 +399,9 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
>  	addrs = pagelist->addrs;
>          need_release = (unsigned long *)(addrs + num_pages);
>  	pages = (struct page **)(addrs + num_pages + 1);
> +	scatterlist = (struct scatterlist *)(pages + num_pages);
>  
>  	if (is_vmalloc_addr(buf)) {
> -		int dir = (type == PAGELIST_WRITE) ?
> -			DMA_TO_DEVICE : DMA_FROM_DEVICE;
>  		unsigned long length = count;
>  		unsigned int off = offset;
>  
> @@ -410,7 +414,6 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
>  			if (bytes > length)
>  				bytes = length;
>  			pages[actual_pages] = pg;
> -			dmac_map_area(page_address(pg) + off, bytes, dir);
>  			length -= bytes;
>  			off = 0;
>  		}
> @@ -438,7 +441,8 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
>  				actual_pages--;
>  				put_page(pages[actual_pages]);
>  			}
> -			kfree(pagelist);
> +			dma_free_coherent(g_dev, pagelist_size,
> +					  pagelist, *dma_addr);
>  			if (actual_pages == 0)
>  				actual_pages = -ENOMEM;
>  			return actual_pages;
> @@ -450,29 +454,38 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
>  	pagelist->type = type;
>  	pagelist->offset = offset;
>  
> -	/* Group the pages into runs of contiguous pages */
> -
> -	base_addr = VCHIQ_ARM_ADDRESS(page_address(pages[0]));
> -	next_addr = base_addr + PAGE_SIZE;
> -	addridx = 0;
> -	run = 0;
> -
> -	for (i = 1; i < num_pages; i++) {
> -		addr = VCHIQ_ARM_ADDRESS(page_address(pages[i]));
> -		if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
> -			next_addr += PAGE_SIZE;
> -			run++;
> -		} else {
> -			addrs[addridx] = (unsigned long)base_addr + run;
> -			addridx++;
> -			base_addr = addr;
> -			next_addr = addr + PAGE_SIZE;
> -			run = 0;
> -		}
> +	for (i = 0; i < num_pages; i++)
> +		sg_set_page(scatterlist + i, pages[i], PAGE_SIZE, 0);
> +
> +	dma_buffers = dma_map_sg(g_dev,
> +				 scatterlist,
> +				 num_pages,
> +				 dir);
> +
> +	if (dma_buffers == 0) {
> +		dma_free_coherent(g_dev, pagelist_size,
> +				  pagelist, *dma_addr);
> +
> +		return -EINTR;
>  	}
>  
> -	addrs[addridx] = (unsigned long)base_addr + run;
> -	addridx++;
> +	k = 0;
> +	for (i = 0; i < dma_buffers;) {
> +		u32 section_length = 0;
> +
> +		for (j = i + 1; j < dma_buffers; j++) {
> +			if (sg_dma_address(scatterlist + j) !=
> +			    sg_dma_address(scatterlist + j - 1) + PAGE_SIZE) {
> +				break;
> +			}
> +			section_length++;
> +		}
> +
> +		addrs[k] = (u32)sg_dma_address(scatterlist + i) |
> +				section_length;

It looks like scatterlist may not be just an array, so I think this
whole loop wants to be something like:

for_each_sg(scatterlist, sg, num_pages, i) {
	u32 len = sg_dma_len(sg);
        u32 addr = sg_dma_address(sg);

        if (k > 0 && addrs[k - 1] & PAGE_MASK +
            (addrs[k - 1] & ~PAGE_MASK) << PAGE_SHIFT) == addr) {
        	addrs[k - 1] += len;
        } else {
        	addrs[k++] = addr | len;
        }
}

Note the use of sg_dma_len(), since sg entries may be more than one
page.  I don't think any merging is actually happening on our platform
currently, thus why I left the inner loop.

Thanks for taking on doing this conversion!  This code's going to be so
much nicer when you're done.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 800 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-24 17:31 ` Eric Anholt
@ 2016-10-25 15:00   ` Michael Zoran
  2016-10-25 15:07     ` Michael Zoran
  0 siblings, 1 reply; 9+ messages in thread
From: Michael Zoran @ 2016-10-25 15:00 UTC (permalink / raw)
  To: Eric Anholt, gregkh; +Cc: daniels, noralf, popcornmix, devel, linux-kernel

On Mon, 2016-10-24 at 10:31 -0700, Eric Anholt wrote:
> mzoran@crowfest.net writes:
> 
> >  */
> >  
> >  static int
> >  create_pagelist(char __user *buf, size_t count, unsigned short
> > type,
> > -	struct task_struct *task, PAGELIST_T ** ppagelist)
> > +		struct task_struct *task, PAGELIST_T **ppagelist,
> > +		dma_addr_t *dma_addr)
> >  {
> >  	PAGELIST_T *pagelist;
> >  	struct page **pages;
> > -	unsigned long *addrs;
> > -	unsigned int num_pages, offset, i;
> > -	char *addr, *base_addr, *next_addr;
> > -	int run, addridx, actual_pages;
> > +	u32 *addrs;
> > +	unsigned int num_pages, offset, i, j, k;
> > +	int actual_pages;
> >          unsigned long *need_release;
> > +	size_t pagelist_size;
> > +	struct scatterlist *scatterlist;
> > +	int dma_buffers;
> > +	int dir;
> >  
> > -	offset = (unsigned int)buf & (PAGE_SIZE - 1);
> > +	offset = ((unsigned int)(unsigned long)buf & (PAGE_SIZE -
> > 1));
> >  	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;
> >  
> > +	pagelist_size = sizeof(PAGELIST_T) +
> > +			(num_pages * sizeof(unsigned long)) +
> > +			sizeof(unsigned long) +
> > +			(num_pages * sizeof(pages[0]) +
> > +			(num_pages * sizeof(struct scatterlist)));
> > +
> >  	*ppagelist = NULL;
> >  
> > +	dir = (type == PAGELIST_WRITE) ? DMA_TO_DEVICE :
> > DMA_FROM_DEVICE;
> > +
> >  	/* Allocate enough storage to hold the page pointers and
> > the page
> >  	** list
> >  	*/
> > -	pagelist = kmalloc(sizeof(PAGELIST_T) +
> > -                           (num_pages * sizeof(unsigned long)) +
> > -                           sizeof(unsigned long) +
> > -                           (num_pages * sizeof(pages[0])),
> > -                           GFP_KERNEL);
> > +	pagelist = dma_zalloc_coherent(g_dev,
> > +				       pagelist_size,
> > +				       dma_addr,
> > +				       GFP_KERNEL);
> >  
> >  	vchiq_log_trace(vchiq_arm_log_level, "create_pagelist -
> > %pK",
> >  			pagelist);
> > @@ -394,10 +399,9 @@ create_pagelist(char __user *buf, size_t
> > count, unsigned short type,
> >  	addrs = pagelist->addrs;
> >          need_release = (unsigned long *)(addrs + num_pages);
> >  	pages = (struct page **)(addrs + num_pages + 1);
> > +	scatterlist = (struct scatterlist *)(pages + num_pages);
> >  
> >  	if (is_vmalloc_addr(buf)) {
> > -		int dir = (type == PAGELIST_WRITE) ?
> > -			DMA_TO_DEVICE : DMA_FROM_DEVICE;
> >  		unsigned long length = count;
> >  		unsigned int off = offset;
> >  
> > @@ -410,7 +414,6 @@ create_pagelist(char __user *buf, size_t count,
> > unsigned short type,
> >  			if (bytes > length)
> >  				bytes = length;
> >  			pages[actual_pages] = pg;
> > -			dmac_map_area(page_address(pg) + off,
> > bytes, dir);
> >  			length -= bytes;
> >  			off = 0;
> >  		}
> > @@ -438,7 +441,8 @@ create_pagelist(char __user *buf, size_t count,
> > unsigned short type,
> >  				actual_pages--;
> >  				put_page(pages[actual_pages]);
> >  			}
> > -			kfree(pagelist);
> > +			dma_free_coherent(g_dev, pagelist_size,
> > +					  pagelist, *dma_addr);
> >  			if (actual_pages == 0)
> >  				actual_pages = -ENOMEM;
> >  			return actual_pages;
> > @@ -450,29 +454,38 @@ create_pagelist(char __user *buf, size_t
> > count, unsigned short type,
> >  	pagelist->type = type;
> >  	pagelist->offset = offset;
> >  
> > -	/* Group the pages into runs of contiguous pages */
> > -
> > -	base_addr = VCHIQ_ARM_ADDRESS(page_address(pages[0]));
> > -	next_addr = base_addr + PAGE_SIZE;
> > -	addridx = 0;
> > -	run = 0;
> > -
> > -	for (i = 1; i < num_pages; i++) {
> > -		addr = VCHIQ_ARM_ADDRESS(page_address(pages[i]));
> > -		if ((addr == next_addr) && (run < (PAGE_SIZE -
> > 1))) {
> > -			next_addr += PAGE_SIZE;
> > -			run++;
> > -		} else {
> > -			addrs[addridx] = (unsigned long)base_addr
> > + run;
> > -			addridx++;
> > -			base_addr = addr;
> > -			next_addr = addr + PAGE_SIZE;
> > -			run = 0;
> > -		}
> > +	for (i = 0; i < num_pages; i++)
> > +		sg_set_page(scatterlist + i, pages[i], PAGE_SIZE,
> > 0);
> > +
> > +	dma_buffers = dma_map_sg(g_dev,
> > +				 scatterlist,
> > +				 num_pages,
> > +				 dir);
> > +
> > +	if (dma_buffers == 0) {
> > +		dma_free_coherent(g_dev, pagelist_size,
> > +				  pagelist, *dma_addr);
> > +
> > +		return -EINTR;
> >  	}
> >  
> > -	addrs[addridx] = (unsigned long)base_addr + run;
> > -	addridx++;
> > +	k = 0;
> > +	for (i = 0; i < dma_buffers;) {
> > +		u32 section_length = 0;
> > +
> > +		for (j = i + 1; j < dma_buffers; j++) {
> > +			if (sg_dma_address(scatterlist + j) !=
> > +			    sg_dma_address(scatterlist + j - 1) +
> > PAGE_SIZE) {
> > +				break;
> > +			}
> > +			section_length++;
> > +		}
> > +
> > +		addrs[k] = (u32)sg_dma_address(scatterlist + i) |
> > +				section_length;
> 
> It looks like scatterlist may not be just an array, so I think this
> whole loop wants to be something like:
> 
> for_each_sg(scatterlist, sg, num_pages, i) {
> 	u32 len = sg_dma_len(sg);
>         u32 addr = sg_dma_address(sg);
> 
>         if (k > 0 && addrs[k - 1] & PAGE_MASK +
>             (addrs[k - 1] & ~PAGE_MASK) << PAGE_SHIFT) == addr) {
>         	addrs[k - 1] += len;
>         } else {
>         	addrs[k++] = addr | len;
>         }
> }
> 
> Note the use of sg_dma_len(), since sg entries may be more than one
> page.  I don't think any merging is actually happening on our
> platform
> currently, thus why I left the inner loop.
> 
> Thanks for taking on doing this conversion!  This code's going to be
> so
> much nicer when you're done.

Thanks for looking at this.  

While I understand the use of sg_dma_len, I don't totally understand
the need for "for_each_sg". The scatterlist can be part of a scatter
table with all kinds of complex chaining, but in this case it is
directly allocated as an array at the top of the function.

Also note that the addrs[] list is passed to the firmware and it
requires all the items of the list to be page aligned and be a multiple
of the page size.  So I'm also a bit confused about the need for
handling scatterlists that are not page aligned or a multiple of pages.



 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-25 15:00   ` Michael Zoran
@ 2016-10-25 15:07     ` Michael Zoran
  2016-10-25 16:16       ` Eric Anholt
  0 siblings, 1 reply; 9+ messages in thread
From: Michael Zoran @ 2016-10-25 15:07 UTC (permalink / raw)
  To: Eric Anholt, gregkh; +Cc: daniels, noralf, popcornmix, devel, linux-kernel

On Tue, 2016-10-25 at 08:00 -0700, Michael Zoran wrote:
> On Mon, 2016-10-24 at 10:31 -0700, Eric Anholt wrote:
> > mzoran@crowfest.net writes:
> > 
> > >  */
> > >  
> > >  static int
> > >  create_pagelist(char __user *buf, size_t count, unsigned short
> > > type,
> > > -	struct task_struct *task, PAGELIST_T ** ppagelist)
> > > +		struct task_struct *task, PAGELIST_T
> > > **ppagelist,
> > > +		dma_addr_t *dma_addr)
> > >  {
> > >  	PAGELIST_T *pagelist;
> > >  	struct page **pages;
> > > -	unsigned long *addrs;
> > > -	unsigned int num_pages, offset, i;
> > > -	char *addr, *base_addr, *next_addr;
> > > -	int run, addridx, actual_pages;
> > > +	u32 *addrs;
> > > +	unsigned int num_pages, offset, i, j, k;
> > > +	int actual_pages;
> > >          unsigned long *need_release;
> > > +	size_t pagelist_size;
> > > +	struct scatterlist *scatterlist;
> > > +	int dma_buffers;
> > > +	int dir;
> > >  
> > > -	offset = (unsigned int)buf & (PAGE_SIZE - 1);
> > > +	offset = ((unsigned int)(unsigned long)buf & (PAGE_SIZE
> > > -
> > > 1));
> > >  	num_pages = (count + offset + PAGE_SIZE - 1) /
> > > PAGE_SIZE;
> > >  
> > > +	pagelist_size = sizeof(PAGELIST_T) +
> > > +			(num_pages * sizeof(unsigned long)) +
> > > +			sizeof(unsigned long) +
> > > +			(num_pages * sizeof(pages[0]) +
> > > +			(num_pages * sizeof(struct
> > > scatterlist)));
> > > +
> > >  	*ppagelist = NULL;
> > >  
> > > +	dir = (type == PAGELIST_WRITE) ? DMA_TO_DEVICE :
> > > DMA_FROM_DEVICE;
> > > +
> > >  	/* Allocate enough storage to hold the page pointers and
> > > the page
> > >  	** list
> > >  	*/
> > > -	pagelist = kmalloc(sizeof(PAGELIST_T) +
> > > -                           (num_pages * sizeof(unsigned long)) +
> > > -                           sizeof(unsigned long) +
> > > -                           (num_pages * sizeof(pages[0])),
> > > -                           GFP_KERNEL);
> > > +	pagelist = dma_zalloc_coherent(g_dev,
> > > +				       pagelist_size,
> > > +				       dma_addr,
> > > +				       GFP_KERNEL);
> > >  
> > >  	vchiq_log_trace(vchiq_arm_log_level, "create_pagelist -
> > > %pK",
> > >  			pagelist);
> > > @@ -394,10 +399,9 @@ create_pagelist(char __user *buf, size_t
> > > count, unsigned short type,
> > >  	addrs = pagelist->addrs;
> > >          need_release = (unsigned long *)(addrs + num_pages);
> > >  	pages = (struct page **)(addrs + num_pages + 1);
> > > +	scatterlist = (struct scatterlist *)(pages + num_pages);
> > >  
> > >  	if (is_vmalloc_addr(buf)) {
> > > -		int dir = (type == PAGELIST_WRITE) ?
> > > -			DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > >  		unsigned long length = count;
> > >  		unsigned int off = offset;
> > >  
> > > @@ -410,7 +414,6 @@ create_pagelist(char __user *buf, size_t
> > > count,
> > > unsigned short type,
> > >  			if (bytes > length)
> > >  				bytes = length;
> > >  			pages[actual_pages] = pg;
> > > -			dmac_map_area(page_address(pg) + off,
> > > bytes, dir);
> > >  			length -= bytes;
> > >  			off = 0;
> > >  		}
> > > @@ -438,7 +441,8 @@ create_pagelist(char __user *buf, size_t
> > > count,
> > > unsigned short type,
> > >  				actual_pages--;
> > >  				put_page(pages[actual_pages]);
> > >  			}
> > > -			kfree(pagelist);
> > > +			dma_free_coherent(g_dev, pagelist_size,
> > > +					  pagelist, *dma_addr);
> > >  			if (actual_pages == 0)
> > >  				actual_pages = -ENOMEM;
> > >  			return actual_pages;
> > > @@ -450,29 +454,38 @@ create_pagelist(char __user *buf, size_t
> > > count, unsigned short type,
> > >  	pagelist->type = type;
> > >  	pagelist->offset = offset;
> > >  
> > > -	/* Group the pages into runs of contiguous pages */
> > > -
> > > -	base_addr = VCHIQ_ARM_ADDRESS(page_address(pages[0]));
> > > -	next_addr = base_addr + PAGE_SIZE;
> > > -	addridx = 0;
> > > -	run = 0;
> > > -
> > > -	for (i = 1; i < num_pages; i++) {
> > > -		addr =
> > > VCHIQ_ARM_ADDRESS(page_address(pages[i]));
> > > -		if ((addr == next_addr) && (run < (PAGE_SIZE -
> > > 1))) {
> > > -			next_addr += PAGE_SIZE;
> > > -			run++;
> > > -		} else {
> > > -			addrs[addridx] = (unsigned
> > > long)base_addr
> > > + run;
> > > -			addridx++;
> > > -			base_addr = addr;
> > > -			next_addr = addr + PAGE_SIZE;
> > > -			run = 0;
> > > -		}
> > > +	for (i = 0; i < num_pages; i++)
> > > +		sg_set_page(scatterlist + i, pages[i],
> > > PAGE_SIZE,
> > > 0);
> > > +
> > > +	dma_buffers = dma_map_sg(g_dev,
> > > +				 scatterlist,
> > > +				 num_pages,
> > > +				 dir);
> > > +
> > > +	if (dma_buffers == 0) {
> > > +		dma_free_coherent(g_dev, pagelist_size,
> > > +				  pagelist, *dma_addr);
> > > +
> > > +		return -EINTR;
> > >  	}
> > >  
> > > -	addrs[addridx] = (unsigned long)base_addr + run;
> > > -	addridx++;
> > > +	k = 0;
> > > +	for (i = 0; i < dma_buffers;) {
> > > +		u32 section_length = 0;
> > > +
> > > +		for (j = i + 1; j < dma_buffers; j++) {
> > > +			if (sg_dma_address(scatterlist + j) !=
> > > +			    sg_dma_address(scatterlist + j - 1)
> > > +
> > > PAGE_SIZE) {
> > > +				break;
> > > +			}
> > > +			section_length++;
> > > +		}
> > > +
> > > +		addrs[k] = (u32)sg_dma_address(scatterlist + i)
> > > |
> > > +				section_length;
> > 
> > It looks like scatterlist may not be just an array, so I think this
> > whole loop wants to be something like:
> > 
> > for_each_sg(scatterlist, sg, num_pages, i) {
> > 	u32 len = sg_dma_len(sg);
> >         u32 addr = sg_dma_address(sg);
> > 
> >         if (k > 0 && addrs[k - 1] & PAGE_MASK +
> >             (addrs[k - 1] & ~PAGE_MASK) << PAGE_SHIFT) == addr) {
> >         	addrs[k - 1] += len;
> >         } else {
> >         	addrs[k++] = addr | len;
> >         }
> > }
> > 
> > Note the use of sg_dma_len(), since sg entries may be more than one
> > page.  I don't think any merging is actually happening on our
> > platform
> > currently, thus why I left the inner loop.
> > 
> > Thanks for taking on doing this conversion!  This code's going to
> > be
> > so
> > much nicer when you're done.
> 
> Thanks for looking at this.  
> 
> While I understand the use of sg_dma_len, I don't understand totally
> the need for "for_each_sg". The scatterlist can be part of a scatter
> table with all kinds of complex chaining, but in this case it is
> directly allocated as an array at the top of the function.
> 
> Also note that the addrs[] list is passed to the firmware and it
> requires all the items of the list be paged aligned and be a multiple
> of the page size.  So I'm also a bit confused about the need for
> handling scatterlists that are not page aligned or a multiple of
> pages.
> 
> 

Sorry, but I forgot to add that the addrs list is an address ANDed with
a page count, not a byte count.   The example you have sent appears at
a glance to look like it is ANDing a byte count with the address.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-25 15:07     ` Michael Zoran
@ 2016-10-25 16:16       ` Eric Anholt
  2016-10-25 16:30         ` Michael Zoran
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Anholt @ 2016-10-25 16:16 UTC (permalink / raw)
  To: Michael Zoran, gregkh; +Cc: daniels, noralf, popcornmix, devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 8000 bytes --]

Michael Zoran <mzoran@crowfest.net> writes:

> On Tue, 2016-10-25 at 08:00 -0700, Michael Zoran wrote:
>> On Mon, 2016-10-24 at 10:31 -0700, Eric Anholt wrote:
>> > mzoran@crowfest.net writes:
>> > 
>> > >  */
>> > >  
>> > >  static int
>> > >  create_pagelist(char __user *buf, size_t count, unsigned short
>> > > type,
>> > > -	struct task_struct *task, PAGELIST_T ** ppagelist)
>> > > +		struct task_struct *task, PAGELIST_T
>> > > **ppagelist,
>> > > +		dma_addr_t *dma_addr)
>> > >  {
>> > >  	PAGELIST_T *pagelist;
>> > >  	struct page **pages;
>> > > -	unsigned long *addrs;
>> > > -	unsigned int num_pages, offset, i;
>> > > -	char *addr, *base_addr, *next_addr;
>> > > -	int run, addridx, actual_pages;
>> > > +	u32 *addrs;
>> > > +	unsigned int num_pages, offset, i, j, k;
>> > > +	int actual_pages;
>> > >          unsigned long *need_release;
>> > > +	size_t pagelist_size;
>> > > +	struct scatterlist *scatterlist;
>> > > +	int dma_buffers;
>> > > +	int dir;
>> > >  
>> > > -	offset = (unsigned int)buf & (PAGE_SIZE - 1);
>> > > +	offset = ((unsigned int)(unsigned long)buf & (PAGE_SIZE
>> > > -
>> > > 1));
>> > >  	num_pages = (count + offset + PAGE_SIZE - 1) /
>> > > PAGE_SIZE;
>> > >  
>> > > +	pagelist_size = sizeof(PAGELIST_T) +
>> > > +			(num_pages * sizeof(unsigned long)) +
>> > > +			sizeof(unsigned long) +
>> > > +			(num_pages * sizeof(pages[0]) +
>> > > +			(num_pages * sizeof(struct
>> > > scatterlist)));
>> > > +
>> > >  	*ppagelist = NULL;
>> > >  
>> > > +	dir = (type == PAGELIST_WRITE) ? DMA_TO_DEVICE :
>> > > DMA_FROM_DEVICE;
>> > > +
>> > >  	/* Allocate enough storage to hold the page pointers and
>> > > the page
>> > >  	** list
>> > >  	*/
>> > > -	pagelist = kmalloc(sizeof(PAGELIST_T) +
>> > > -                           (num_pages * sizeof(unsigned long)) +
>> > > -                           sizeof(unsigned long) +
>> > > -                           (num_pages * sizeof(pages[0])),
>> > > -                           GFP_KERNEL);
>> > > +	pagelist = dma_zalloc_coherent(g_dev,
>> > > +				       pagelist_size,
>> > > +				       dma_addr,
>> > > +				       GFP_KERNEL);
>> > >  
>> > >  	vchiq_log_trace(vchiq_arm_log_level, "create_pagelist -
>> > > %pK",
>> > >  			pagelist);
>> > > @@ -394,10 +399,9 @@ create_pagelist(char __user *buf, size_t
>> > > count, unsigned short type,
>> > >  	addrs = pagelist->addrs;
>> > >          need_release = (unsigned long *)(addrs + num_pages);
>> > >  	pages = (struct page **)(addrs + num_pages + 1);
>> > > +	scatterlist = (struct scatterlist *)(pages + num_pages);
>> > >  
>> > >  	if (is_vmalloc_addr(buf)) {
>> > > -		int dir = (type == PAGELIST_WRITE) ?
>> > > -			DMA_TO_DEVICE : DMA_FROM_DEVICE;
>> > >  		unsigned long length = count;
>> > >  		unsigned int off = offset;
>> > >  
>> > > @@ -410,7 +414,6 @@ create_pagelist(char __user *buf, size_t
>> > > count,
>> > > unsigned short type,
>> > >  			if (bytes > length)
>> > >  				bytes = length;
>> > >  			pages[actual_pages] = pg;
>> > > -			dmac_map_area(page_address(pg) + off,
>> > > bytes, dir);
>> > >  			length -= bytes;
>> > >  			off = 0;
>> > >  		}
>> > > @@ -438,7 +441,8 @@ create_pagelist(char __user *buf, size_t
>> > > count,
>> > > unsigned short type,
>> > >  				actual_pages--;
>> > >  				put_page(pages[actual_pages]);
>> > >  			}
>> > > -			kfree(pagelist);
>> > > +			dma_free_coherent(g_dev, pagelist_size,
>> > > +					  pagelist, *dma_addr);
>> > >  			if (actual_pages == 0)
>> > >  				actual_pages = -ENOMEM;
>> > >  			return actual_pages;
>> > > @@ -450,29 +454,38 @@ create_pagelist(char __user *buf, size_t
>> > > count, unsigned short type,
>> > >  	pagelist->type = type;
>> > >  	pagelist->offset = offset;
>> > >  
>> > > -	/* Group the pages into runs of contiguous pages */
>> > > -
>> > > -	base_addr = VCHIQ_ARM_ADDRESS(page_address(pages[0]));
>> > > -	next_addr = base_addr + PAGE_SIZE;
>> > > -	addridx = 0;
>> > > -	run = 0;
>> > > -
>> > > -	for (i = 1; i < num_pages; i++) {
>> > > -		addr =
>> > > VCHIQ_ARM_ADDRESS(page_address(pages[i]));
>> > > -		if ((addr == next_addr) && (run < (PAGE_SIZE -
>> > > 1))) {
>> > > -			next_addr += PAGE_SIZE;
>> > > -			run++;
>> > > -		} else {
>> > > -			addrs[addridx] = (unsigned
>> > > long)base_addr
>> > > + run;
>> > > -			addridx++;
>> > > -			base_addr = addr;
>> > > -			next_addr = addr + PAGE_SIZE;
>> > > -			run = 0;
>> > > -		}
>> > > +	for (i = 0; i < num_pages; i++)
>> > > +		sg_set_page(scatterlist + i, pages[i],
>> > > PAGE_SIZE,
>> > > 0);
>> > > +
>> > > +	dma_buffers = dma_map_sg(g_dev,
>> > > +				 scatterlist,
>> > > +				 num_pages,
>> > > +				 dir);
>> > > +
>> > > +	if (dma_buffers == 0) {
>> > > +		dma_free_coherent(g_dev, pagelist_size,
>> > > +				  pagelist, *dma_addr);
>> > > +
>> > > +		return -EINTR;
>> > >  	}
>> > >  
>> > > -	addrs[addridx] = (unsigned long)base_addr + run;
>> > > -	addridx++;
>> > > +	k = 0;
>> > > +	for (i = 0; i < dma_buffers;) {
>> > > +		u32 section_length = 0;
>> > > +
>> > > +		for (j = i + 1; j < dma_buffers; j++) {
>> > > +			if (sg_dma_address(scatterlist + j) !=
>> > > +			    sg_dma_address(scatterlist + j - 1)
>> > > +
>> > > PAGE_SIZE) {
>> > > +				break;
>> > > +			}
>> > > +			section_length++;
>> > > +		}
>> > > +
>> > > +		addrs[k] = (u32)sg_dma_address(scatterlist + i)
>> > > |
>> > > +				section_length;
>> > 
>> > It looks like scatterlist may not be just an array, so I think this
>> > whole loop wants to be something like:
>> > 
>> > for_each_sg(scatterlist, sg, num_pages, i) {
>> > 	u32 len = sg_dma_len(sg);
>> >         u32 addr = sg_dma_address(sg);
>> > 
>> >         if (k > 0 && addrs[k - 1] & PAGE_MASK +
>> >             (addrs[k - 1] & ~PAGE_MASK) << PAGE_SHIFT) == addr) {
>> >         	addrs[k - 1] += len;
>> >         } else {
>> >         	addrs[k++] = addr | len;
>> >         }
>> > }
>> > 
>> > Note the use of sg_dma_len(), since sg entries may be more than one
>> > page.  I don't think any merging is actually happening on our
>> > platform
>> > currently, thus why I left the inner loop.
>> > 
>> > Thanks for taking on doing this conversion!  This code's going to
>> > be
>> > so
>> > much nicer when you're done.
>> 
>> Thanks for looking at this.  
>> 
>> While I understand the use of sg_dma_len, I don't understand totally
>> the need for "for_each_sg". The scatterlist can be part of a scatter
>> table with all kinds of complex chaining, but in this case it is
>> directly allocated as an array at the top of the function.
>> 
>> Also note that the addrs[] list is passed to the firmware and it
>> requires all the items of the list be paged aligned and be a multiple
>> of the page size.  So I'm also a bit confused about the need for
>> handling scatterlists that are not page aligned or a multiple of
>> pages.

I'm sure we can assume that sg_dma_map is going to give us back
page-aligned mappings, because we only asked for page aligned mappings.

I'm just making suggestions that follow what is described in
DMA-API-HOWTO.txt here.  Following the documentation seems like a good
idea unless there's a reason not to.

> Sorry, but I forgot to add that the addrs list is a address anded with
> a page count, not a byte count.   The example you have sent appears at
> a glance to look like it is anding a byte count with the address.

Looking at the type of the field being read, it looked like a page count
to me, but I was unsure and the header didn't clarify.  Looking again,
it must be bytes, so shifting would be necessary.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 800 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg
  2016-10-25 16:16       ` Eric Anholt
@ 2016-10-25 16:30         ` Michael Zoran
  0 siblings, 0 replies; 9+ messages in thread
From: Michael Zoran @ 2016-10-25 16:30 UTC (permalink / raw)
  To: Eric Anholt, gregkh; +Cc: daniels, noralf, popcornmix, devel, linux-kernel

On Tue, 2016-10-25 at 09:16 -0700, Eric Anholt wrote:
> Michael Zoran <mzoran@crowfest.net> writes:
> 
> > On Tue, 2016-10-25 at 08:00 -0700, Michael Zoran wrote:
> > > On Mon, 2016-10-24 at 10:31 -0700, Eric Anholt wrote:
> > > > mzoran@crowfest.net writes:
> > > > 
> > > > >  */
> > > > >  
> > > > >  static int
> > > > >  create_pagelist(char __user *buf, size_t count, unsigned
> > > > > short
> > > > > type,
> > > > > -	struct task_struct *task, PAGELIST_T ** ppagelist)
> > > > > +		struct task_struct *task, PAGELIST_T
> > > > > **ppagelist,
> > > > > +		dma_addr_t *dma_addr)
> > > > >  {
> > > > >  	PAGELIST_T *pagelist;
> > > > >  	struct page **pages;
> > > > > -	unsigned long *addrs;
> > > > > -	unsigned int num_pages, offset, i;
> > > > > -	char *addr, *base_addr, *next_addr;
> > > > > -	int run, addridx, actual_pages;
> > > > > +	u32 *addrs;
> > > > > +	unsigned int num_pages, offset, i, j, k;
> > > > > +	int actual_pages;
> > > > >          unsigned long *need_release;
> > > > > +	size_t pagelist_size;
> > > > > +	struct scatterlist *scatterlist;
> > > > > +	int dma_buffers;
> > > > > +	int dir;
> > > > >  
> > > > > -	offset = (unsigned int)buf & (PAGE_SIZE - 1);
> > > > > +	offset = ((unsigned int)(unsigned long)buf &
> > > > > (PAGE_SIZE
> > > > > -
> > > > > 1));
> > > > >  	num_pages = (count + offset + PAGE_SIZE - 1) /
> > > > > PAGE_SIZE;
> > > > >  
> > > > > +	pagelist_size = sizeof(PAGELIST_T) +
> > > > > +			(num_pages * sizeof(unsigned long))
> > > > > +
> > > > > +			sizeof(unsigned long) +
> > > > > +			(num_pages * sizeof(pages[0]) +
> > > > > +			(num_pages * sizeof(struct
> > > > > scatterlist)));
> > > > > +
> > > > >  	*ppagelist = NULL;
> > > > >  
> > > > > +	dir = (type == PAGELIST_WRITE) ? DMA_TO_DEVICE :
> > > > > DMA_FROM_DEVICE;
> > > > > +
> > > > >  	/* Allocate enough storage to hold the page pointers
> > > > > and
> > > > > the page
> > > > >  	** list
> > > > >  	*/
> > > > > -	pagelist = kmalloc(sizeof(PAGELIST_T) +
> > > > > -                           (num_pages * sizeof(unsigned
> > > > > long)) +
> > > > > -                           sizeof(unsigned long) +
> > > > > -                           (num_pages * sizeof(pages[0])),
> > > > > -                           GFP_KERNEL);
> > > > > +	pagelist = dma_zalloc_coherent(g_dev,
> > > > > +				       pagelist_size,
> > > > > +				       dma_addr,
> > > > > +				       GFP_KERNEL);
> > > > >  
> > > > >  	vchiq_log_trace(vchiq_arm_log_level,
> > > > > "create_pagelist -
> > > > > %pK",
> > > > >  			pagelist);
> > > > > @@ -394,10 +399,9 @@ create_pagelist(char __user *buf, size_t
> > > > > count, unsigned short type,
> > > > >  	addrs = pagelist->addrs;
> > > > >          need_release = (unsigned long *)(addrs + num_pages);
> > > > >  	pages = (struct page **)(addrs + num_pages + 1);
> > > > > +	scatterlist = (struct scatterlist *)(pages +
> > > > > num_pages);
> > > > >  
> > > > >  	if (is_vmalloc_addr(buf)) {
> > > > > -		int dir = (type == PAGELIST_WRITE) ?
> > > > > -			DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > > > >  		unsigned long length = count;
> > > > >  		unsigned int off = offset;
> > > > >  
> > > > > @@ -410,7 +414,6 @@ create_pagelist(char __user *buf, size_t
> > > > > count,
> > > > > unsigned short type,
> > > > >  			if (bytes > length)
> > > > >  				bytes = length;
> > > > >  			pages[actual_pages] = pg;
> > > > > -			dmac_map_area(page_address(pg) +
> > > > > off,
> > > > > bytes, dir);
> > > > >  			length -= bytes;
> > > > >  			off = 0;
> > > > >  		}
> > > > > @@ -438,7 +441,8 @@ create_pagelist(char __user *buf, size_t
> > > > > count,
> > > > > unsigned short type,
> > > > >  				actual_pages--;
> > > > >  				put_page(pages[actual_pages]
> > > > > );
> > > > >  			}
> > > > > -			kfree(pagelist);
> > > > > +			dma_free_coherent(g_dev,
> > > > > pagelist_size,
> > > > > +					  pagelist,
> > > > > *dma_addr);
> > > > >  			if (actual_pages == 0)
> > > > >  				actual_pages = -ENOMEM;
> > > > >  			return actual_pages;
> > > > > @@ -450,29 +454,38 @@ create_pagelist(char __user *buf,
> > > > > size_t
> > > > > count, unsigned short type,
> > > > >  	pagelist->type = type;
> > > > >  	pagelist->offset = offset;
> > > > >  
> > > > > -	/* Group the pages into runs of contiguous pages */
> > > > > -
> > > > > -	base_addr =
> > > > > VCHIQ_ARM_ADDRESS(page_address(pages[0]));
> > > > > -	next_addr = base_addr + PAGE_SIZE;
> > > > > -	addridx = 0;
> > > > > -	run = 0;
> > > > > -
> > > > > -	for (i = 1; i < num_pages; i++) {
> > > > > -		addr =
> > > > > VCHIQ_ARM_ADDRESS(page_address(pages[i]));
> > > > > -		if ((addr == next_addr) && (run < (PAGE_SIZE
> > > > > -
> > > > > 1))) {
> > > > > -			next_addr += PAGE_SIZE;
> > > > > -			run++;
> > > > > -		} else {
> > > > > -			addrs[addridx] = (unsigned
> > > > > long)base_addr
> > > > > + run;
> > > > > -			addridx++;
> > > > > -			base_addr = addr;
> > > > > -			next_addr = addr + PAGE_SIZE;
> > > > > -			run = 0;
> > > > > -		}
> > > > > +	for (i = 0; i < num_pages; i++)
> > > > > +		sg_set_page(scatterlist + i, pages[i],
> > > > > PAGE_SIZE,
> > > > > 0);
> > > > > +
> > > > > +	dma_buffers = dma_map_sg(g_dev,
> > > > > +				 scatterlist,
> > > > > +				 num_pages,
> > > > > +				 dir);
> > > > > +
> > > > > +	if (dma_buffers == 0) {
> > > > > +		dma_free_coherent(g_dev, pagelist_size,
> > > > > +				  pagelist, *dma_addr);
> > > > > +
> > > > > +		return -EINTR;
> > > > >  	}
> > > > >  
> > > > > -	addrs[addridx] = (unsigned long)base_addr + run;
> > > > > -	addridx++;
> > > > > +	k = 0;
> > > > > +	for (i = 0; i < dma_buffers;) {
> > > > > +		u32 section_length = 0;
> > > > > +
> > > > > +		for (j = i + 1; j < dma_buffers; j++) {
> > > > > +			if (sg_dma_address(scatterlist + j)
> > > > > !=
> > > > > +			    sg_dma_address(scatterlist + j -
> > > > > 1)
> > > > > +
> > > > > PAGE_SIZE) {
> > > > > +				break;
> > > > > +			}
> > > > > +			section_length++;
> > > > > +		}
> > > > > +
> > > > > +		addrs[k] = (u32)sg_dma_address(scatterlist +
> > > > > i)
> > > > > > 
> > > > > 
> > > > > +				section_length;
> > > > 
> > > > It looks like scatterlist may not be just an array, so I think
> > > > this
> > > > whole loop wants to be something like:
> > > > 
> > > > for_each_sg(scatterlist, sg, num_pages, i) {
> > > > 	u32 len = sg_dma_len(sg);
> > > >         u32 addr = sg_dma_address(sg);
> > > > 
> > > >         if (k > 0 && addrs[k - 1] & PAGE_MASK +
> > > >             (addrs[k - 1] & ~PAGE_MASK) << PAGE_SHIFT) == addr)
> > > > {
> > > >         	addrs[k - 1] += len;
> > > >         } else {
> > > >         	addrs[k++] = addr | len;
> > > >         }
> > > > }
> > > > 
> > > > Note the use of sg_dma_len(), since sg entries may be more than
> > > > one
> > > > page.  I don't think any merging is actually happening on our
> > > > platform
> > > > currently, thus why I left the inner loop.
> > > > 
> > > > Thanks for taking on doing this conversion!  This code's going
> > > > to
> > > > be
> > > > so
> > > > much nicer when you're done.
> > > 
> > > Thanks for looking at this.  
> > > 
> > > While I understand the use of sg_dma_len, I don't understand
> > > totally
> > > the need for "for_each_sg". The scatterlist can be part of a
> > > scatter
> > > table with all kinds of complex chaining, but in this case it is
> > > directly allocated as an array at the top of the function.
> > > 
> > > Also note that the addrs[] list is passed to the firmware and it
> > > requires all the items of the list be paged aligned and be a
> > > multiple
> > > of the page size.  So I'm also a bit confused about the need for
> > > handling scatterlists that are not page aligned or a multiple of
> > > pages.
> 
> I'm sure we can assume that sg_dma_map is going to give us back
> page-aligned mappings, because we only asked for page aligned
> mappings.
> 
> I'm just making suggestions that follow what is describeed in
> DMA-API-HOWTO.txt here.  Following the documentation seems like a
> good
> idea unless there's a reason not to.
> 
> > Sorry, but I forgot to add that the addrs list is a address anded
> > with
> > a page count, not a byte count.   The example you have sent appears
> > at
> > a glance to look like it is anding a byte count with the address.
> 
> Looking at the type of the field being read, it looked like a page
> count
> to me, but I was unsure and the header didn't clarify.  Looking
> again,
> it must be bytes, so shifting would be necessary.

Please see the pagelist_struct structure.  It contains a comment
that the addrs field is in pages.

drivers/staging/vc04_services/interface/vchiq_arm/vchiq_pagelist.h

typedef struct pagelist_struct {
	unsigned long length;
	unsigned short type;
	unsigned short offset;
	unsigned long addrs[1];	/* N.B. 12 LSBs hold the number
of following
				   pages at consecutive addresses. */
} PAGELIST_T;

If the DMA API can return lengths that aren't page lengths, then I
don't think this is really a good API to be using, especially since the
firmware requires all lengths to be in pages.  

It might be good to reference the original code that is currently
checked into the staging tree.  It also strongly suggests the length
is in pages. 

I tested the changes with vchiq_test and added some temporary printouts,
and I know that the count in pages works and that for larger message
sizes the changes I sent are combining pages at consecutive addresses.
Sometimes a lot of pages are being combined.

I wish meld or something similar had a way to load a diff so that it's
easier to review these patches.  But I digress...

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-10-25 16:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-24  5:29 [PATCH 2/2] staging: vc04_services: Replace dmac_map_area with dmac_map_sg mzoran
2016-10-24 13:24 ` Greg KH
2016-10-24 14:01   ` Michael Zoran
2016-10-24 16:56   ` Eric Anholt
2016-10-24 17:31 ` Eric Anholt
2016-10-25 15:00   ` Michael Zoran
2016-10-25 15:07     ` Michael Zoran
2016-10-25 16:16       ` Eric Anholt
2016-10-25 16:30         ` Michael Zoran

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.