All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] videobuf2-dma-sg: Contiguos memory allocation
@ 2013-07-19 17:02 Ricardo Ribalda Delgado
  2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
  2013-07-19 17:02 ` [PATCH 2/2] videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table Ricardo Ribalda Delgado
  0 siblings, 2 replies; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-07-19 17:02 UTC (permalink / raw)
  To: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Andre Heider,
	Sylwester Nawrocki
  Cc: Ricardo Ribalda Delgado

Allocate memory as contiguos as possible to support dma engines with limitated amount of sg-descriptors.

Replace private structer vb2_dma_sg_desc with generic struct sg_table.

PS: This series of patches is the evolution of my previous patch for vb2-dma-sg to allocate the memory as contiguos as possible.

v2: Contains feedback from Andre Heider and Sylwester Nawrocki

Andre: Fix error handling (--pages)
Sylwester: Squash p3 and p4 into p2

Ricardo Ribalda Delgado (2):
  videobuf2-dma-sg: Allocate pages as contiguous as possible
  videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table

 drivers/media/platform/marvell-ccic/mcam-core.c    |   14 +-
 drivers/media/v4l2-core/videobuf2-dma-sg.c         |  149 +++++++++++---------
 drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c |   20 +--
 include/media/videobuf2-dma-sg.h                   |   10 +-
 4 files changed, 105 insertions(+), 88 deletions(-)

-- 
1.7.10.4


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 17:02 [PATCH 0/2] videobuf2-dma-sg: Contiguos memory allocation Ricardo Ribalda Delgado
@ 2013-07-19 17:02 ` Ricardo Ribalda Delgado
  2013-07-19 20:16   ` Jonathan Corbet
                     ` (3 more replies)
  2013-07-19 17:02 ` [PATCH 2/2] videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table Ricardo Ribalda Delgado
  1 sibling, 4 replies; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-07-19 17:02 UTC (permalink / raw)
  To: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Andre Heider,
	Sylwester Nawrocki
  Cc: Ricardo Ribalda Delgado

Most DMA engines have limitations regarding the number of DMA segments
(sg-buffers) that they can handle. Videobuffers can easily spread
through houndreds of pages.

In the previous aproach, the pages were allocated individually, this
could led to the creation houndreds of dma segments (sg-buffers) that
could not be handled by some DMA engines.

This patch tries to minimize the number of DMA segments by using
alloc_pages. In the worst case it will behave as before, but most
of the times it will reduce the number of dma segments

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
---
 drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 11 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
index 16ae3dc..c053605 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
 
 static void vb2_dma_sg_put(void *buf_priv);
 
+static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
+		gfp_t gfp_flags)
+{
+	unsigned int last_page = 0;
+	int size = buf->sg_desc.size;
+
+	while (size > 0) {
+		struct page *pages;
+		int order;
+		int i;
+
+		order = get_order(size);
+		/* Dont over allocate*/
+		if ((PAGE_SIZE << order) > size)
+			order--;
+
+		pages = NULL;
+		while (!pages) {
+			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
+					__GFP_NOWARN | gfp_flags, order);
+			if (pages)
+				break;
+
+			if (order == 0)
+				while (last_page--) {
+					__free_page(buf->pages[last_page]);
+					return -ENOMEM;
+				}
+			order--;
+		}
+
+		split_page(pages, order);
+		for (i = 0; i < (1<<order); i++) {
+			buf->pages[last_page] = pages[i];
+			sg_set_page(&buf->sg_desc.sglist[last_page],
+					buf->pages[last_page], PAGE_SIZE, 0);
+			last_page++;
+		}
+
+		size -= PAGE_SIZE << order;
+	}
+
+	return 0;
+}
+
 static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
 {
 	struct vb2_dma_sg_buf *buf;
-	int i;
+	int ret;
 
 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
 	if (!buf)
@@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	if (!buf->pages)
 		goto fail_pages_array_alloc;
 
-	for (i = 0; i < buf->sg_desc.num_pages; ++i) {
-		buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
-					   __GFP_NOWARN | gfp_flags);
-		if (NULL == buf->pages[i])
-			goto fail_pages_alloc;
-		sg_set_page(&buf->sg_desc.sglist[i],
-			    buf->pages[i], PAGE_SIZE, 0);
-	}
+	ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
+	if (ret)
+		goto fail_pages_alloc;
 
 	buf->handler.refcount = &buf->refcount;
 	buf->handler.put = vb2_dma_sg_put;
@@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	return buf;
 
 fail_pages_alloc:
-	while (--i >= 0)
-		__free_page(buf->pages[i]);
 	kfree(buf->pages);
 
 fail_pages_array_alloc:
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/2] videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table
  2013-07-19 17:02 [PATCH 0/2] videobuf2-dma-sg: Contiguos memory allocation Ricardo Ribalda Delgado
  2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
@ 2013-07-19 17:02 ` Ricardo Ribalda Delgado
  2013-08-02  6:39   ` Marek Szyprowski
  1 sibling, 1 reply; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-07-19 17:02 UTC (permalink / raw)
  To: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Andre Heider,
	Sylwester Nawrocki
  Cc: Ricardo Ribalda Delgado

Replace the private struct vb2_dma_sg_desc with the struct sg_table so
we can benefit from all the helping functions in lib/scatterlist.c for
things like allocating the sg or compacting the descriptor

marvel-ccic and solo6x10 drivers, that uses this api has been updated

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
---
 drivers/media/platform/marvell-ccic/mcam-core.c    |   14 +--
 drivers/media/v4l2-core/videobuf2-dma-sg.c         |  103 ++++++++------------
 drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c |   20 ++--
 include/media/videobuf2-dma-sg.h                   |   10 +-
 4 files changed, 63 insertions(+), 84 deletions(-)

diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c
index 64ab91e..0ac51bd 100644
--- a/drivers/media/platform/marvell-ccic/mcam-core.c
+++ b/drivers/media/platform/marvell-ccic/mcam-core.c
@@ -1040,16 +1040,16 @@ static int mcam_vb_sg_buf_prepare(struct vb2_buffer *vb)
 {
 	struct mcam_vb_buffer *mvb = vb_to_mvb(vb);
 	struct mcam_camera *cam = vb2_get_drv_priv(vb->vb2_queue);
-	struct vb2_dma_sg_desc *sgd = vb2_dma_sg_plane_desc(vb, 0);
+	struct sg_table *sg_table = vb2_dma_sg_plane_desc(vb, 0);
 	struct mcam_dma_desc *desc = mvb->dma_desc;
 	struct scatterlist *sg;
 	int i;
 
-	mvb->dma_desc_nent = dma_map_sg(cam->dev, sgd->sglist, sgd->num_pages,
-			DMA_FROM_DEVICE);
+	mvb->dma_desc_nent = dma_map_sg(cam->dev, sg_table->sgl,
+			sg_table->nents, DMA_FROM_DEVICE);
 	if (mvb->dma_desc_nent <= 0)
 		return -EIO;  /* Not sure what's right here */
-	for_each_sg(sgd->sglist, sg, mvb->dma_desc_nent, i) {
+	for_each_sg(sg_table->sgl, sg, mvb->dma_desc_nent, i) {
 		desc->dma_addr = sg_dma_address(sg);
 		desc->segment_len = sg_dma_len(sg);
 		desc++;
@@ -1060,9 +1060,11 @@ static int mcam_vb_sg_buf_prepare(struct vb2_buffer *vb)
 static int mcam_vb_sg_buf_finish(struct vb2_buffer *vb)
 {
 	struct mcam_camera *cam = vb2_get_drv_priv(vb->vb2_queue);
-	struct vb2_dma_sg_desc *sgd = vb2_dma_sg_plane_desc(vb, 0);
+	struct sg_table *sg_table = vb2_dma_sg_plane_desc(vb, 0);
 
-	dma_unmap_sg(cam->dev, sgd->sglist, sgd->num_pages, DMA_FROM_DEVICE);
+	if (sg_table)
+		dma_unmap_sg(cam->dev, sg_table->sgl,
+				sg_table->nents, DMA_FROM_DEVICE);
 	return 0;
 }
 
diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
index c053605..4b30ad5 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -35,7 +35,9 @@ struct vb2_dma_sg_buf {
 	struct page			**pages;
 	int				write;
 	int				offset;
-	struct vb2_dma_sg_desc		sg_desc;
+	struct sg_table			sg_table;
+	size_t				size;
+	unsigned int			num_pages;
 	atomic_t			refcount;
 	struct vb2_vmarea_handler	handler;
 };
@@ -46,7 +48,7 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
 		gfp_t gfp_flags)
 {
 	unsigned int last_page = 0;
-	int size = buf->sg_desc.size;
+	int size = buf->size;
 
 	while (size > 0) {
 		struct page *pages;
@@ -74,12 +76,8 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
 		}
 
 		split_page(pages, order);
-		for (i = 0; i < (1<<order); i++) {
-			buf->pages[last_page] = pages[i];
-			sg_set_page(&buf->sg_desc.sglist[last_page],
-					buf->pages[last_page], PAGE_SIZE, 0);
-			last_page++;
-		}
+		for (i = 0; i < (1<<order); i++)
+			buf->pages[last_page++] = pages[i];
 
 		size -= PAGE_SIZE << order;
 	}
@@ -91,6 +89,7 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 {
 	struct vb2_dma_sg_buf *buf;
 	int ret;
+	int num_pages;
 
 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
 	if (!buf)
@@ -99,17 +98,11 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	buf->vaddr = NULL;
 	buf->write = 0;
 	buf->offset = 0;
-	buf->sg_desc.size = size;
+	buf->size = size;
 	/* size is already page aligned */
-	buf->sg_desc.num_pages = size >> PAGE_SHIFT;
-
-	buf->sg_desc.sglist = vzalloc(buf->sg_desc.num_pages *
-				      sizeof(*buf->sg_desc.sglist));
-	if (!buf->sg_desc.sglist)
-		goto fail_sglist_alloc;
-	sg_init_table(buf->sg_desc.sglist, buf->sg_desc.num_pages);
+	buf->num_pages = size >> PAGE_SHIFT;
 
-	buf->pages = kzalloc(buf->sg_desc.num_pages * sizeof(struct page *),
+	buf->pages = kzalloc(buf->num_pages * sizeof(struct page *),
 			     GFP_KERNEL);
 	if (!buf->pages)
 		goto fail_pages_array_alloc;
@@ -118,6 +111,11 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	if (ret)
 		goto fail_pages_alloc;
 
+	ret = sg_alloc_table_from_pages(&buf->sg_table, buf->pages,
+			buf->num_pages, 0, size, gfp_flags);
+	if (ret)
+		goto fail_table_alloc;
+
 	buf->handler.refcount = &buf->refcount;
 	buf->handler.put = vb2_dma_sg_put;
 	buf->handler.arg = buf;
@@ -125,16 +123,16 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	atomic_inc(&buf->refcount);
 
 	dprintk(1, "%s: Allocated buffer of %d pages\n",
-		__func__, buf->sg_desc.num_pages);
+		__func__, buf->num_pages);
 	return buf;
 
+fail_table_alloc:
+	num_pages = buf->num_pages;
+	while (num_pages--)
+		__free_page(buf->pages[num_pages]);
 fail_pages_alloc:
 	kfree(buf->pages);
-
 fail_pages_array_alloc:
-	vfree(buf->sg_desc.sglist);
-
-fail_sglist_alloc:
 	kfree(buf);
 	return NULL;
 }
@@ -142,14 +140,14 @@ fail_sglist_alloc:
 static void vb2_dma_sg_put(void *buf_priv)
 {
 	struct vb2_dma_sg_buf *buf = buf_priv;
-	int i = buf->sg_desc.num_pages;
+	int i = buf->num_pages;
 
 	if (atomic_dec_and_test(&buf->refcount)) {
 		dprintk(1, "%s: Freeing buffer of %d pages\n", __func__,
-			buf->sg_desc.num_pages);
+			buf->num_pages);
 		if (buf->vaddr)
-			vm_unmap_ram(buf->vaddr, buf->sg_desc.num_pages);
-		vfree(buf->sg_desc.sglist);
+			vm_unmap_ram(buf->vaddr, buf->num_pages);
+		sg_free_table(&buf->sg_table);
 		while (--i >= 0)
 			__free_page(buf->pages[i]);
 		kfree(buf->pages);
@@ -162,7 +160,7 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr,
 {
 	struct vb2_dma_sg_buf *buf;
 	unsigned long first, last;
-	int num_pages_from_user, i;
+	int num_pages_from_user;
 
 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
 	if (!buf)
@@ -171,56 +169,41 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr,
 	buf->vaddr = NULL;
 	buf->write = write;
 	buf->offset = vaddr & ~PAGE_MASK;
-	buf->sg_desc.size = size;
+	buf->size = size;
 
 	first = (vaddr           & PAGE_MASK) >> PAGE_SHIFT;
 	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
-	buf->sg_desc.num_pages = last - first + 1;
-
-	buf->sg_desc.sglist = vzalloc(
-		buf->sg_desc.num_pages * sizeof(*buf->sg_desc.sglist));
-	if (!buf->sg_desc.sglist)
-		goto userptr_fail_sglist_alloc;
+	buf->num_pages = last - first + 1;
 
-	sg_init_table(buf->sg_desc.sglist, buf->sg_desc.num_pages);
-
-	buf->pages = kzalloc(buf->sg_desc.num_pages * sizeof(struct page *),
+	buf->pages = kzalloc(buf->num_pages * sizeof(struct page *),
 			     GFP_KERNEL);
 	if (!buf->pages)
-		goto userptr_fail_pages_array_alloc;
+		return NULL;
 
 	num_pages_from_user = get_user_pages(current, current->mm,
 					     vaddr & PAGE_MASK,
-					     buf->sg_desc.num_pages,
+					     buf->num_pages,
 					     write,
 					     1, /* force */
 					     buf->pages,
 					     NULL);
 
-	if (num_pages_from_user != buf->sg_desc.num_pages)
+	if (num_pages_from_user != buf->num_pages)
 		goto userptr_fail_get_user_pages;
 
-	sg_set_page(&buf->sg_desc.sglist[0], buf->pages[0],
-		    PAGE_SIZE - buf->offset, buf->offset);
-	size -= PAGE_SIZE - buf->offset;
-	for (i = 1; i < buf->sg_desc.num_pages; ++i) {
-		sg_set_page(&buf->sg_desc.sglist[i], buf->pages[i],
-			    min_t(size_t, PAGE_SIZE, size), 0);
-		size -= min_t(size_t, PAGE_SIZE, size);
-	}
+	if (sg_alloc_table_from_pages(&buf->sg_table, buf->pages,
+			buf->num_pages, buf->offset, size, 0))
+		goto userptr_fail_alloc_table_from_pages;
+
 	return buf;
 
+userptr_fail_alloc_table_from_pages:
 userptr_fail_get_user_pages:
 	dprintk(1, "get_user_pages requested/got: %d/%d]\n",
-	       num_pages_from_user, buf->sg_desc.num_pages);
+	       num_pages_from_user, buf->num_pages);
 	while (--num_pages_from_user >= 0)
 		put_page(buf->pages[num_pages_from_user]);
 	kfree(buf->pages);
-
-userptr_fail_pages_array_alloc:
-	vfree(buf->sg_desc.sglist);
-
-userptr_fail_sglist_alloc:
 	kfree(buf);
 	return NULL;
 }
@@ -232,18 +215,18 @@ userptr_fail_sglist_alloc:
 static void vb2_dma_sg_put_userptr(void *buf_priv)
 {
 	struct vb2_dma_sg_buf *buf = buf_priv;
-	int i = buf->sg_desc.num_pages;
+	int i = buf->num_pages;
 
 	dprintk(1, "%s: Releasing userspace buffer of %d pages\n",
-	       __func__, buf->sg_desc.num_pages);
+	       __func__, buf->num_pages);
 	if (buf->vaddr)
-		vm_unmap_ram(buf->vaddr, buf->sg_desc.num_pages);
+		vm_unmap_ram(buf->vaddr, buf->num_pages);
+	sg_free_table(&buf->sg_table);
 	while (--i >= 0) {
 		if (buf->write)
 			set_page_dirty_lock(buf->pages[i]);
 		put_page(buf->pages[i]);
 	}
-	vfree(buf->sg_desc.sglist);
 	kfree(buf->pages);
 	kfree(buf);
 }
@@ -256,7 +239,7 @@ static void *vb2_dma_sg_vaddr(void *buf_priv)
 
 	if (!buf->vaddr)
 		buf->vaddr = vm_map_ram(buf->pages,
-					buf->sg_desc.num_pages,
+					buf->num_pages,
 					-1,
 					PAGE_KERNEL);
 
@@ -312,7 +295,7 @@ static void *vb2_dma_sg_cookie(void *buf_priv)
 {
 	struct vb2_dma_sg_buf *buf = buf_priv;
 
-	return &buf->sg_desc;
+	return &buf->sg_table;
 }
 
 const struct vb2_mem_ops vb2_dma_sg_memops = {
diff --git a/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c b/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c
index 98e2902..cfa01f1 100644
--- a/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c
+++ b/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c
@@ -346,7 +346,7 @@ static int enc_get_mpeg_dma(struct solo_dev *solo_dev, dma_addr_t dma,
 /* Build a descriptor queue out of an SG list and send it to the P2M for
  * processing. */
 static int solo_send_desc(struct solo_enc_dev *solo_enc, int skip,
-			  struct vb2_dma_sg_desc *vbuf, int off, int size,
+			  struct sg_table *vbuf, int off, int size,
 			  unsigned int base, unsigned int base_size)
 {
 	struct solo_dev *solo_dev = solo_enc->solo_dev;
@@ -359,7 +359,7 @@ static int solo_send_desc(struct solo_enc_dev *solo_enc, int skip,
 
 	solo_enc->desc_count = 1;
 
-	for_each_sg(vbuf->sglist, sg, vbuf->num_pages, i) {
+	for_each_sg(vbuf->sgl, sg, vbuf->nents, i) {
 		struct solo_p2m_desc *desc;
 		dma_addr_t dma;
 		int len;
@@ -434,7 +434,7 @@ static int solo_fill_jpeg(struct solo_enc_dev *solo_enc,
 		struct vb2_buffer *vb, struct vop_header *vh)
 {
 	struct solo_dev *solo_dev = solo_enc->solo_dev;
-	struct vb2_dma_sg_desc *vbuf = vb2_dma_sg_plane_desc(vb, 0);
+	struct sg_table *vbuf = vb2_dma_sg_plane_desc(vb, 0);
 	int frame_size;
 	int ret;
 
@@ -443,7 +443,7 @@ static int solo_fill_jpeg(struct solo_enc_dev *solo_enc,
 	if (vb2_plane_size(vb, 0) < vh->jpeg_size + solo_enc->jpeg_len)
 		return -EIO;
 
-	sg_copy_from_buffer(vbuf->sglist, vbuf->num_pages,
+	sg_copy_from_buffer(vbuf->sgl, vbuf->nents,
 			solo_enc->jpeg_header,
 			solo_enc->jpeg_len);
 
@@ -451,12 +451,12 @@ static int solo_fill_jpeg(struct solo_enc_dev *solo_enc,
 		& ~(DMA_ALIGN - 1);
 	vb2_set_plane_payload(vb, 0, vh->jpeg_size + solo_enc->jpeg_len);
 
-	dma_map_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
+	dma_map_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
 			DMA_FROM_DEVICE);
 	ret = solo_send_desc(solo_enc, solo_enc->jpeg_len, vbuf, vh->jpeg_off,
 			frame_size, SOLO_JPEG_EXT_ADDR(solo_dev),
 			SOLO_JPEG_EXT_SIZE(solo_dev));
-	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
+	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
 			DMA_FROM_DEVICE);
 	return ret;
 }
@@ -465,7 +465,7 @@ static int solo_fill_mpeg(struct solo_enc_dev *solo_enc,
 		struct vb2_buffer *vb, struct vop_header *vh)
 {
 	struct solo_dev *solo_dev = solo_enc->solo_dev;
-	struct vb2_dma_sg_desc *vbuf = vb2_dma_sg_plane_desc(vb, 0);
+	struct sg_table *vbuf = vb2_dma_sg_plane_desc(vb, 0);
 	int frame_off, frame_size;
 	int skip = 0;
 	int ret;
@@ -475,7 +475,7 @@ static int solo_fill_mpeg(struct solo_enc_dev *solo_enc,
 
 	/* If this is a key frame, add extra header */
 	if (!vh->vop_type) {
-		sg_copy_from_buffer(vbuf->sglist, vbuf->num_pages,
+		sg_copy_from_buffer(vbuf->sgl, vbuf->nents,
 				solo_enc->vop,
 				solo_enc->vop_len);
 
@@ -494,12 +494,12 @@ static int solo_fill_mpeg(struct solo_enc_dev *solo_enc,
 	frame_size = (vh->mpeg_size + skip + (DMA_ALIGN - 1))
 		& ~(DMA_ALIGN - 1);
 
-	dma_map_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
+	dma_map_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
 			DMA_FROM_DEVICE);
 	ret = solo_send_desc(solo_enc, skip, vbuf, frame_off, frame_size,
 			SOLO_MP4E_EXT_ADDR(solo_dev),
 			SOLO_MP4E_EXT_SIZE(solo_dev));
-	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
+	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
 			DMA_FROM_DEVICE);
 	return ret;
 }
diff --git a/include/media/videobuf2-dma-sg.h b/include/media/videobuf2-dma-sg.h
index 0038526..7b89852 100644
--- a/include/media/videobuf2-dma-sg.h
+++ b/include/media/videobuf2-dma-sg.h
@@ -15,16 +15,10 @@
 
 #include <media/videobuf2-core.h>
 
-struct vb2_dma_sg_desc {
-	unsigned long		size;
-	unsigned int		num_pages;
-	struct scatterlist	*sglist;
-};
-
-static inline struct vb2_dma_sg_desc *vb2_dma_sg_plane_desc(
+static inline struct sg_table *vb2_dma_sg_plane_desc(
 		struct vb2_buffer *vb, unsigned int plane_no)
 {
-	return (struct vb2_dma_sg_desc *)vb2_plane_cookie(vb, plane_no);
+	return (struct sg_table *)vb2_plane_cookie(vb, plane_no);
 }
 
 extern const struct vb2_mem_ops vb2_dma_sg_memops;
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
@ 2013-07-19 20:16   ` Jonathan Corbet
  2013-07-19 21:57     ` Ricardo Ribalda Delgado
  2013-07-29 11:27     ` Marek Szyprowski
  2013-08-02  6:38   ` Marek Szyprowski
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Jonathan Corbet @ 2013-07-19 20:16 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado
  Cc: Mauro Carvalho Chehab, Pawel Osciak, Marek Szyprowski,
	Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman, linux-media,
	Andre Heider, Sylwester Nawrocki

On Fri, 19 Jul 2013 19:02:33 +0200
Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> wrote:

> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
> 
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.
> 
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments

So I looked this over and I have a few questions...

> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>  
>  static void vb2_dma_sg_put(void *buf_priv);
>  
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;

Terrible things will happen if size < PAGE_SIZE.  Presumably that should
never happen, or perhaps one could say any caller who does that will get
what they deserve.

Have you considered alloc_pages_exact(), though?  That might result in
fewer segments overall.

> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);

If I understand things, this is wrong; you relly need free_pages() with the
correct order.  Or, at least, that would be the case if you kept the pages
together, but that leads to my biggest question...

> +					return -ENOMEM;
> +				}
> +			order--;
> +		}
> +
> +		split_page(pages, order);
> +		for (i = 0; i < (1<<order); i++) {
> +			buf->pages[last_page] = pages[i];
> +			sg_set_page(&buf->sg_desc.sglist[last_page],
> +					buf->pages[last_page], PAGE_SIZE, 0);
> +			last_page++;
> +		}

You've gone to all this trouble to get a higher-order allocation so you'd
have fewer segments, then you undo it all by splitting things apart into
individual pages.  Why?  Clearly I'm missing something, this seems to
defeat the purpose of the whole exercise?

Thanks,

jon

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 20:16   ` Jonathan Corbet
@ 2013-07-19 21:57     ` Ricardo Ribalda Delgado
  2013-07-29 11:27     ` Marek Szyprowski
  1 sibling, 0 replies; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-07-19 21:57 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Mauro Carvalho Chehab, Pawel Osciak, Marek Szyprowski,
	Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman, linux-media,
	Andre Heider, Sylwester Nawrocki

Hello Jonathan:

Thanks for your review. I am making a driver for a camera that
produces 4Mpx images with up to 10 bytes per pixel!. The camera has a
dma engine capable of moving up to 255 sg sectors.

In the original implementation of vb2-dma-sg, every page was allocated
independently, dividing the memory in more than 255 sectors.

If the memory is very segmented, then there is nothing I can do, but
if there are high order pages available I would like to use them.

The original assumption is that all the pages that compose a high
order page are contiguous in physical memory.

On Fri, Jul 19, 2013 at 10:16 PM, Jonathan Corbet <corbet@lwn.net> wrote:
> On Fri, 19 Jul 2013 19:02:33 +0200
> Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> wrote:
>
>> Most DMA engines have limitations regarding the number of DMA segments
>> (sg-buffers) that they can handle. Videobuffers can easily spread
>> through houndreds of pages.
>>
>> In the previous aproach, the pages were allocated individually, this
>> could led to the creation houndreds of dma segments (sg-buffers) that
>> could not be handled by some DMA engines.
>>
>> This patch tries to minimize the number of DMA segments by using
>> alloc_pages. In the worst case it will behave as before, but most
>> of the times it will reduce the number of dma segments
>
> So I looked this over and I have a few questions...
>
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> index 16ae3dc..c053605 100644
>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>>
>>  static void vb2_dma_sg_put(void *buf_priv);
>>
>> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>> +             gfp_t gfp_flags)
>> +{
>> +     unsigned int last_page = 0;
>> +     int size = buf->sg_desc.size;
>> +
>> +     while (size > 0) {
>> +             struct page *pages;
>> +             int order;
>> +             int i;
>> +
>> +             order = get_order(size);
>> +             /* Dont over allocate*/
>> +             if ((PAGE_SIZE << order) > size)
>> +                     order--;
>
> Terrible things will happen if size < PAGE_SIZE.  Presumably that should
> never happen, or perhaps one could say any caller who does that will get
> what they deserve.

The caller function is vb2_dma_sg_alloc which according to its
comments is already page aligned, so that should be covered.
https://linuxtv.org/patch/18095/

>
> Have you considered alloc_pages_exact(), though?  That might result in
> fewer segments overall.

In the previous implementation I used alloc_pages_exact, but there
were two things that made me change my mind. One is that the comments
of the function says that you should free the pages with
free_pages_exact, so  should get track of the segments. The other is
that alloc_pages_exact split the highest pages into order 0, so there
could be a situation that for only one page I would split a higher
order page, and those are scarce.

>
>> +             pages = NULL;
>> +             while (!pages) {
>> +                     pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
>> +                                     __GFP_NOWARN | gfp_flags, order);
>> +                     if (pages)
>> +                             break;
>> +
>> +                     if (order == 0)
>> +                             while (last_page--) {
>> +                                     __free_page(buf->pages[last_page]);
>
> If I understand things, this is wrong; you relly need free_pages() with the
> correct order.  Or, at least, that would be the case if you kept the pages
> together, but that leads to my biggest question...

Pages are splitted, so I believe that this is right.

>
>> +                                     return -ENOMEM;
>> +                             }
>> +                     order--;
>> +             }
>> +
>> +             split_page(pages, order);
>> +             for (i = 0; i < (1<<order); i++) {
>> +                     buf->pages[last_page] = pages[i];
>> +                     sg_set_page(&buf->sg_desc.sglist[last_page],
>> +                                     buf->pages[last_page], PAGE_SIZE, 0);
>> +                     last_page++;
>> +             }
>
> You've gone to all this trouble to get a higher-order allocation so you'd
> have fewer segments, then you undo it all by splitting things apart into
> individual pages.  Why?  Clearly I'm missing something, this seems to
> defeat the purpose of the whole exercise?

I got to all this trouble to get memory as physically contiguous as
possible. I don't care if they belong to one or multiple pages, in
fact my dma controller only understand about physical addresses.

 If I don't split the pages then the calls to vm_map_ram and
vm_insert_page will fail, please take a look to:
https://lkml.org/lkml/2013/7/17/285 there I post the code that does
not split the pages and  to
http://marc.info/?l=linux-mm&m=124404111608622 where another poor guy
complains about not been able to use vm_insert_page on higher order
pages :).


Again, thank you very much for your review.

>
> Thanks,
>
> jon



--
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 20:16   ` Jonathan Corbet
  2013-07-19 21:57     ` Ricardo Ribalda Delgado
@ 2013-07-29 11:27     ` Marek Szyprowski
  2013-07-29 15:16       ` Jonathan Corbet
  1 sibling, 1 reply; 15+ messages in thread
From: Marek Szyprowski @ 2013-07-29 11:27 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Ricardo Ribalda Delgado, Mauro Carvalho Chehab, Pawel Osciak,
	Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman, linux-media,
	Andre Heider, Sylwester Nawrocki

Hello,

On 7/19/2013 10:16 PM, Jonathan Corbet wrote:
> On Fri, 19 Jul 2013 19:02:33 +0200
> Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> wrote:
>
> > Most DMA engines have limitations regarding the number of DMA segments
> > (sg-buffers) that they can handle. Videobuffers can easily spread
> > through houndreds of pages.
> >
> > In the previous aproach, the pages were allocated individually, this
> > could led to the creation houndreds of dma segments (sg-buffers) that
> > could not be handled by some DMA engines.
> >
> > This patch tries to minimize the number of DMA segments by using
> > alloc_pages. In the worst case it will behave as before, but most
> > of the times it will reduce the number of dma segments
>
> So I looked this over and I have a few questions...
>
> > diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> > index 16ae3dc..c053605 100644
> > --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> > +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> > @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
> >
> >  static void vb2_dma_sg_put(void *buf_priv);
> >
> > +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> > +		gfp_t gfp_flags)
> > +{
> > +	unsigned int last_page = 0;
> > +	int size = buf->sg_desc.size;
> > +
> > +	while (size > 0) {
> > +		struct page *pages;
> > +		int order;
> > +		int i;
> > +
> > +		order = get_order(size);
> > +		/* Dont over allocate*/
> > +		if ((PAGE_SIZE << order) > size)
> > +			order--;
>
> Terrible things will happen if size < PAGE_SIZE.  Presumably that should
> never happen, or perhaps one could say any caller who does that will get
> what they deserve.

I think that page size alignment for requested buffer size should be added
at vb2 core. V4L2 buffer API is page oriented and it really makes no sense
to allocate buffers which are not a multiple of page size.


> Have you considered alloc_pages_exact(), though?  That might result in
> fewer segments overall.
>
> > +		pages = NULL;
> > +		while (!pages) {
> > +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> > +					__GFP_NOWARN | gfp_flags, order);
> > +			if (pages)
> > +				break;
> > +
> > +			if (order == 0)
> > +				while (last_page--) {
> > +					__free_page(buf->pages[last_page]);
>
> If I understand things, this is wrong; you relly need free_pages() with the
> correct order.  Or, at least, that would be the case if you kept the pages
> together, but that leads to my biggest question...
>
> > +					return -ENOMEM;
> > +				}
> > +			order--;
> > +		}
> > +
> > +		split_page(pages, order);
> > +		for (i = 0; i < (1<<order); i++) {
> > +			buf->pages[last_page] = pages[i];
> > +			sg_set_page(&buf->sg_desc.sglist[last_page],
> > +					buf->pages[last_page], PAGE_SIZE, 0);
> > +			last_page++;
> > +		}
>
> You've gone to all this trouble to get a higher-order allocation so you'd
> have fewer segments, then you undo it all by splitting things apart into
> individual pages.  Why?  Clearly I'm missing something, this seems to
> defeat the purpose of the whole exercise?

Individual zero-order pages are required to get them mapped to userspace in
mmap callback.

Best regards
-- 
Marek Szyprowski
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-29 11:27     ` Marek Szyprowski
@ 2013-07-29 15:16       ` Jonathan Corbet
  2013-07-31  6:37         ` Sakari Ailus
  0 siblings, 1 reply; 15+ messages in thread
From: Jonathan Corbet @ 2013-07-29 15:16 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: Ricardo Ribalda Delgado, Mauro Carvalho Chehab, Pawel Osciak,
	Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman, linux-media,
	Andre Heider, Sylwester Nawrocki

On Mon, 29 Jul 2013 13:27:12 +0200
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> > You've gone to all this trouble to get a higher-order allocation so you'd
> > have fewer segments, then you undo it all by splitting things apart into
> > individual pages.  Why?  Clearly I'm missing something, this seems to
> > defeat the purpose of the whole exercise?  
> 
> Individual zero-order pages are required to get them mapped to userspace in
> mmap callback.

Yeah, Ricardo explained that too.  The right solution might be to fix that
problem rather than work around it, but I can see why one might shy at that
task! :)

I do wonder, though, if an intermediate solution using huge pages might be
the best approach?  That would get the number of segments down pretty far,
and using huge pages for buffers would reduce TLB pressure significantly
if the CPU is working through the data at all.  Meanwhile, inserting huge
pages into the process's address space should work easily.  Just a thought.

jon

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-29 15:16       ` Jonathan Corbet
@ 2013-07-31  6:37         ` Sakari Ailus
  2013-08-01 13:59           ` Ricardo Ribalda Delgado
  0 siblings, 1 reply; 15+ messages in thread
From: Sakari Ailus @ 2013-07-31  6:37 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Marek Szyprowski, Ricardo Ribalda Delgado, Mauro Carvalho Chehab,
	Pawel Osciak, Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman,
	linux-media, Andre Heider, Sylwester Nawrocki

Hi Jon and Sylwester,

On Mon, Jul 29, 2013 at 09:16:44AM -0600, Jonathan Corbet wrote:
> On Mon, 29 Jul 2013 13:27:12 +0200
> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
> 
> > > You've gone to all this trouble to get a higher-order allocation so you'd
> > > have fewer segments, then you undo it all by splitting things apart into
> > > individual pages.  Why?  Clearly I'm missing something, this seems to
> > > defeat the purpose of the whole exercise?  
> > 
> > Individual zero-order pages are required to get them mapped to userspace in
> > mmap callback.
> 
> Yeah, Ricardo explained that too.  The right solution might be to fix that
> problem rather than work around it, but I can see why one might shy at that
> task! :)
> 
> I do wonder, though, if an intermediate solution using huge pages might be
> the best approach?  That would get the number of segments down pretty far,
> and using huge pages for buffers would reduce TLB pressure significantly
> if the CPU is working through the data at all.  Meanwhile, inserting huge
> pages into the process's address space should work easily.  Just a thought.

My ack to that.

And in the case of dma-buf the buffer doesn't need to be mapped to user
space. It'd be quite nice to be able to share higher order allocations even
if they couldn't be mapped to user space as such.

Using 2 MiB pages would probably solve Ricardo's issue, but used alone
they'd waste lots of memory for small buffers. If small pages (in Ricardo's
case) were used when 2 MiB pages would be too big, e.g. 1 MiB buffer would
already have 256 pages in it. Perhaps it'd be useful to specify whether
large pages should be always preferred over smaller ones.

-- 
Kind regards,

Sakari Ailus
e-mail: sakari.ailus@iki.fi	XMPP: sailus@retiisi.org.uk

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-31  6:37         ` Sakari Ailus
@ 2013-08-01 13:59           ` Ricardo Ribalda Delgado
  0 siblings, 0 replies; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-08-01 13:59 UTC (permalink / raw)
  To: Sakari Ailus
  Cc: Jonathan Corbet, Marek Szyprowski, Mauro Carvalho Chehab,
	Pawel Osciak, Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman,
	linux-media, Andre Heider, Sylwester Nawrocki

Hi Sakarius

I think the whole point of the videobuf2 is sharing pages with the
user space, so until vm_insert_page does not support high order pages
we should split them. Unfortunately the mm is completely out of my
topic, so I don't think that I could be very useful there.

With my patch, in the worst case scenario, the image is split in as
many blocks as today, but in a normal setup the ammount of blocks is 1
or two blocks in total!.

Best regards.





On Wed, Jul 31, 2013 at 8:37 AM, Sakari Ailus <sakari.ailus@iki.fi> wrote:
> Hi Jon and Sylwester,
>
> On Mon, Jul 29, 2013 at 09:16:44AM -0600, Jonathan Corbet wrote:
>> On Mon, 29 Jul 2013 13:27:12 +0200
>> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
>>
>> > > You've gone to all this trouble to get a higher-order allocation so you'd
>> > > have fewer segments, then you undo it all by splitting things apart into
>> > > individual pages.  Why?  Clearly I'm missing something, this seems to
>> > > defeat the purpose of the whole exercise?
>> >
>> > Individual zero-order pages are required to get them mapped to userspace in
>> > mmap callback.
>>
>> Yeah, Ricardo explained that too.  The right solution might be to fix that
>> problem rather than work around it, but I can see why one might shy at that
>> task! :)
>>
>> I do wonder, though, if an intermediate solution using huge pages might be
>> the best approach?  That would get the number of segments down pretty far,
>> and using huge pages for buffers would reduce TLB pressure significantly
>> if the CPU is working through the data at all.  Meanwhile, inserting huge
>> pages into the process's address space should work easily.  Just a thought.
>
> My ack to that.
>
> And in the case of dma-buf the buffer doesn't need to be mapped to user
> space. It'd be quite nice to be able to share higher order allocations even
> if they couldn't be mapped to user space as such.
>
> Using 2 MiB pages would probably solve Ricardo's issue, but used alone
> they'd waste lots of memory for small buffers. If small pages (in Ricardo's
> case) were used when 2 MiB pages would be too big, e.g. 1 MiB buffer would
> already have 256 pages in it. Perhaps it'd be useful to specify whether
> large pages should be always preferred over smaller ones.
>
> --
> Kind regards,
>
> Sakari Ailus
> e-mail: sakari.ailus@iki.fi     XMPP: sailus@retiisi.org.uk



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
  2013-07-19 20:16   ` Jonathan Corbet
@ 2013-08-02  6:38   ` Marek Szyprowski
  2013-08-02  8:46   ` Andre Heider
  2013-08-02 13:47   ` Andre Heider
  3 siblings, 0 replies; 15+ messages in thread
From: Marek Szyprowski @ 2013-08-02  6:38 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado
  Cc: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman, linux-media,
	Andre Heider, Sylwester Nawrocki

Hello,

On 7/19/2013 7:02 PM, Ricardo Ribalda Delgado wrote:
> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
>
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.
>
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments
>
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>

Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>

> ---
>   drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>   1 file changed, 49 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>   
>   static void vb2_dma_sg_put(void *buf_priv);
>   
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;
> +
> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);
> +					return -ENOMEM;
> +				}
> +			order--;
> +		}
> +
> +		split_page(pages, order);
> +		for (i = 0; i < (1<<order); i++) {
> +			buf->pages[last_page] = pages[i];
> +			sg_set_page(&buf->sg_desc.sglist[last_page],
> +					buf->pages[last_page], PAGE_SIZE, 0);
> +			last_page++;
> +		}
> +
> +		size -= PAGE_SIZE << order;
> +	}
> +
> +	return 0;
> +}
> +
>   static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
>   {
>   	struct vb2_dma_sg_buf *buf;
> -	int i;
> +	int ret;
>   
>   	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>   	if (!buf)
> @@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	if (!buf->pages)
>   		goto fail_pages_array_alloc;
>   
> -	for (i = 0; i < buf->sg_desc.num_pages; ++i) {
> -		buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
> -					   __GFP_NOWARN | gfp_flags);
> -		if (NULL == buf->pages[i])
> -			goto fail_pages_alloc;
> -		sg_set_page(&buf->sg_desc.sglist[i],
> -			    buf->pages[i], PAGE_SIZE, 0);
> -	}
> +	ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
> +	if (ret)
> +		goto fail_pages_alloc;
>   
>   	buf->handler.refcount = &buf->refcount;
>   	buf->handler.put = vb2_dma_sg_put;
> @@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	return buf;
>   
>   fail_pages_alloc:
> -	while (--i >= 0)
> -		__free_page(buf->pages[i]);
>   	kfree(buf->pages);
>   
>   fail_pages_array_alloc:

Best regards
-- 
Marek Szyprowski
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table
  2013-07-19 17:02 ` [PATCH 2/2] videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table Ricardo Ribalda Delgado
@ 2013-08-02  6:39   ` Marek Szyprowski
  0 siblings, 0 replies; 15+ messages in thread
From: Marek Szyprowski @ 2013-08-02  6:39 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado
  Cc: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Kyungmin Park, Ismael Luceno, Greg Kroah-Hartman, linux-media,
	Andre Heider, Sylwester Nawrocki

Hello,

On 7/19/2013 7:02 PM, Ricardo Ribalda Delgado wrote:
> Replace the private struct vb2_dma_sg_desc with the struct sg_table so
> we can benefit from all the helping functions in lib/scatterlist.c for
> things like allocating the sg or compacting the descriptor
>
> marvel-ccic and solo6x10 drivers, that uses this api has been updated
>
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>

Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>

> ---
>   drivers/media/platform/marvell-ccic/mcam-core.c    |   14 +--
>   drivers/media/v4l2-core/videobuf2-dma-sg.c         |  103 ++++++++------------
>   drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c |   20 ++--
>   include/media/videobuf2-dma-sg.h                   |   10 +-
>   4 files changed, 63 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c
> index 64ab91e..0ac51bd 100644
> --- a/drivers/media/platform/marvell-ccic/mcam-core.c
> +++ b/drivers/media/platform/marvell-ccic/mcam-core.c
> @@ -1040,16 +1040,16 @@ static int mcam_vb_sg_buf_prepare(struct vb2_buffer *vb)
>   {
>   	struct mcam_vb_buffer *mvb = vb_to_mvb(vb);
>   	struct mcam_camera *cam = vb2_get_drv_priv(vb->vb2_queue);
> -	struct vb2_dma_sg_desc *sgd = vb2_dma_sg_plane_desc(vb, 0);
> +	struct sg_table *sg_table = vb2_dma_sg_plane_desc(vb, 0);
>   	struct mcam_dma_desc *desc = mvb->dma_desc;
>   	struct scatterlist *sg;
>   	int i;
>   
> -	mvb->dma_desc_nent = dma_map_sg(cam->dev, sgd->sglist, sgd->num_pages,
> -			DMA_FROM_DEVICE);
> +	mvb->dma_desc_nent = dma_map_sg(cam->dev, sg_table->sgl,
> +			sg_table->nents, DMA_FROM_DEVICE);
>   	if (mvb->dma_desc_nent <= 0)
>   		return -EIO;  /* Not sure what's right here */
> -	for_each_sg(sgd->sglist, sg, mvb->dma_desc_nent, i) {
> +	for_each_sg(sg_table->sgl, sg, mvb->dma_desc_nent, i) {
>   		desc->dma_addr = sg_dma_address(sg);
>   		desc->segment_len = sg_dma_len(sg);
>   		desc++;
> @@ -1060,9 +1060,11 @@ static int mcam_vb_sg_buf_prepare(struct vb2_buffer *vb)
>   static int mcam_vb_sg_buf_finish(struct vb2_buffer *vb)
>   {
>   	struct mcam_camera *cam = vb2_get_drv_priv(vb->vb2_queue);
> -	struct vb2_dma_sg_desc *sgd = vb2_dma_sg_plane_desc(vb, 0);
> +	struct sg_table *sg_table = vb2_dma_sg_plane_desc(vb, 0);
>   
> -	dma_unmap_sg(cam->dev, sgd->sglist, sgd->num_pages, DMA_FROM_DEVICE);
> +	if (sg_table)
> +		dma_unmap_sg(cam->dev, sg_table->sgl,
> +				sg_table->nents, DMA_FROM_DEVICE);
>   	return 0;
>   }
>   
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index c053605..4b30ad5 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -35,7 +35,9 @@ struct vb2_dma_sg_buf {
>   	struct page			**pages;
>   	int				write;
>   	int				offset;
> -	struct vb2_dma_sg_desc		sg_desc;
> +	struct sg_table			sg_table;
> +	size_t				size;
> +	unsigned int			num_pages;
>   	atomic_t			refcount;
>   	struct vb2_vmarea_handler	handler;
>   };
> @@ -46,7 +48,7 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>   		gfp_t gfp_flags)
>   {
>   	unsigned int last_page = 0;
> -	int size = buf->sg_desc.size;
> +	int size = buf->size;
>   
>   	while (size > 0) {
>   		struct page *pages;
> @@ -74,12 +76,8 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>   		}
>   
>   		split_page(pages, order);
> -		for (i = 0; i < (1<<order); i++) {
> -			buf->pages[last_page] = pages[i];
> -			sg_set_page(&buf->sg_desc.sglist[last_page],
> -					buf->pages[last_page], PAGE_SIZE, 0);
> -			last_page++;
> -		}
> +		for (i = 0; i < (1<<order); i++)
> +			buf->pages[last_page++] = pages[i];
>   
>   		size -= PAGE_SIZE << order;
>   	}
> @@ -91,6 +89,7 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   {
>   	struct vb2_dma_sg_buf *buf;
>   	int ret;
> +	int num_pages;
>   
>   	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>   	if (!buf)
> @@ -99,17 +98,11 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	buf->vaddr = NULL;
>   	buf->write = 0;
>   	buf->offset = 0;
> -	buf->sg_desc.size = size;
> +	buf->size = size;
>   	/* size is already page aligned */
> -	buf->sg_desc.num_pages = size >> PAGE_SHIFT;
> -
> -	buf->sg_desc.sglist = vzalloc(buf->sg_desc.num_pages *
> -				      sizeof(*buf->sg_desc.sglist));
> -	if (!buf->sg_desc.sglist)
> -		goto fail_sglist_alloc;
> -	sg_init_table(buf->sg_desc.sglist, buf->sg_desc.num_pages);
> +	buf->num_pages = size >> PAGE_SHIFT;
>   
> -	buf->pages = kzalloc(buf->sg_desc.num_pages * sizeof(struct page *),
> +	buf->pages = kzalloc(buf->num_pages * sizeof(struct page *),
>   			     GFP_KERNEL);
>   	if (!buf->pages)
>   		goto fail_pages_array_alloc;
> @@ -118,6 +111,11 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	if (ret)
>   		goto fail_pages_alloc;
>   
> +	ret = sg_alloc_table_from_pages(&buf->sg_table, buf->pages,
> +			buf->num_pages, 0, size, gfp_flags);
> +	if (ret)
> +		goto fail_table_alloc;
> +
>   	buf->handler.refcount = &buf->refcount;
>   	buf->handler.put = vb2_dma_sg_put;
>   	buf->handler.arg = buf;
> @@ -125,16 +123,16 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	atomic_inc(&buf->refcount);
>   
>   	dprintk(1, "%s: Allocated buffer of %d pages\n",
> -		__func__, buf->sg_desc.num_pages);
> +		__func__, buf->num_pages);
>   	return buf;
>   
> +fail_table_alloc:
> +	num_pages = buf->num_pages;
> +	while (num_pages--)
> +		__free_page(buf->pages[num_pages]);
>   fail_pages_alloc:
>   	kfree(buf->pages);
> -
>   fail_pages_array_alloc:
> -	vfree(buf->sg_desc.sglist);
> -
> -fail_sglist_alloc:
>   	kfree(buf);
>   	return NULL;
>   }
> @@ -142,14 +140,14 @@ fail_sglist_alloc:
>   static void vb2_dma_sg_put(void *buf_priv)
>   {
>   	struct vb2_dma_sg_buf *buf = buf_priv;
> -	int i = buf->sg_desc.num_pages;
> +	int i = buf->num_pages;
>   
>   	if (atomic_dec_and_test(&buf->refcount)) {
>   		dprintk(1, "%s: Freeing buffer of %d pages\n", __func__,
> -			buf->sg_desc.num_pages);
> +			buf->num_pages);
>   		if (buf->vaddr)
> -			vm_unmap_ram(buf->vaddr, buf->sg_desc.num_pages);
> -		vfree(buf->sg_desc.sglist);
> +			vm_unmap_ram(buf->vaddr, buf->num_pages);
> +		sg_free_table(&buf->sg_table);
>   		while (--i >= 0)
>   			__free_page(buf->pages[i]);
>   		kfree(buf->pages);
> @@ -162,7 +160,7 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr,
>   {
>   	struct vb2_dma_sg_buf *buf;
>   	unsigned long first, last;
> -	int num_pages_from_user, i;
> +	int num_pages_from_user;
>   
>   	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>   	if (!buf)
> @@ -171,56 +169,41 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr,
>   	buf->vaddr = NULL;
>   	buf->write = write;
>   	buf->offset = vaddr & ~PAGE_MASK;
> -	buf->sg_desc.size = size;
> +	buf->size = size;
>   
>   	first = (vaddr           & PAGE_MASK) >> PAGE_SHIFT;
>   	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> -	buf->sg_desc.num_pages = last - first + 1;
> -
> -	buf->sg_desc.sglist = vzalloc(
> -		buf->sg_desc.num_pages * sizeof(*buf->sg_desc.sglist));
> -	if (!buf->sg_desc.sglist)
> -		goto userptr_fail_sglist_alloc;
> +	buf->num_pages = last - first + 1;
>   
> -	sg_init_table(buf->sg_desc.sglist, buf->sg_desc.num_pages);
> -
> -	buf->pages = kzalloc(buf->sg_desc.num_pages * sizeof(struct page *),
> +	buf->pages = kzalloc(buf->num_pages * sizeof(struct page *),
>   			     GFP_KERNEL);
>   	if (!buf->pages)
> -		goto userptr_fail_pages_array_alloc;
> +		return NULL;
>   
>   	num_pages_from_user = get_user_pages(current, current->mm,
>   					     vaddr & PAGE_MASK,
> -					     buf->sg_desc.num_pages,
> +					     buf->num_pages,
>   					     write,
>   					     1, /* force */
>   					     buf->pages,
>   					     NULL);
>   
> -	if (num_pages_from_user != buf->sg_desc.num_pages)
> +	if (num_pages_from_user != buf->num_pages)
>   		goto userptr_fail_get_user_pages;
>   
> -	sg_set_page(&buf->sg_desc.sglist[0], buf->pages[0],
> -		    PAGE_SIZE - buf->offset, buf->offset);
> -	size -= PAGE_SIZE - buf->offset;
> -	for (i = 1; i < buf->sg_desc.num_pages; ++i) {
> -		sg_set_page(&buf->sg_desc.sglist[i], buf->pages[i],
> -			    min_t(size_t, PAGE_SIZE, size), 0);
> -		size -= min_t(size_t, PAGE_SIZE, size);
> -	}
> +	if (sg_alloc_table_from_pages(&buf->sg_table, buf->pages,
> +			buf->num_pages, buf->offset, size, 0))
> +		goto userptr_fail_alloc_table_from_pages;
> +
>   	return buf;
>   
> +userptr_fail_alloc_table_from_pages:
>   userptr_fail_get_user_pages:
>   	dprintk(1, "get_user_pages requested/got: %d/%d]\n",
> -	       num_pages_from_user, buf->sg_desc.num_pages);
> +	       num_pages_from_user, buf->num_pages);
>   	while (--num_pages_from_user >= 0)
>   		put_page(buf->pages[num_pages_from_user]);
>   	kfree(buf->pages);
> -
> -userptr_fail_pages_array_alloc:
> -	vfree(buf->sg_desc.sglist);
> -
> -userptr_fail_sglist_alloc:
>   	kfree(buf);
>   	return NULL;
>   }
> @@ -232,18 +215,18 @@ userptr_fail_sglist_alloc:
>   static void vb2_dma_sg_put_userptr(void *buf_priv)
>   {
>   	struct vb2_dma_sg_buf *buf = buf_priv;
> -	int i = buf->sg_desc.num_pages;
> +	int i = buf->num_pages;
>   
>   	dprintk(1, "%s: Releasing userspace buffer of %d pages\n",
> -	       __func__, buf->sg_desc.num_pages);
> +	       __func__, buf->num_pages);
>   	if (buf->vaddr)
> -		vm_unmap_ram(buf->vaddr, buf->sg_desc.num_pages);
> +		vm_unmap_ram(buf->vaddr, buf->num_pages);
> +	sg_free_table(&buf->sg_table);
>   	while (--i >= 0) {
>   		if (buf->write)
>   			set_page_dirty_lock(buf->pages[i]);
>   		put_page(buf->pages[i]);
>   	}
> -	vfree(buf->sg_desc.sglist);
>   	kfree(buf->pages);
>   	kfree(buf);
>   }
> @@ -256,7 +239,7 @@ static void *vb2_dma_sg_vaddr(void *buf_priv)
>   
>   	if (!buf->vaddr)
>   		buf->vaddr = vm_map_ram(buf->pages,
> -					buf->sg_desc.num_pages,
> +					buf->num_pages,
>   					-1,
>   					PAGE_KERNEL);
>   
> @@ -312,7 +295,7 @@ static void *vb2_dma_sg_cookie(void *buf_priv)
>   {
>   	struct vb2_dma_sg_buf *buf = buf_priv;
>   
> -	return &buf->sg_desc;
> +	return &buf->sg_table;
>   }
>   
>   const struct vb2_mem_ops vb2_dma_sg_memops = {
> diff --git a/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c b/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c
> index 98e2902..cfa01f1 100644
> --- a/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c
> +++ b/drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c
> @@ -346,7 +346,7 @@ static int enc_get_mpeg_dma(struct solo_dev *solo_dev, dma_addr_t dma,
>   /* Build a descriptor queue out of an SG list and send it to the P2M for
>    * processing. */
>   static int solo_send_desc(struct solo_enc_dev *solo_enc, int skip,
> -			  struct vb2_dma_sg_desc *vbuf, int off, int size,
> +			  struct sg_table *vbuf, int off, int size,
>   			  unsigned int base, unsigned int base_size)
>   {
>   	struct solo_dev *solo_dev = solo_enc->solo_dev;
> @@ -359,7 +359,7 @@ static int solo_send_desc(struct solo_enc_dev *solo_enc, int skip,
>   
>   	solo_enc->desc_count = 1;
>   
> -	for_each_sg(vbuf->sglist, sg, vbuf->num_pages, i) {
> +	for_each_sg(vbuf->sgl, sg, vbuf->nents, i) {
>   		struct solo_p2m_desc *desc;
>   		dma_addr_t dma;
>   		int len;
> @@ -434,7 +434,7 @@ static int solo_fill_jpeg(struct solo_enc_dev *solo_enc,
>   		struct vb2_buffer *vb, struct vop_header *vh)
>   {
>   	struct solo_dev *solo_dev = solo_enc->solo_dev;
> -	struct vb2_dma_sg_desc *vbuf = vb2_dma_sg_plane_desc(vb, 0);
> +	struct sg_table *vbuf = vb2_dma_sg_plane_desc(vb, 0);
>   	int frame_size;
>   	int ret;
>   
> @@ -443,7 +443,7 @@ static int solo_fill_jpeg(struct solo_enc_dev *solo_enc,
>   	if (vb2_plane_size(vb, 0) < vh->jpeg_size + solo_enc->jpeg_len)
>   		return -EIO;
>   
> -	sg_copy_from_buffer(vbuf->sglist, vbuf->num_pages,
> +	sg_copy_from_buffer(vbuf->sgl, vbuf->nents,
>   			solo_enc->jpeg_header,
>   			solo_enc->jpeg_len);
>   
> @@ -451,12 +451,12 @@ static int solo_fill_jpeg(struct solo_enc_dev *solo_enc,
>   		& ~(DMA_ALIGN - 1);
>   	vb2_set_plane_payload(vb, 0, vh->jpeg_size + solo_enc->jpeg_len);
>   
> -	dma_map_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
> +	dma_map_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
>   			DMA_FROM_DEVICE);
>   	ret = solo_send_desc(solo_enc, solo_enc->jpeg_len, vbuf, vh->jpeg_off,
>   			frame_size, SOLO_JPEG_EXT_ADDR(solo_dev),
>   			SOLO_JPEG_EXT_SIZE(solo_dev));
> -	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
> +	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
>   			DMA_FROM_DEVICE);
>   	return ret;
>   }
> @@ -465,7 +465,7 @@ static int solo_fill_mpeg(struct solo_enc_dev *solo_enc,
>   		struct vb2_buffer *vb, struct vop_header *vh)
>   {
>   	struct solo_dev *solo_dev = solo_enc->solo_dev;
> -	struct vb2_dma_sg_desc *vbuf = vb2_dma_sg_plane_desc(vb, 0);
> +	struct sg_table *vbuf = vb2_dma_sg_plane_desc(vb, 0);
>   	int frame_off, frame_size;
>   	int skip = 0;
>   	int ret;
> @@ -475,7 +475,7 @@ static int solo_fill_mpeg(struct solo_enc_dev *solo_enc,
>   
>   	/* If this is a key frame, add extra header */
>   	if (!vh->vop_type) {
> -		sg_copy_from_buffer(vbuf->sglist, vbuf->num_pages,
> +		sg_copy_from_buffer(vbuf->sgl, vbuf->nents,
>   				solo_enc->vop,
>   				solo_enc->vop_len);
>   
> @@ -494,12 +494,12 @@ static int solo_fill_mpeg(struct solo_enc_dev *solo_enc,
>   	frame_size = (vh->mpeg_size + skip + (DMA_ALIGN - 1))
>   		& ~(DMA_ALIGN - 1);
>   
> -	dma_map_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
> +	dma_map_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
>   			DMA_FROM_DEVICE);
>   	ret = solo_send_desc(solo_enc, skip, vbuf, frame_off, frame_size,
>   			SOLO_MP4E_EXT_ADDR(solo_dev),
>   			SOLO_MP4E_EXT_SIZE(solo_dev));
> -	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sglist, vbuf->num_pages,
> +	dma_unmap_sg(&solo_dev->pdev->dev, vbuf->sgl, vbuf->nents,
>   			DMA_FROM_DEVICE);
>   	return ret;
>   }
> diff --git a/include/media/videobuf2-dma-sg.h b/include/media/videobuf2-dma-sg.h
> index 0038526..7b89852 100644
> --- a/include/media/videobuf2-dma-sg.h
> +++ b/include/media/videobuf2-dma-sg.h
> @@ -15,16 +15,10 @@
>   
>   #include <media/videobuf2-core.h>
>   
> -struct vb2_dma_sg_desc {
> -	unsigned long		size;
> -	unsigned int		num_pages;
> -	struct scatterlist	*sglist;
> -};
> -
> -static inline struct vb2_dma_sg_desc *vb2_dma_sg_plane_desc(
> +static inline struct sg_table *vb2_dma_sg_plane_desc(
>   		struct vb2_buffer *vb, unsigned int plane_no)
>   {
> -	return (struct vb2_dma_sg_desc *)vb2_plane_cookie(vb, plane_no);
> +	return (struct sg_table *)vb2_plane_cookie(vb, plane_no);
>   }
>   
>   extern const struct vb2_mem_ops vb2_dma_sg_memops;

Best regards
-- 
Marek Szyprowski
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
  2013-07-19 20:16   ` Jonathan Corbet
  2013-08-02  6:38   ` Marek Szyprowski
@ 2013-08-02  8:46   ` Andre Heider
  2013-08-02 11:30     ` Ricardo Ribalda Delgado
  2013-08-02 13:47   ` Andre Heider
  3 siblings, 1 reply; 15+ messages in thread
From: Andre Heider @ 2013-08-02  8:46 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado
  Cc: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Sylwester Nawrocki

Hi Ricardo,

sorry for the late answer, but the leak I mentioned in my first reply is still there, see below.

On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
> 
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.
> 
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments
> 
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> ---
>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>  1 file changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>  
>  static void vb2_dma_sg_put(void *buf_priv);
>  
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;
> +
> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);
> +					return -ENOMEM;
> +				}

The return statement doesn't make sense in the while() scope, that way you wouldn't need the loop at all.

To prevent leaking pages of prior iterations (those with higher orders), pull the return out of there:

			while (last_page--)
				__free_page(buf->pages[last_page]);
			return -ENOMEM;

Regards,
Andre

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-08-02  8:46   ` Andre Heider
@ 2013-08-02 11:30     ` Ricardo Ribalda Delgado
  0 siblings, 0 replies; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-08-02 11:30 UTC (permalink / raw)
  To: Andre Heider
  Cc: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Sylwester Nawrocki

Hi Andre,

Nice catch! thanks.

I have just uploaded a new version.

https://patchwork.linuxtv.org/patch/19502/
https://patchwork.linuxtv.org/patch/19503/


Thanks for your help

On Fri, Aug 2, 2013 at 10:46 AM, Andre Heider <a.heider@gmail.com> wrote:
> Hi Ricardo,
>
> sorry for the late answer, but the leak I mentioned in my first reply is still there, see below.
>
> On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
>> Most DMA engines have limitations regarding the number of DMA segments
>> (sg-buffers) that they can handle. Videobuffers can easily spread
>> through houndreds of pages.
>>
>> In the previous aproach, the pages were allocated individually, this
>> could led to the creation houndreds of dma segments (sg-buffers) that
>> could not be handled by some DMA engines.
>>
>> This patch tries to minimize the number of DMA segments by using
>> alloc_pages. In the worst case it will behave as before, but most
>> of the times it will reduce the number of dma segments
>>
>> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>> ---
>>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>>  1 file changed, 49 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> index 16ae3dc..c053605 100644
>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>>
>>  static void vb2_dma_sg_put(void *buf_priv);
>>
>> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>> +             gfp_t gfp_flags)
>> +{
>> +     unsigned int last_page = 0;
>> +     int size = buf->sg_desc.size;
>> +
>> +     while (size > 0) {
>> +             struct page *pages;
>> +             int order;
>> +             int i;
>> +
>> +             order = get_order(size);
>> +             /* Dont over allocate*/
>> +             if ((PAGE_SIZE << order) > size)
>> +                     order--;
>> +
>> +             pages = NULL;
>> +             while (!pages) {
>> +                     pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
>> +                                     __GFP_NOWARN | gfp_flags, order);
>> +                     if (pages)
>> +                             break;
>> +
>> +                     if (order == 0)
>> +                             while (last_page--) {
>> +                                     __free_page(buf->pages[last_page]);
>> +                                     return -ENOMEM;
>> +                             }
>
> The return statement doesn't make sense in the while() scope, that way you wouldn't need the loop at all.
>
> To prevent leaking pages of prior iterations (those with higher orders), pull the return out of there:
>
>                         while (last_page--)
>                                 __free_page(buf->pages[last_page]);
>                         return -ENOMEM;
>
> Regards,
> Andre



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
                     ` (2 preceding siblings ...)
  2013-08-02  8:46   ` Andre Heider
@ 2013-08-02 13:47   ` Andre Heider
  2013-08-02 14:20     ` Ricardo Ribalda Delgado
  3 siblings, 1 reply; 15+ messages in thread
From: Andre Heider @ 2013-08-02 13:47 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado
  Cc: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Sylwester Nawrocki

Hi Ricardo,

I messed up one thing in my initial reply, sorry :(

And two additional nitpicks, while we're at it.

On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
> 
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.

s/houndreds/hundreds/

> 
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments
> 
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>

With those changes you can add:

Reviewed-by: Andre Heider <a.heider@gmail.com>

> ---
>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>  1 file changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>  
>  static void vb2_dma_sg_put(void *buf_priv);
>  
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;
> +
> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);
> +					return -ENOMEM;
> +				}
> +			order--;
> +		}
> +
> +		split_page(pages, order);
> +		for (i = 0; i < (1<<order); i++) {

whitespace nit: "(1 << order)"

> +			buf->pages[last_page] = pages[i];

My fault, it should read:

			buf->pages[last_page] = &pages[i];

> +			sg_set_page(&buf->sg_desc.sglist[last_page],
> +					buf->pages[last_page], PAGE_SIZE, 0);
> +			last_page++;
> +		}
> +
> +		size -= PAGE_SIZE << order;
> +	}
> +
> +	return 0;
> +}
> +
>  static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
>  {
>  	struct vb2_dma_sg_buf *buf;
> -	int i;
> +	int ret;
>  
>  	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>  	if (!buf)
> @@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>  	if (!buf->pages)
>  		goto fail_pages_array_alloc;
>  
> -	for (i = 0; i < buf->sg_desc.num_pages; ++i) {
> -		buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
> -					   __GFP_NOWARN | gfp_flags);
> -		if (NULL == buf->pages[i])
> -			goto fail_pages_alloc;
> -		sg_set_page(&buf->sg_desc.sglist[i],
> -			    buf->pages[i], PAGE_SIZE, 0);
> -	}
> +	ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
> +	if (ret)
> +		goto fail_pages_alloc;
>  
>  	buf->handler.refcount = &buf->refcount;
>  	buf->handler.put = vb2_dma_sg_put;
> @@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>  	return buf;
>  
>  fail_pages_alloc:
> -	while (--i >= 0)
> -		__free_page(buf->pages[i]);
>  	kfree(buf->pages);
>  
>  fail_pages_array_alloc:
> -- 
> 1.7.10.4
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible
  2013-08-02 13:47   ` Andre Heider
@ 2013-08-02 14:20     ` Ricardo Ribalda Delgado
  0 siblings, 0 replies; 15+ messages in thread
From: Ricardo Ribalda Delgado @ 2013-08-02 14:20 UTC (permalink / raw)
  To: Andre Heider
  Cc: Jonathan Corbet, Mauro Carvalho Chehab, Pawel Osciak,
	Marek Szyprowski, Kyungmin Park, Ismael Luceno,
	Greg Kroah-Hartman, linux-media, Sylwester Nawrocki

Thanks, I have just send a new version.

Regards!

On Fri, Aug 2, 2013 at 3:47 PM, Andre Heider <a.heider@gmail.com> wrote:
> Hi Ricardo,
>
> I messed up one thing in my initial reply, sorry :(
>
> And two additional nitpicks, while we're at it.
>
> On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
>> Most DMA engines have limitations regarding the number of DMA segments
>> (sg-buffers) that they can handle. Videobuffers can easily spread
>> through houndreds of pages.
>>
>> In the previous aproach, the pages were allocated individually, this
>> could led to the creation houndreds of dma segments (sg-buffers) that
>> could not be handled by some DMA engines.
>
> s/houndreds/hundreds/
>
>>
>> This patch tries to minimize the number of DMA segments by using
>> alloc_pages. In the worst case it will behave as before, but most
>> of the times it will reduce the number of dma segments
>>
>> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>
> With those changes you can add:
>
> Reviewed-by: Andre Heider <a.heider@gmail.com>
>
>> ---
>>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>>  1 file changed, 49 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> index 16ae3dc..c053605 100644
>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>>
>>  static void vb2_dma_sg_put(void *buf_priv);
>>
>> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>> +             gfp_t gfp_flags)
>> +{
>> +     unsigned int last_page = 0;
>> +     int size = buf->sg_desc.size;
>> +
>> +     while (size > 0) {
>> +             struct page *pages;
>> +             int order;
>> +             int i;
>> +
>> +             order = get_order(size);
>> +             /* Dont over allocate*/
>> +             if ((PAGE_SIZE << order) > size)
>> +                     order--;
>> +
>> +             pages = NULL;
>> +             while (!pages) {
>> +                     pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
>> +                                     __GFP_NOWARN | gfp_flags, order);
>> +                     if (pages)
>> +                             break;
>> +
>> +                     if (order == 0)
>> +                             while (last_page--) {
>> +                                     __free_page(buf->pages[last_page]);
>> +                                     return -ENOMEM;
>> +                             }
>> +                     order--;
>> +             }
>> +
>> +             split_page(pages, order);
>> +             for (i = 0; i < (1<<order); i++) {
>
> whitespace nit: "(1 << order)"
>
>> +                     buf->pages[last_page] = pages[i];
>
> My fault, it should read:
>
>                         buf->pages[last_page] = &pages[i];
>
>> +                     sg_set_page(&buf->sg_desc.sglist[last_page],
>> +                                     buf->pages[last_page], PAGE_SIZE, 0);
>> +                     last_page++;
>> +             }
>> +
>> +             size -= PAGE_SIZE << order;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>>  static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
>>  {
>>       struct vb2_dma_sg_buf *buf;
>> -     int i;
>> +     int ret;
>>
>>       buf = kzalloc(sizeof *buf, GFP_KERNEL);
>>       if (!buf)
>> @@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>>       if (!buf->pages)
>>               goto fail_pages_array_alloc;
>>
>> -     for (i = 0; i < buf->sg_desc.num_pages; ++i) {
>> -             buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
>> -                                        __GFP_NOWARN | gfp_flags);
>> -             if (NULL == buf->pages[i])
>> -                     goto fail_pages_alloc;
>> -             sg_set_page(&buf->sg_desc.sglist[i],
>> -                         buf->pages[i], PAGE_SIZE, 0);
>> -     }
>> +     ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
>> +     if (ret)
>> +             goto fail_pages_alloc;
>>
>>       buf->handler.refcount = &buf->refcount;
>>       buf->handler.put = vb2_dma_sg_put;
>> @@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>>       return buf;
>>
>>  fail_pages_alloc:
>> -     while (--i >= 0)
>> -             __free_page(buf->pages[i]);
>>       kfree(buf->pages);
>>
>>  fail_pages_array_alloc:
>> --
>> 1.7.10.4
>>



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2013-08-02 14:20 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-19 17:02 [PATCH 0/2] videobuf2-dma-sg: Contiguos memory allocation Ricardo Ribalda Delgado
2013-07-19 17:02 ` [PATCH 1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible Ricardo Ribalda Delgado
2013-07-19 20:16   ` Jonathan Corbet
2013-07-19 21:57     ` Ricardo Ribalda Delgado
2013-07-29 11:27     ` Marek Szyprowski
2013-07-29 15:16       ` Jonathan Corbet
2013-07-31  6:37         ` Sakari Ailus
2013-08-01 13:59           ` Ricardo Ribalda Delgado
2013-08-02  6:38   ` Marek Szyprowski
2013-08-02  8:46   ` Andre Heider
2013-08-02 11:30     ` Ricardo Ribalda Delgado
2013-08-02 13:47   ` Andre Heider
2013-08-02 14:20     ` Ricardo Ribalda Delgado
2013-07-19 17:02 ` [PATCH 2/2] videobuf2-dma-sg: Replace vb2_dma_sg_desc with sg_table Ricardo Ribalda Delgado
2013-08-02  6:39   ` Marek Szyprowski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.