All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] Introduce & use new SG page iterators
@ 2016-05-09  9:33 ` Dave Gordon
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Gordon @ 2016-05-09  9:33 UTC (permalink / raw)
  To: intel-gfx; +Cc: Dave Gordon, Imre Deak, linux-kernel

The existing for_each_sg_page() iterator is somewhat inconvenient; in
particular, the 'nents' parameter is not expressed in any useful way,
and so there is no way to get a precise (maximum) number of iterations
(and therefore pages) without knowing that the SGL has been constructed
in a specific way.

So here we introduce for_each_sgt_page(), which simplifies the
parameters down to just two: the iterator and an sg_table to iterate
over. This is ideal where we simply need to process *all* the pages,
regardless of how many there might be (e.g. put_pages()).

For more sophisticated uses, for_each_sgt_page_range() adds a starting (page)
offset and a maximum number of iterations to be performed. This fits the
usage required when processing only a subrange of the pages, or to avoid
any risk of overflow when filling a bounded array with per-page data.

Nearly all uses of for_each_sg_page() are then converted to the new
simpler macros.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: linux-kernel@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  |  8 ++++----
 drivers/gpu/drm/i915/i915_gem.c         | 10 ++++------
 drivers/gpu/drm/i915/i915_gem_fence.c   |  5 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 20 +++++++++-----------
 drivers/gpu/drm/i915/i915_gem_userptr.c |  2 +-
 include/linux/scatterlist.h             | 21 +++++++++++++++++++++
 lib/scatterlist.c                       | 26 ++++++++++++++++++++++++++
 7 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c3a7603..4f08ab0 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -946,11 +946,11 @@ static u32 *vmap_batch(struct drm_i915_gem_object *obj,
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, first_page, npages)
 		pages[i++] = sg_page_iter_page(&sg_iter);
-		if (i == npages)
-			break;
-	}
+
+	if (WARN_ON(i != npages))
+		goto finish;
 
 	addr = vmap(pages, i, 0, PAGE_KERNEL);
 	if (addr == NULL) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a88e6c9..03b2ed3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -611,8 +611,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
 	offset = args->offset;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-			 offset >> PAGE_SHIFT) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, offset >> PAGE_SHIFT, ~0u) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (remain <= 0)
@@ -935,8 +934,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-			 offset >> PAGE_SHIFT) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, offset >> PAGE_SHIFT, ~0u) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		int partial_cacheline_write;
 
@@ -2184,7 +2182,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (obj->dirty)
@@ -2340,7 +2338,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 
 err_pages:
 	sg_mark_end(sg);
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+	for_each_sgt_page(&sg_iter, st)
 		put_page(sg_page_iter_page(&sg_iter));
 	sg_free_table(st);
 	kfree(st);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index a2b938e..4fb06f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -752,7 +752,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		return;
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
@@ -790,7 +790,8 @@ void i915_gem_restore_fences(struct drm_device *dev)
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d284b17..7296d02 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1844,7 +1844,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
-	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, pages) {
 		if (pt_vaddr == NULL)
 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
 
@@ -2371,7 +2371,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+	for_each_sgt_page(&sg_iter, st) {
 		addr = sg_dma_address(sg_iter.sg) +
 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
 		gen8_set_pte(&gtt_entries[i],
@@ -2449,7 +2449,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+	for_each_sgt_page(&sg_iter, st) {
 		addr = sg_page_iter_dma_address(&sg_iter);
 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
@@ -3384,6 +3384,7 @@ struct i915_vma *
 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 			  struct drm_i915_gem_object *obj)
 {
+	unsigned int n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
 	unsigned int size_pages_uv;
 	struct sg_page_iter sg_iter;
@@ -3395,7 +3396,7 @@ struct i915_vma *
 	int ret = -ENOMEM;
 
 	/* Allocate a temporary list of source pages for random access. */
-	page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+	page_addr_list = drm_malloc_gfp(n_pages,
 					sizeof(dma_addr_t),
 					GFP_TEMPORARY);
 	if (!page_addr_list)
@@ -3418,11 +3419,10 @@ struct i915_vma *
 
 	/* Populate source page list from the object. */
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
-		i++;
-	}
+	for_each_sgt_page_range(&sg_iter, obj->pages, 0, n_pages)
+		page_addr_list[i++] = sg_page_iter_dma_address(&sg_iter);
 
+	WARN_ON(i != n_pages);
 	st->nents = 0;
 	sg = st->sgl;
 
@@ -3488,9 +3488,7 @@ struct i915_vma *
 
 	sg = st->sgl;
 	st->nents = 0;
-	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
-		view->params.partial.offset)
-	{
+	for_each_sgt_page_range(&obj_sg_iter, obj->pages, view->params.partial.offset, ~0u) {
 		if (st->nents >= view->params.partial.size)
 			break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 32d9726..3d5e4a9 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -716,7 +716,7 @@ struct get_pages_work {
 
 	i915_gem_gtt_finish_object(obj);
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (obj->dirty)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 556ec1e..2755f16 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -305,6 +305,7 @@ struct sg_page_iter {
 						 * next step */
 };
 
+bool __sgt_page_iter_next(struct sg_page_iter *piter);
 bool __sg_page_iter_next(struct sg_page_iter *piter);
 void __sg_page_iter_start(struct sg_page_iter *piter,
 			  struct scatterlist *sglist, unsigned int nents,
@@ -339,6 +340,26 @@ static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
 	for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
 	     __sg_page_iter_next(piter);)
 
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @sgt:	sg_table to iterate over
+ */
+#define for_each_sgt_page(piter, sgt)					\
+	for (__sg_page_iter_start((piter), (sgt)->sgl, (~0u), (0)); \
+	     __sgt_page_iter_next(piter);)
+
+/**
+ * for_each_sgt_page_range - iterate over some pages of the given sg_table
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @sgt:	sg_table to iterate over
+ * @first:	starting page offset
+ * @max:	maximum number of pages to iterate over
+ */
+#define for_each_sgt_page_range(piter, sgt, first, max)			\
+	for (__sg_page_iter_start((piter), (sgt)->sgl, (max), (first)); \
+	     __sgt_page_iter_next(piter);)
+
 /*
  * Mapping sg iterator
  *
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 004fc70..ceeea63 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -469,6 +469,32 @@ bool __sg_page_iter_next(struct sg_page_iter *piter)
 }
 EXPORT_SYMBOL(__sg_page_iter_next);
 
+/*
+ * Note: we use the same iterator structure as __sg_page_iter_next()
+ * above, but interpret the 'nents' member differently. Here it is
+ * an upper bound on the number of iterations to be performed, not
+ * the number of internal list elements to be traversed.
+ */
+bool __sgt_page_iter_next(struct sg_page_iter *piter)
+{
+	if (!piter->__nents || !piter->sg)
+		return false;
+
+	piter->sg_pgoffset += piter->__pg_advance;
+	piter->__pg_advance = 1;
+
+	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
+		piter->sg_pgoffset -= sg_page_count(piter->sg);
+		piter->sg = sg_next(piter->sg);
+		if (!piter->sg)
+			return false;
+	}
+
+	--piter->__nents;
+	return true;
+}
+EXPORT_SYMBOL(__sgt_page_iter_next);
+
 /**
  * sg_miter_start - start mapping iteration over a sg list
  * @miter: sg mapping iter to be started
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 1/3] Introduce & use new SG page iterators
@ 2016-05-09  9:33 ` Dave Gordon
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Gordon @ 2016-05-09  9:33 UTC (permalink / raw)
  To: intel-gfx; +Cc: linux-kernel

The existing for_each_sg_page() iterator is somewhat inconvenient; in
particular, the 'nents' parameter is not expressed in any useful way,
and so there is no way to get a precise (maximum) number of iterations
(and therefore pages) without knowing that the SGL has been constructed
in a specific way.

So here we introduce for_each_sgt_page(), which simplifies the
parameters down to just two: the iterator and an sg_table to iterate
over. This is ideal where we simply need to process *all* the pages,
regardless of how many there might be (e.g. put_pages()).

For more sophisticated uses, for_each_sgt_page_range() adds a starting (page)
offset and a maximum number of iterations to be performed. This fits the
usage required when processing only a subrange of the pages, or to avoid
any risk of overflow when filling a bounded array with per-page data.

Nearly all uses of for_each_sg_page() are then converted to the new
simpler macros.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: linux-kernel@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  |  8 ++++----
 drivers/gpu/drm/i915/i915_gem.c         | 10 ++++------
 drivers/gpu/drm/i915/i915_gem_fence.c   |  5 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 20 +++++++++-----------
 drivers/gpu/drm/i915/i915_gem_userptr.c |  2 +-
 include/linux/scatterlist.h             | 21 +++++++++++++++++++++
 lib/scatterlist.c                       | 26 ++++++++++++++++++++++++++
 7 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c3a7603..4f08ab0 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -946,11 +946,11 @@ static u32 *vmap_batch(struct drm_i915_gem_object *obj,
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, first_page, npages)
 		pages[i++] = sg_page_iter_page(&sg_iter);
-		if (i == npages)
-			break;
-	}
+
+	if (WARN_ON(i != npages))
+		goto finish;
 
 	addr = vmap(pages, i, 0, PAGE_KERNEL);
 	if (addr == NULL) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a88e6c9..03b2ed3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -611,8 +611,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
 	offset = args->offset;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-			 offset >> PAGE_SHIFT) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, offset >> PAGE_SHIFT, ~0u) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (remain <= 0)
@@ -935,8 +934,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-			 offset >> PAGE_SHIFT) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, offset >> PAGE_SHIFT, ~0u) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		int partial_cacheline_write;
 
@@ -2184,7 +2182,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (obj->dirty)
@@ -2340,7 +2338,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 
 err_pages:
 	sg_mark_end(sg);
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+	for_each_sgt_page(&sg_iter, st)
 		put_page(sg_page_iter_page(&sg_iter));
 	sg_free_table(st);
 	kfree(st);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index a2b938e..4fb06f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -752,7 +752,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		return;
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
@@ -790,7 +790,8 @@ void i915_gem_restore_fences(struct drm_device *dev)
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d284b17..7296d02 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1844,7 +1844,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
-	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, pages) {
 		if (pt_vaddr == NULL)
 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
 
@@ -2371,7 +2371,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+	for_each_sgt_page(&sg_iter, st) {
 		addr = sg_dma_address(sg_iter.sg) +
 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
 		gen8_set_pte(&gtt_entries[i],
@@ -2449,7 +2449,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+	for_each_sgt_page(&sg_iter, st) {
 		addr = sg_page_iter_dma_address(&sg_iter);
 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
@@ -3384,6 +3384,7 @@ struct i915_vma *
 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 			  struct drm_i915_gem_object *obj)
 {
+	unsigned int n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
 	unsigned int size_pages_uv;
 	struct sg_page_iter sg_iter;
@@ -3395,7 +3396,7 @@ struct i915_vma *
 	int ret = -ENOMEM;
 
 	/* Allocate a temporary list of source pages for random access. */
-	page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+	page_addr_list = drm_malloc_gfp(n_pages,
 					sizeof(dma_addr_t),
 					GFP_TEMPORARY);
 	if (!page_addr_list)
@@ -3418,11 +3419,10 @@ struct i915_vma *
 
 	/* Populate source page list from the object. */
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
-		i++;
-	}
+	for_each_sgt_page_range(&sg_iter, obj->pages, 0, n_pages)
+		page_addr_list[i++] = sg_page_iter_dma_address(&sg_iter);
 
+	WARN_ON(i != n_pages);
 	st->nents = 0;
 	sg = st->sgl;
 
@@ -3488,9 +3488,7 @@ struct i915_vma *
 
 	sg = st->sgl;
 	st->nents = 0;
-	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
-		view->params.partial.offset)
-	{
+	for_each_sgt_page_range(&obj_sg_iter, obj->pages, view->params.partial.offset, ~0u) {
 		if (st->nents >= view->params.partial.size)
 			break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 32d9726..3d5e4a9 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -716,7 +716,7 @@ struct get_pages_work {
 
 	i915_gem_gtt_finish_object(obj);
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (obj->dirty)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 556ec1e..2755f16 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -305,6 +305,7 @@ struct sg_page_iter {
 						 * next step */
 };
 
+bool __sgt_page_iter_next(struct sg_page_iter *piter);
 bool __sg_page_iter_next(struct sg_page_iter *piter);
 void __sg_page_iter_start(struct sg_page_iter *piter,
 			  struct scatterlist *sglist, unsigned int nents,
@@ -339,6 +340,26 @@ static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
 	for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
 	     __sg_page_iter_next(piter);)
 
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @sgt:	sg_table to iterate over
+ */
+#define for_each_sgt_page(piter, sgt)					\
+	for (__sg_page_iter_start((piter), (sgt)->sgl, (~0u), (0)); \
+	     __sgt_page_iter_next(piter);)
+
+/**
+ * for_each_sgt_page_range - iterate over some pages of the given sg_table
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @sgt:	sg_table to iterate over
+ * @first:	starting page offset
+ * @max:	maximum number of pages to iterate over
+ */
+#define for_each_sgt_page_range(piter, sgt, first, max)			\
+	for (__sg_page_iter_start((piter), (sgt)->sgl, (max), (first)); \
+	     __sgt_page_iter_next(piter);)
+
 /*
  * Mapping sg iterator
  *
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 004fc70..ceeea63 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -469,6 +469,32 @@ bool __sg_page_iter_next(struct sg_page_iter *piter)
 }
 EXPORT_SYMBOL(__sg_page_iter_next);
 
+/*
+ * Note: we use the same iterator structure as __sg_page_iter_next()
+ * above, but interpret the 'nents' member differently. Here it is
+ * an upper bound on the number of iterations to be performed, not
+ * the number of internal list elements to be traversed.
+ */
+bool __sgt_page_iter_next(struct sg_page_iter *piter)
+{
+	if (!piter->__nents || !piter->sg)
+		return false;
+
+	piter->sg_pgoffset += piter->__pg_advance;
+	piter->__pg_advance = 1;
+
+	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
+		piter->sg_pgoffset -= sg_page_count(piter->sg);
+		piter->sg = sg_next(piter->sg);
+		if (!piter->sg)
+			return false;
+	}
+
+	--piter->__nents;
+	return true;
+}
+EXPORT_SYMBOL(__sgt_page_iter_next);
+
 /**
  * sg_miter_start - start mapping iteration over a sg list
  * @miter: sg mapping iter to be started
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] drm/i915: refactor i915_gem_object_pin_map()
  2016-05-09  9:33 ` Dave Gordon
  (?)
@ 2016-05-09  9:33 ` Dave Gordon
  -1 siblings, 0 replies; 7+ messages in thread
From: Dave Gordon @ 2016-05-09  9:33 UTC (permalink / raw)
  To: intel-gfx

The recently-added i915_gem_object_pin_map() can be further optimised
for "small" objects. To facilitate this, and simplify the error paths
before adding the new code, this patch pulls out the "mapping" part of
the operation (involving local allocations which must be undone before
return) into its own subfunction.

The next patch will then insert the new optimisation into the middle of
the now-separated subfunction.

This reorganisation will probably not affect the generated code, as the
compiler will most likely inline it anyway, but it makes the logical
structure a bit clearer and easier to modify.

v2:
    Restructure loop-over-pages & error check (Chris Wilson)

v3:
    Use new nicer SGL page-range iterator (Dave Gordon)
    Add page count to debug messages (Chris Wilson)
    Convert WARN_ON() to GEM_BUG_ON()

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 59 ++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 03b2ed3..f459702 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2393,6 +2393,43 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->pages;
+	struct sg_page_iter sg_iter;
+	struct page **pages;
+	unsigned long i = 0;
+	void *addr = NULL;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1)
+		return kmap(sg_page(sgt->sgl));
+
+	pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+	if (pages == NULL) {
+		DRM_DEBUG_DRIVER("Failed to get space for %lu pointers\n",
+				 n_pages);
+		return NULL;
+	}
+
+	for_each_sgt_page_range(&sg_iter, sgt, 0, n_pages)
+		pages[i++] = sg_page_iter_page(&sg_iter);
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+	if (addr == NULL)
+		DRM_DEBUG_DRIVER("Failed to vmap %lu pages\n", n_pages);
+
+	drm_free_large(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 {
 	int ret;
@@ -2406,27 +2443,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_pin_pages(obj);
 
 	if (obj->mapping == NULL) {
-		struct page **pages;
-
-		pages = NULL;
-		if (obj->base.size == PAGE_SIZE)
-			obj->mapping = kmap(sg_page(obj->pages->sgl));
-		else
-			pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
-					       sizeof(*pages),
-					       GFP_TEMPORARY);
-		if (pages != NULL) {
-			struct sg_page_iter sg_iter;
-			int n;
-
-			n = 0;
-			for_each_sg_page(obj->pages->sgl, &sg_iter,
-					 obj->pages->nents, 0)
-				pages[n++] = sg_page_iter_page(&sg_iter);
-
-			obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
-			drm_free_large(pages);
-		}
+		obj->mapping = i915_gem_object_map(obj);
 		if (obj->mapping == NULL) {
 			i915_gem_object_unpin_pages(obj);
 			return ERR_PTR(-ENOMEM);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] drm/i915: optimise i915_gem_object_map() for small objects
  2016-05-09  9:33 ` Dave Gordon
  (?)
  (?)
@ 2016-05-09  9:33 ` Dave Gordon
  -1 siblings, 0 replies; 7+ messages in thread
From: Dave Gordon @ 2016-05-09  9:33 UTC (permalink / raw)
  To: intel-gfx

We're using this function for ringbuffers and other "small" objects, so
it's worth avoiding an extra malloc()/free() cycle if the page array is
small enough to put on the stack. Here we've chosen an arbitrary cutoff
of 32 (4k) pages, which is big enough for a ringbuffer (4 pages) or a
context image (currently up to 22 pages).

v5:
    change name of local array [Chris Wilson]

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f459702..f97cd83 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2399,7 +2399,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->pages;
 	struct sg_page_iter sg_iter;
-	struct page **pages;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
 	unsigned long i = 0;
 	void *addr = NULL;
 
@@ -2407,11 +2408,14 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	if (n_pages == 1)
 		return kmap(sg_page(sgt->sgl));
 
-	pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
-	if (pages == NULL) {
-		DRM_DEBUG_DRIVER("Failed to get space for %lu pointers\n",
-				 n_pages);
-		return NULL;
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+		if (pages == NULL) {
+			DRM_DEBUG_DRIVER("Failed to get space for %lu pointers\n",
+					 n_pages);
+			return NULL;
+		}
 	}
 
 	for_each_sgt_page_range(&sg_iter, sgt, 0, n_pages)
@@ -2424,7 +2428,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	if (addr == NULL)
 		DRM_DEBUG_DRIVER("Failed to vmap %lu pages\n", n_pages);
 
-	drm_free_large(pages);
+	if (pages != stack_pages)
+		drm_free_large(pages);
 
 	return addr;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] Introduce & use new SG page iterators
  2016-05-09  9:33 ` Dave Gordon
@ 2016-05-09  9:51   ` Chris Wilson
  -1 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-05-09  9:51 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx, Imre Deak, linux-kernel

On Mon, May 09, 2016 at 10:33:31AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat inconvenient; in
> particular, the 'nents' parameters is not expressed in any useful way,
> and so there is no way to get a precise (maximum) number of iterations
> (and therefore pages) without knowing that the SGL has been constructed
> in a specific way.
> 
> So here we introduce for_each_sgt_page(), which simplifies the
> parameters down to just two: the iterator and an sg_table to iterate
> over. This is ideal where we simply need to process *all* the pages,
> regardless of how many there might be (e.g. put_pages()).

It seems to be a lot of churn for something that doesn't address my core
complaint with the iterator: it is too slow and *is* one of the
rate-limiting steps in some heavy workloads (games where we have to
rewrite the page tables frequently as their textures do not fit within
the GTT).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] Introduce & use new SG page iterators
@ 2016-05-09  9:51   ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2016-05-09  9:51 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx, linux-kernel

On Mon, May 09, 2016 at 10:33:31AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat inconvenient; in
> particular, the 'nents' parameters is not expressed in any useful way,
> and so there is no way to get a precise (maximum) number of iterations
> (and therefore pages) without knowing that the SGL has been constructed
> in a specific way.
> 
> So here we introduce for_each_sgt_page(), which simplifies the
> parameters down to just two: the iterator and an sg_table to iterate
> over. This is ideal where we simply need to process *all* the pages,
> regardless of how many there might be (e.g. put_pages()).

It seems to be a lot of churn for something that doesn't address my core
complaint with the iterator: it is too slow and *is* one of the
rate-limiting steps in some heavy workloads (games where we have to
rewrite the page tables frequently as their textures do not fit within
the GTT).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/3] Introduce & use new SG page iterators
  2016-05-09  9:33 ` Dave Gordon
                   ` (3 preceding siblings ...)
  (?)
@ 2016-05-09 17:22 ` Patchwork
  -1 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2016-05-09 17:22 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] Introduce & use new SG page iterators
URL   : https://patchwork.freedesktop.org/series/6894/
State : success

== Summary ==

Series 6894v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/6894/revisions/1/mbox/

Test drv_module_reload_basic:
                dmesg-warn -> PASS       (bsw-nuc-2)
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (ilk-hp8440p) UNSTABLE

bdw-nuci7-2      total:219  pass:206  dwarn:0   dfail:0   fail:0   skip:13 
bdw-ultra        total:219  pass:193  dwarn:0   dfail:0   fail:0   skip:26 
bsw-nuc-2        total:218  pass:174  dwarn:0   dfail:0   fail:2   skip:42 
byt-nuc          total:218  pass:174  dwarn:0   dfail:0   fail:3   skip:41 
hsw-gt2          total:219  pass:197  dwarn:0   dfail:0   fail:1   skip:21 
ilk-hp8440p      total:219  pass:154  dwarn:0   dfail:0   fail:2   skip:63 
ivb-t430s        total:219  pass:188  dwarn:0   dfail:0   fail:0   skip:31 
skl-i7k-2        total:219  pass:191  dwarn:0   dfail:0   fail:0   skip:28 
skl-nuci5        total:219  pass:207  dwarn:0   dfail:0   fail:0   skip:12 
snb-dellxps      total:37   pass:27   dwarn:0   dfail:0   fail:0   skip:9  
snb-x220t        total:219  pass:176  dwarn:0   dfail:0   fail:1   skip:42 

Results at /archive/results/CI_IGT_test/Patchwork_2158/

61f856faa1e6ff9dddad214be1e372e8aacfc0f4 drm-intel-nightly: 2016y-05m-09d-16h-24m-01s UTC integration manifest
f686094 drm/i915: optimise i915_gem_object_map() for small objects
13433ae drm/i915: refactor i915_gem_object_pin_map()
f13906c Introduce & use new SG page iterators

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-05-09 17:22 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-09  9:33 [PATCH 1/3] Introduce & use new SG page iterators Dave Gordon
2016-05-09  9:33 ` Dave Gordon
2016-05-09  9:33 ` [PATCH 2/3] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
2016-05-09  9:33 ` [PATCH 3/3] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
2016-05-09  9:51 ` [PATCH 1/3] Introduce & use new SG page iterators Chris Wilson
2016-05-09  9:51   ` Chris Wilson
2016-05-09 17:22 ` ✓ Fi.CI.BAT: success for series starting with [1/3] " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.