* [PATCH v4 1/4] drm/i915: refactor i915_gem_object_pin_map()
From: Dave Gordon @ 2016-05-20  0:31 UTC
  To: intel-gfx

The recently-added i915_gem_object_pin_map() can be further optimised
for "small" objects. To facilitate this, and simplify the error paths
before adding the new code, this patch pulls out the "mapping" part of
the operation (involving local allocations which must be undone before
return) into its own subfunction.

The next patch will then insert the new optimisation into the middle of
the now-separated subfunction.

This reorganisation will probably not affect the generated code, as the
compiler will most likely inline the new subfunction anyway; but it
makes the logical structure a bit clearer and easier to modify.
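
For reference, the shape of the entry point after the split is simply
(a sketch only; the diff below is authoritative):

	void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
	{
		/* ... get and pin the object's backing pages, then ... */

		if (obj->mapping == NULL) {
			obj->mapping = i915_gem_object_map(obj);
			if (obj->mapping == NULL) {
				i915_gem_object_unpin_pages(obj);
				return ERR_PTR(-ENOMEM);
			}
		}

		return obj->mapping;
	}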

v2:
    Restructure loop-over-pages & error check [Chris Wilson]

v3:
    Add page count to debug messages [Chris Wilson]
    Convert WARN_ON() to GEM_BUG_ON()

v4:
    Drop the DEBUG messages [Tvrtko Ursulin]

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 54 +++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6a31445..ac5fbf2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2398,6 +2398,38 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->pages;
+	struct sg_page_iter sg_iter;
+	struct page **pages;
+	unsigned long i = 0;
+	void *addr;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1)
+		return kmap(sg_page(sgt->sgl));
+
+	pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+	if (pages == NULL)
+		return NULL;
+
+	for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0)
+		pages[i++] = sg_page_iter_page(&sg_iter);
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+
+	drm_free_large(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 {
 	int ret;
@@ -2411,27 +2443,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_pin_pages(obj);
 
 	if (obj->mapping == NULL) {
-		struct page **pages;
-
-		pages = NULL;
-		if (obj->base.size == PAGE_SIZE)
-			obj->mapping = kmap(sg_page(obj->pages->sgl));
-		else
-			pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
-					       sizeof(*pages),
-					       GFP_TEMPORARY);
-		if (pages != NULL) {
-			struct sg_page_iter sg_iter;
-			int n;
-
-			n = 0;
-			for_each_sg_page(obj->pages->sgl, &sg_iter,
-					 obj->pages->nents, 0)
-				pages[n++] = sg_page_iter_page(&sg_iter);
-
-			obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
-			drm_free_large(pages);
-		}
+		obj->mapping = i915_gem_object_map(obj);
 		if (obj->mapping == NULL) {
 			i915_gem_object_unpin_pages(obj);
 			return ERR_PTR(-ENOMEM);
-- 
1.9.1


* [PATCH v4 2/4] drm/i915: optimise i915_gem_object_map() for small objects
From: Dave Gordon @ 2016-05-20  0:31 UTC
  To: intel-gfx

We're using this function for ringbuffers and other "small" objects, so
it's worth avoiding an extra malloc()/free() cycle if the page array is
small enough to put on the stack. Here we've chosen an arbitrary cutoff
of 32 (4KiB) pages -- a 256-byte pointer array on a 64-bit kernel --
which is big enough for a ringbuffer (4 pages) or a context image
(currently up to 22 pages).
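
Stripped of its surroundings, the pattern being added is just this
(a sketch; the real change is in the diff below):

	struct page *stack_pages[32];	/* page-pointer array on the stack */
	struct page **pages = stack_pages;

	if (n_pages > ARRAY_SIZE(stack_pages)) {
		/* Too big for stack -- fall back to a temporary allocation */
		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
		if (pages == NULL)
			return NULL;
	}

	/* ... fill and use pages[] ... */

	if (pages != stack_pages)
		drm_free_large(pages);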

v5:
    change name of local array [Chris Wilson]

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ac5fbf2..5797fa2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2404,7 +2404,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->pages;
 	struct sg_page_iter sg_iter;
-	struct page **pages;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
 	unsigned long i = 0;
 	void *addr;
 
@@ -2412,9 +2413,12 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	if (n_pages == 1)
 		return kmap(sg_page(sgt->sgl));
 
-	pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
-	if (pages == NULL)
-		return NULL;
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+		if (pages == NULL)
+			return NULL;
+	}
 
 	for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0)
 		pages[i++] = sg_page_iter_page(&sg_iter);
@@ -2424,7 +2428,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 
 	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
 
-	drm_free_large(pages);
+	if (pages != stack_pages)
+		drm_free_large(pages);
 
 	return addr;
 }
-- 
1.9.1


* [PATCH v4 3/4] Introduce & use new lightweight SGL iterators
From: Dave Gordon @ 2016-05-20  0:31 UTC
  To: intel-gfx

The existing for_each_sg_page() iterator is somewhat heavyweight, and is
limiting i915 driver performance in a few benchmarks. So here we
introduce somewhat lighter-weight iterators, primarily for use with GEM
objects or other cases where we need only deal with whole aligned pages.

Unlike the old iterator, the new iterators use an internal state
structure which is not intended to be accessed by the caller; instead
each takes as a parameter an output variable which is set before each
iteration. This makes them particularly simple to use :)

One of the new iterators provides the caller with the DMA address of
each page in turn; the other provides the 'struct page' pointer required
by many memory management operations.

Various uses of for_each_sg_page() are then converted to the new macros.
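
To illustrate the usage model (a sketch distilled from the
i915_gem_object_put_pages_gtt() and gen6_ppgtt_insert_entries() hunks
below), a page-based caller becomes:

	struct sgt_iter sgt_iter;
	struct page *page;

	for_each_sgt_page(page, sgt_iter, obj->pages)
		set_page_dirty(page);

while a DMA-based caller becomes:

	struct sgt_iter sgt_iter;
	dma_addr_t addr;

	for_each_sgt_dma(addr, sgt_iter, obj->pages)
		insert_pte(addr);	/* insert_pte() is hypothetical */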

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         | 62 +++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem.c         | 20 ++++-----
 drivers/gpu/drm/i915/i915_gem_fence.c   | 14 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 76 ++++++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_userptr.c |  7 ++-
 5 files changed, 116 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6894d8e..01cde0f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
 #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
 	(0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
 
+void i915_gem_track_fb(struct drm_i915_gem_object *old,
+		       struct drm_i915_gem_object *new,
+		       unsigned frontbuffer_bits);
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-		       struct drm_i915_gem_object *new,
-		       unsigned frontbuffer_bits);
+/*
+ * New optimised SGL iterator for GEM objects
+ */
+struct sgt_iter {
+	struct scatterlist *sgp;
+	union {
+		unsigned long pfn;
+		unsigned long dma;
+	} ix;
+	unsigned int curr;
+	unsigned int max;
+};
+
+/* Constructor for new iterator */
+static inline struct sgt_iter
+__sgt_iter(struct scatterlist *sgl, bool dma)
+{
+	struct sgt_iter s = { .sgp = sgl };
+
+	if (s.sgp) {
+		s.max = s.curr = s.sgp->offset;
+		s.max += s.sgp->length;
+		if (dma)
+			s.ix.dma = sg_dma_address(s.sgp);
+		else
+			s.ix.pfn = page_to_pfn(sg_page(s.sgp));
+	}
+
+	return s;
+}
+
+/**
+ * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:	DMA address (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).ix.dma + (__iter).curr);		\
+	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
+		((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:	page pointer (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
+	     ((__pp) = (__iter).ix.pfn == 0 ? NULL :			\
+	           pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
+	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
+		((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
 
 /**
  * Request queue structure.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5797fa2..6aab0b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2165,7 +2165,8 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
+	struct page *page;
 	int ret;
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
@@ -2187,9 +2188,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		struct page *page = sg_page_iter_page(&sg_iter);
-
+	for_each_sgt_page(page, sgt_iter, obj->pages) {
 		if (obj->dirty)
 			set_page_dirty(page);
 
@@ -2246,7 +2245,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	struct address_space *mapping;
 	struct sg_table *st;
 	struct scatterlist *sg;
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
 	struct page *page;
 	unsigned long last_pfn = 0;	/* suppress gcc warning */
 	int ret;
@@ -2343,8 +2342,8 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 
 err_pages:
 	sg_mark_end(sg);
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
-		put_page(sg_page_iter_page(&sg_iter));
+	for_each_sgt_page(page, sgt_iter, st)
+		put_page(page);
 	sg_free_table(st);
 	kfree(st);
 
@@ -2403,7 +2402,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 {
 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->pages;
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
+	struct page *page;
 	struct page *stack_pages[32];
 	struct page **pages = stack_pages;
 	unsigned long i = 0;
@@ -2420,8 +2420,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 			return NULL;
 	}
 
-	for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0)
-		pages[i++] = sg_page_iter_page(&sg_iter);
+	for_each_sgt_page(page, sgt_iter, sgt)
+		pages[i++] = page;
 
 	/* Check that we have the expected number of pages */
 	GEM_BUG_ON(i != n_pages);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index a2b938e..2b6bdc2 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -745,15 +745,15 @@ void i915_gem_restore_fences(struct drm_device *dev)
 void
 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
+	struct page *page;
 	int i;
 
 	if (obj->bit_17 == NULL)
 		return;
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		struct page *page = sg_page_iter_page(&sg_iter);
+	for_each_sgt_page(page, sgt_iter, obj->pages) {
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
 		    (test_bit(i, obj->bit_17) != 0)) {
@@ -775,7 +775,8 @@ void i915_gem_restore_fences(struct drm_device *dev)
 void
 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
+	struct page *page;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
@@ -790,8 +791,9 @@ void i915_gem_restore_fences(struct drm_device *dev)
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
+
+	for_each_sgt_page(page, sgt_iter, obj->pages) {
+		if (page_to_phys(page) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
 			__clear_bit(i, obj->bit_17);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7eab619..4668477 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1839,20 +1839,19 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      enum i915_cache_level cache_level, u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen6_pte_t *pt_vaddr;
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / GEN6_PTES;
 	unsigned act_pte = first_entry % GEN6_PTES;
-	struct sg_page_iter sg_iter;
+	gen6_pte_t *pt_vaddr = NULL;
+	struct sgt_iter sgt_iter;
+	dma_addr_t addr;
 
-	pt_vaddr = NULL;
-	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+	for_each_sgt_dma(addr, sgt_iter, pages) {
 		if (pt_vaddr == NULL)
 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
 
 		pt_vaddr[act_pte] =
-			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
-				       cache_level, true, flags);
+			vm->pte_encode(addr, cache_level, true, flags);
 
 		if (++act_pte == GEN6_PTES) {
 			kunmap_px(ppgtt, pt_vaddr);
@@ -1861,6 +1860,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 			act_pte = 0;
 		}
 	}
+
 	if (pt_vaddr)
 		kunmap_px(ppgtt, pt_vaddr);
 }
@@ -2362,22 +2362,20 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned first_entry = start >> PAGE_SHIFT;
-	gen8_pte_t __iomem *gtt_entries =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	int i = 0;
-	struct sg_page_iter sg_iter;
-	dma_addr_t addr = 0; /* shut up gcc */
+	struct sgt_iter sgt_iter;
+	gen8_pte_t __iomem *gtt_entries;
+	gen8_pte_t gtt_entry;
+	dma_addr_t addr;
 	int rpm_atomic_seq;
+	int i = 0;
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
-		addr = sg_dma_address(sg_iter.sg) +
-			(sg_iter.sg_pgoffset << PAGE_SHIFT);
-		gen8_set_pte(&gtt_entries[i],
-			     gen8_pte_encode(addr, level, true));
-		i++;
+	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+
+	for_each_sgt_dma(addr, sgt_iter, st) {
+		gtt_entry = gen8_pte_encode(addr, level, true);
+		gen8_set_pte(&gtt_entries[i++], gtt_entry);
 	}
 
 	/*
@@ -2388,8 +2386,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readq(&gtt_entries[i-1])
-			!= gen8_pte_encode(addr, level, true));
+		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -2440,20 +2437,20 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned first_entry = start >> PAGE_SHIFT;
-	gen6_pte_t __iomem *gtt_entries =
-		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
-	int i = 0;
-	struct sg_page_iter sg_iter;
-	dma_addr_t addr = 0;
+	struct sgt_iter sgt_iter;
+	gen6_pte_t __iomem *gtt_entries;
+	gen6_pte_t gtt_entry;
+	dma_addr_t addr;
 	int rpm_atomic_seq;
+	int i = 0;
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
-		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
-		i++;
+	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+
+	for_each_sgt_dma(addr, sgt_iter, st) {
+		gtt_entry = vm->pte_encode(addr, level, true, flags);
+		iowrite32(gtt_entry, &gtt_entries[i++]);
 	}
 
 	/* XXX: This serves as a posting read to make sure that the PTE has
@@ -2462,10 +2459,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 * of NUMA access patterns. Therefore, even with the way we assume
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
-	if (i != 0) {
-		unsigned long gtt = readl(&gtt_entries[i-1]);
-		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
-	}
+	if (i != 0)
+		WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -3399,9 +3394,11 @@ struct i915_vma *
 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 			  struct drm_i915_gem_object *obj)
 {
+	const size_t n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
 	unsigned int size_pages_uv;
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
+	dma_addr_t dma_addr;
 	unsigned long i;
 	dma_addr_t *page_addr_list;
 	struct sg_table *st;
@@ -3410,7 +3407,7 @@ struct i915_vma *
 	int ret = -ENOMEM;
 
 	/* Allocate a temporary list of source pages for random access. */
-	page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+	page_addr_list = drm_malloc_gfp(n_pages,
 					sizeof(dma_addr_t),
 					GFP_TEMPORARY);
 	if (!page_addr_list)
@@ -3433,11 +3430,10 @@ struct i915_vma *
 
 	/* Populate source page list from the object. */
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
-		i++;
-	}
+	for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
+		page_addr_list[i++] = dma_addr;
 
+	GEM_BUG_ON(i != n_pages);
 	st->nents = 0;
 	sg = st->sgl;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index a84625b..2314c88 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -706,7 +706,8 @@ struct get_pages_work {
 static void
 i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 {
-	struct sg_page_iter sg_iter;
+	struct sgt_iter sgt_iter;
+	struct page *page;
 
 	BUG_ON(obj->userptr.work != NULL);
 	__i915_gem_userptr_set_active(obj, false);
@@ -716,9 +717,7 @@ struct get_pages_work {
 
 	i915_gem_gtt_finish_object(obj);
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		struct page *page = sg_page_iter_page(&sg_iter);
-
+	for_each_sgt_page(page, sgt_iter, obj->pages) {
 		if (obj->dirty)
 			set_page_dirty(page);
 
-- 
1.9.1


* [PATCH v4 4/4] Try inlining sg_next()
From: Dave Gordon @ 2016-05-20  0:31 UTC
  To: intel-gfx

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 01cde0f..53ff499 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2287,6 +2287,25 @@ __sgt_iter(struct scatterlist *sgl, bool dma)
 }
 
 /**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+	return sg_is_last(sg) ? NULL :
+		likely(!sg_is_chain(++sg)) ? sg :
+		sg_chain_ptr(sg);
+}
+
+/**
  * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
  * @__dmap:	DMA address (output)
  * @__iter:	'struct sgt_iter' (iterator state, internal)
@@ -2296,7 +2315,7 @@ __sgt_iter(struct scatterlist *sgl, bool dma)
 	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
 	     ((__dmap) = (__iter).ix.dma + (__iter).curr);		\
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
-		((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
+		((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
 
 /**
  * for_each_sgt_page - iterate over the pages of the given sg_table
@@ -2309,7 +2328,7 @@ __sgt_iter(struct scatterlist *sgl, bool dma)
 	     ((__pp) = (__iter).ix.pfn == 0 ? NULL :			\
 	           pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
-		((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
+		((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
 
 /**
  * Request queue structure.
-- 
1.9.1


* Re: [PATCH v4 4/4] Try inlining sg_next()
From: Chris Wilson @ 2016-05-20  7:35 UTC
  To: Dave Gordon; +Cc: intel-gfx

On Fri, May 20, 2016 at 01:31:30AM +0100, Dave Gordon wrote:
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>

Much better. The effect of the inline:

         gem:exec:fault:1MiB:    +4.90%
  gem:exec:fault:1MiB:forked:    +7.99%
        gem:exec:fault:16MiB:   +22.94%
 gem:exec:fault:16MiB:forked:   +19.96%
       gem:exec:fault:256MiB:   +27.45%
gem:exec:fault:256MiB:forked:   +36.89%

And it brings this series into parity with mine.

----

Avoiding the out-of-line call to sg_next() reduces the kernel execution
overhead by 10% in some workloads (for example the Unreal Engine 4 demo
Atlantis on 2GiB GTTs) which are dominated by the cost of inserting PTEs
due to texture thrashing. We can demonstrate this in a microbenchmark
that forces us to rebind the object on every execbuf: for large texture
sizes, inlining sg_next() gives a 25% improvement in the time required
to execute an execbuf that rebinds the texture.

Benchmark: igt/benchmarks/gem_exec_fault
Benchmark: igt/benchmarks/gem_exec_trace/Atlantis
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH v4 3/4] Introduce & use new lightweight SGL iterators
From: Chris Wilson @ 2016-05-20  8:15 UTC
  To: Dave Gordon; +Cc: intel-gfx

On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> limiting i915 driver performance in a few benchmarks. So here we
> introduce somewhat lighter-weight iterators, primarily for use with GEM
> objects or other cases where we need only deal with whole aligned pages.
> 
> Unlike the old iterator, the new iterators use an internal state
> structure which is not intended to be accessed by the caller; instead
> each takes as a parameter an output variable which is set before each
> iteration. This makes them particularly simple to use :)
> 
> One of the new iterators provides the caller with the DMA address of
> each page in turn; the other provides the 'struct page' pointer required
> by many memory management operations.
> 
> Various uses of for_each_sg_page() are then converted to the new macros.
> 
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 62 +++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem.c         | 20 ++++-----
>  drivers/gpu/drm/i915/i915_gem_fence.c   | 14 +++---
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 76 ++++++++++++++++-----------------
>  drivers/gpu/drm/i915/i915_gem_userptr.c |  7 ++-
>  5 files changed, 116 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6894d8e..01cde0f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
>  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
>  	(0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>  
> +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> +		       struct drm_i915_gem_object *new,
> +		       unsigned frontbuffer_bits);
> +

That's not a much better spot, since this is now before the object it
acts upon. One day we'll get our types separated from their actors.

>  struct drm_i915_gem_object {
>  	struct drm_gem_object base;
>  
> @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> -		       struct drm_i915_gem_object *new,
> -		       unsigned frontbuffer_bits);
> +/*
> + * New optimised SGL iterator for GEM objects
> + */
> +struct sgt_iter {
> +	struct scatterlist *sgp;
> +	union {
> +		unsigned long pfn;
> +		unsigned long dma;
> +	} ix;

An anonymous union here would be slightly more pleasant. And we should
be using the correct types.

> +	unsigned int curr;
> +	unsigned int max;
> +};
> +
> +/* Constructor for new iterator */
> +static inline struct sgt_iter
> +__sgt_iter(struct scatterlist *sgl, bool dma)
> +{
> +	struct sgt_iter s = { .sgp = sgl };
> +
> +	if (s.sgp) {

Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
our use cases. And would save a conditional every chain step.

> +		s.max = s.curr = s.sgp->offset;
> +		s.max += s.sgp->length;
> +		if (dma)
> +			s.ix.dma = sg_dma_address(s.sgp);
> +		else
> +			s.ix.pfn = page_to_pfn(sg_page(s.sgp));
> +	}
> +
> +	return s;
> +}
> +
> +/**
> + * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
> + * @__dmap:	DMA address (output)
> + * @__iter:	'struct sgt_iter' (iterator state, internal)
> + * @__sgt:	sg_table to iterate over (input)
> + */
> +#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
> +	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
> +	     ((__dmap) = (__iter).ix.dma + (__iter).curr);		\

This prevents us using dma_addr_t 0, which is the error value iirc, so ok.

> +	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
> +		((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
> +
> +/**
> + * for_each_sgt_page - iterate over the pages of the given sg_table
> + * @__pp:	page pointer (output)
> + * @__iter:	'struct sgt_iter' (iterator state, internal)
> + * @__sgt:	sg_table to iterate over (input)
> + */
> +#define for_each_sgt_page(__pp, __iter, __sgt)				\
> +	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
> +	     ((__pp) = (__iter).ix.pfn == 0 ? NULL :			\
> +	           pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\

If we did the shift in __sgt_iter() we could use simpler instructions
inside the loop. The compiler won't know that pfn_to_page() always
yields a non-NULL pointer, so maybe a pfn_to_page(),1 here would help?

> +	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
> +		((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))

-- 
Chris Wilson, Intel Open Source Technology Centre

* ✗ Ro.CI.BAT: failure for series starting with [v4,1/4] drm/i915: refactor i915_gem_object_pin_map()
From: Patchwork @ 2016-05-20  9:18 UTC
  To: Dave Gordon; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v4,1/4] drm/i915: refactor i915_gem_object_pin_map()
URL   : https://patchwork.freedesktop.org/series/7432/
State : failure

== Summary ==

Series 7432v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/7432/revisions/1/mbox

Test drv_module_reload_basic:
                dmesg-warn -> PASS       (ro-bdw-i7-5600u)
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (ro-bdw-i7-5600u)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                dmesg-warn -> PASS       (ro-ivb-i7-3770)
Test kms_sink_crc_basic:
                pass       -> SKIP       (ro-skl-i7-6700hq)

fi-bdw-i7-5557u  total:217  pass:204  dwarn:0   dfail:0   fail:0   skip:13 
fi-bsw-n3050     total:216  pass:172  dwarn:0   dfail:0   fail:2   skip:42 
fi-hsw-i7-4770k  total:217  pass:195  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-i7-4770r  total:217  pass:191  dwarn:0   dfail:0   fail:0   skip:26 
fi-skl-i7-6700k  total:217  pass:189  dwarn:0   dfail:0   fail:0   skip:28 
ro-bdw-i5-5250u  total:217  pass:179  dwarn:0   dfail:0   fail:0   skip:38 
ro-bdw-i7-5557U  total:217  pass:204  dwarn:0   dfail:0   fail:0   skip:13 
ro-bdw-i7-5600u  total:217  pass:184  dwarn:0   dfail:0   fail:2   skip:31 
ro-bsw-n3050     total:217  pass:173  dwarn:0   dfail:0   fail:2   skip:42 
ro-byt-n2820     total:216  pass:172  dwarn:0   dfail:0   fail:3   skip:41 
ro-hsw-i3-4010u  total:216  pass:191  dwarn:0   dfail:0   fail:0   skip:25 
ro-hsw-i7-4770r  total:217  pass:191  dwarn:0   dfail:0   fail:0   skip:26 
ro-ilk-i7-620lm  total:217  pass:149  dwarn:0   dfail:0   fail:1   skip:67 
ro-ilk1-i5-650   total:212  pass:150  dwarn:0   dfail:0   fail:1   skip:61 
ro-ivb-i7-3770   total:217  pass:181  dwarn:0   dfail:0   fail:0   skip:36 
ro-ivb2-i7-3770  total:217  pass:185  dwarn:0   dfail:0   fail:0   skip:32 
ro-skl-i7-6700hq total:212  pass:187  dwarn:0   dfail:0   fail:0   skip:25 
ro-snb-i7-2620M  total:217  pass:175  dwarn:0   dfail:0   fail:1   skip:41 
fi-byt-n2820 failed to connect after reboot

Results at /archive/results/CI_IGT_test/RO_Patchwork_945/

9d15199 drm-intel-nightly: 2016y-05m-20d-07h-54m-59s UTC integration manifest
7854a0c Try inlining sg_next()
cd4e1f3 Introduce & use new lightweight SGL iterators
bf7123c drm/i915: optimise i915_gem_object_map() for small objects
afffbfb drm/i915: refactor i915_gem_object_pin_map()


* Re: [PATCH v4 3/4] Introduce & use new lightweight SGL iterators
From: Chris Wilson @ 2016-05-20  9:56 UTC
  To: Dave Gordon, intel-gfx

On Fri, May 20, 2016 at 09:15:06AM +0100, Chris Wilson wrote:
> On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> > The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> > limiting i915 driver performance in a few benchmarks. So here we
> > introduce somewhat lighter-weight iterators, primarily for use with GEM
> > objects or other cases where we need only deal with whole aligned pages.
> > 
> > Unlike the old iterator, the new iterators use an internal state
> > structure which is not intended to be accessed by the caller; instead
> > each takes as a parameter an output variable which is set before each
> > iteration. This makes them particularly simple to use :)
> > 
> > One of the new iterators provides the caller with the DMA address of
> > each page in turn; the other provides the 'struct page' pointer required
> > by many memory management operations.
> > 
> > Various uses of for_each_sg_page() are then converted to the new macros.
> > 
> > Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         | 62 +++++++++++++++++++++++++--
> >  drivers/gpu/drm/i915/i915_gem.c         | 20 ++++-----
> >  drivers/gpu/drm/i915/i915_gem_fence.c   | 14 +++---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c     | 76 ++++++++++++++++-----------------
> >  drivers/gpu/drm/i915/i915_gem_userptr.c |  7 ++-
> >  5 files changed, 116 insertions(+), 63 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 6894d8e..01cde0f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
> >  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
> >  	(0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
> >  
> > +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > +		       struct drm_i915_gem_object *new,
> > +		       unsigned frontbuffer_bits);
> > +
> 
> That's not a much better spot, since this is now before the object it
> acts upon. One day we'll get our types separated from their actors.
> 
> >  struct drm_i915_gem_object {
> >  	struct drm_gem_object base;
> >  
> > @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
> >  };
> >  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
> >  
> > -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > -		       struct drm_i915_gem_object *new,
> > -		       unsigned frontbuffer_bits);
> > +/*
> > + * New optimised SGL iterator for GEM objects
> > + */
> > +struct sgt_iter {
> > +	struct scatterlist *sgp;
> > +	union {
> > +		unsigned long pfn;
> > +		unsigned long dma;
> > +	} ix;
> 
> An anonymous union here would be slightly more pleasant. And we should
> be using the correct types.
> 
> > +	unsigned int curr;
> > +	unsigned int max;
> > +};
> > +
> > +/* Constructor for new iterator */
> > +static inline struct sgt_iter
> > +__sgt_iter(struct scatterlist *sgl, bool dma)
> > +{
> > +	struct sgt_iter s = { .sgp = sgl };
> > +
> > +	if (s.sgp) {
> 
> Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
> our use cases. And would save a conditional every chain step.

Oh, it's how we stop after __sg_next().

Ok, played a bit and the only thing that sticks is

 /*
- * New optimised SGL iterator for GEM objects
+ * Optimised SGL iterator for GEM objects
  */
 struct sgt_iter {
        struct scatterlist *sgp;
        union {
                unsigned long pfn;
-               unsigned long dma;
-       } ix;
+               dma_addr_t dma;
+       };
        unsigned int curr;
        unsigned int max;
 };
 
-/* Constructor for new iterator */
-static inline struct sgt_iter
+static __always_inline struct sgt_iter

Nothing is ever new past the commit that introduces it!

Final diff (worth a few % in microbenchmarks, consistently above the noise):

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 53ff4993f5b8..586f06646630 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2256,31 +2256,26 @@ struct drm_i915_gem_object {
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /*
- * New optimised SGL iterator for GEM objects
+ * Optimised SGL iterator for GEM objects
  */
-struct sgt_iter {
+static __always_inline struct sgt_iter {
        struct scatterlist *sgp;
        union {
                unsigned long pfn;
-               unsigned long dma;
-       } ix;
+               dma_addr_t dma;
+       };
        unsigned int curr;
        unsigned int max;
-};
-
-/* Constructor for new iterator */
-static inline struct sgt_iter
-__sgt_iter(struct scatterlist *sgl, bool dma)
-{
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
        struct sgt_iter s = { .sgp = sgl };
 
        if (s.sgp) {
                s.max = s.curr = s.sgp->offset;
                s.max += s.sgp->length;
                if (dma)
-                       s.ix.dma = sg_dma_address(s.sgp);
+                       s.dma = sg_dma_address(s.sgp);
                else
-                       s.ix.pfn = page_to_pfn(sg_page(s.sgp));
+                       s.pfn = page_to_pfn(sg_page(s.sgp));
        }
 
        return s;
@@ -2313,9 +2308,9 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
  */
 #define for_each_sgt_dma(__dmap, __iter, __sgt)                                \
        for ((__iter) = __sgt_iter((__sgt)->sgl, true);                 \
-            ((__dmap) = (__iter).ix.dma + (__iter).curr);              \
+            ((__dmap) = (__iter).dma + (__iter).curr);                 \
             (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
-               ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
+            ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
 
 /**
  * for_each_sgt_page - iterate over the pages of the given sg_table
@@ -2325,10 +2320,10 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
  */
 #define for_each_sgt_page(__pp, __iter, __sgt)                         \
        for ((__iter) = __sgt_iter((__sgt)->sgl, false);                \
-            ((__pp) = (__iter).ix.pfn == 0 ? NULL :                    \
-                  pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
+            ((__pp) = (__iter).pfn == 0 ? NULL :                       \
+             pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
             (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
-               ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
+            ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))


And with that,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
