* An almost complete set of reviewed patches for tiled partial
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

And fixing up the execbuf incoherency issue, which gets us to the
starting point for the cmdparser and execbuf regressions that, in turn,
impact how we look up and store the VMA.
-Chris


* [PATCH 01/22] drm/i915: Cache kmap between relocations
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

When doing relocations, we have to obtain a mapping to the page
containing the target address. This is either a kmap or iomap depending
on GPU and its cache coherency. Neighbouring relocation entries are
typically within the same page and so we can cache our kmapping between
them and avoid those pesky TLB flushes.

Note that there is some sleight-of-hand in how the slow relocate works:
the reloc_entry_cache implies that pagefaults are disabled (as we are
inside a kmap_atomic section), yet the slow relocate code is meant to be
the fallback for when the atomic fast path fails. Fortunately it works,
as we have already performed the copy_from_user for the relocation array
(so no more pagefaults there) and the kmap_atomic cache is only enabled
after we have waited upon an active buffer (so no more sleeping in
atomic). Magic!
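
In outline, the caching turns the per-entry map/unmap into the pattern
below (a condensed sketch of the diff that follows; for_each_relocation()
is shorthand for the batched __copy_from_user_inatomic() loop, not a
real macro):

	struct reloc_cache cache;

	reloc_cache_init(&cache);
	for_each_relocation(r) {
		/* reloc_kmap()/reloc_iomap() reuse the previous atomic
		 * mapping whenever r->offset lands on the same page, so
		 * neighbouring entries skip the kunmap/kmap round-trip
		 * (and its TLB flush).
		 */
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
		if (ret)
			break;
	}
	reloc_cache_fini(&cache); /* drop the last mapping, if any */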

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 165 +++++++++++++++++++----------
 1 file changed, 111 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 699315304748..5b718d8a2b1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -308,9 +308,55 @@ relocation_target(struct drm_i915_gem_relocation_entry *reloc,
 	return gen8_canonical_addr((int)reloc->delta + target_offset);
 }
 
+struct reloc_cache {
+	void *vaddr;
+	unsigned int page;
+	enum { KMAP, IOMAP } type;
+};
+
+static void reloc_cache_init(struct reloc_cache *cache)
+{
+	cache->page = -1;
+	cache->vaddr = NULL;
+}
+
+static void reloc_cache_fini(struct reloc_cache *cache)
+{
+	if (!cache->vaddr)
+		return;
+
+	switch (cache->type) {
+	case KMAP:
+		kunmap_atomic(cache->vaddr);
+		break;
+
+	case IOMAP:
+		io_mapping_unmap_atomic(cache->vaddr);
+		break;
+	}
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+			struct reloc_cache *cache,
+			int page)
+{
+	if (cache->page == page)
+		return cache->vaddr;
+
+	if (cache->vaddr)
+		kunmap_atomic(cache->vaddr);
+
+	cache->page = page;
+	cache->vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+	cache->type = KMAP;
+
+	return cache->vaddr;
+}
+
 static int
 relocate_entry_cpu(struct drm_i915_gem_object *obj,
 		   struct drm_i915_gem_relocation_entry *reloc,
+		   struct reloc_cache *cache,
 		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -323,34 +369,47 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
-				reloc->offset >> PAGE_SHIFT));
+	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
 	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 
-	if (INTEL_INFO(dev)->gen >= 8) {
-		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
-
-		if (page_offset == 0) {
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
-			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+	if (INTEL_GEN(dev) >= 8) {
+		page_offset += sizeof(uint32_t);
+		if (page_offset == PAGE_SIZE) {
+			vaddr = reloc_kmap(obj, cache, cache->page + 1);
+			page_offset = 0;
 		}
-
 		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
 	}
 
-	kunmap_atomic(vaddr);
-
 	return 0;
 }
 
+static void *reloc_iomap(struct drm_i915_private *i915,
+			 struct reloc_cache *cache,
+			 uint64_t offset)
+{
+	if (cache->page == offset >> PAGE_SHIFT)
+		return cache->vaddr;
+
+	if (cache->vaddr)
+		io_mapping_unmap_atomic(cache->vaddr);
+
+	cache->page = offset >> PAGE_SHIFT;
+	cache->vaddr =
+		io_mapping_map_atomic_wc(i915->ggtt.mappable,
+					 offset & PAGE_MASK);
+	cache->type = IOMAP;
+
+	return cache->vaddr;
+}
+
 static int
 relocate_entry_gtt(struct drm_i915_gem_object *obj,
 		   struct drm_i915_gem_relocation_entry *reloc,
+		   struct reloc_cache *cache,
 		   uint64_t target_offset)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct i915_vma *vma;
 	uint64_t delta = relocation_target(reloc, target_offset);
 	uint64_t offset;
@@ -371,28 +430,19 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 
 	/* Map the page containing the relocation we're going to perform.  */
 	offset = vma->node.start + reloc->offset;
-	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
-					      offset & PAGE_MASK);
+	reloc_page = reloc_iomap(dev_priv, cache, offset);
 	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
 	if (INTEL_GEN(dev_priv) >= 8) {
 		offset += sizeof(uint32_t);
-
-		if (offset_in_page(offset) == 0) {
-			io_mapping_unmap_atomic(reloc_page);
-			reloc_page =
-				io_mapping_map_atomic_wc(ggtt->mappable,
-							 offset);
-		}
-
+		if (offset_in_page(offset) == 0)
+			reloc_page = reloc_iomap(dev_priv, cache, offset);
 		iowrite32(upper_32_bits(delta),
 			  reloc_page + offset_in_page(offset));
 	}
 
-	io_mapping_unmap_atomic(reloc_page);
-
 unpin:
-	i915_vma_unpin(vma);
+	__i915_vma_unpin(vma);
 	return ret;
 }
 
@@ -408,6 +458,7 @@ clflush_write32(void *addr, uint32_t value)
 static int
 relocate_entry_clflush(struct drm_i915_gem_object *obj,
 		       struct drm_i915_gem_relocation_entry *reloc,
+		       struct reloc_cache *cache,
 		       uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -420,24 +471,18 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
-				reloc->offset >> PAGE_SHIFT));
+	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
 	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
 
-	if (INTEL_INFO(dev)->gen >= 8) {
-		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
-
-		if (page_offset == 0) {
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
-			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+	if (INTEL_GEN(dev) >= 8) {
+		page_offset += sizeof(uint32_t);
+		if (page_offset == PAGE_SIZE) {
+			vaddr = reloc_kmap(obj, cache, cache->page + 1);
+			page_offset = 0;
 		}
-
 		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
 	}
 
-	kunmap_atomic(vaddr);
-
 	return 0;
 }
 
@@ -458,7 +503,8 @@ static bool object_is_idle(struct drm_i915_gem_object *obj)
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
-				   struct drm_i915_gem_relocation_entry *reloc)
+				   struct drm_i915_gem_relocation_entry *reloc,
+				   struct reloc_cache *cache)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_gem_object *target_obj;
@@ -542,11 +588,11 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 
 	if (use_cpu_reloc(obj))
-		ret = relocate_entry_cpu(obj, reloc, target_offset);
+		ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
 	else if (obj->map_and_fenceable)
-		ret = relocate_entry_gtt(obj, reloc, target_offset);
+		ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
 	else if (static_cpu_has(X86_FEATURE_CLFLUSH))
-		ret = relocate_entry_clflush(obj, reloc, target_offset);
+		ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
 	else {
 		WARN_ONCE(1, "Impossible case in relocation handling\n");
 		ret = -ENODEV;
@@ -569,9 +615,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *user_relocs;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	int remain, ret;
+	struct reloc_cache cache;
+	int remain, ret = 0;
 
 	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
+	reloc_cache_init(&cache);
 
 	remain = entry->relocation_count;
 	while (remain) {
@@ -581,19 +629,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 			count = ARRAY_SIZE(stack_reloc);
 		remain -= count;
 
-		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
-			return -EFAULT;
+		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
+			ret = -EFAULT;
+			goto out;
+		}
 
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
+			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
 			if (ret)
-				return ret;
+				goto out;
 
 			if (r->presumed_offset != offset &&
-			    __put_user(r->presumed_offset, &user_relocs->presumed_offset)) {
-				return -EFAULT;
+			    __put_user(r->presumed_offset,
+				       &user_relocs->presumed_offset)) {
+				ret = -EFAULT;
+				goto out;
 			}
 
 			user_relocs++;
@@ -601,7 +653,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 		} while (--count);
 	}
 
-	return 0;
+out:
+	reloc_cache_fini(&cache);
+	return ret;
 #undef N_RELOC
 }
 
@@ -611,15 +665,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 				      struct drm_i915_gem_relocation_entry *relocs)
 {
 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	int i, ret;
+	struct reloc_cache cache;
+	int i, ret = 0;
 
+	reloc_cache_init(&cache);
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
+		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
 		if (ret)
-			return ret;
+			break;
 	}
+	reloc_cache_fini(&cache);
 
-	return 0;
+	return ret;
 }
 
 static int
-- 
2.8.1


* [PATCH 02/22] drm/i915: Extract i915_gem_obj_prepare_shmem_write()
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

This is a companion to i915_gem_obj_prepare_shmem_read() that prepares
the backing storage for direct writes. It first serialises with the GPU,
pins the backing storage and then indicates what clflushes are required in
order for the writes to be coherent.

Whilst here, fix support for ancient CPUs without clflush for which we
cannot do the GTT+clflush tricks.
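
For reference, the intended caller pattern looks roughly like this (a
condensed sketch of the i915_gem_shmem_pwrite() conversion below):

	unsigned int needs_clflush;
	int ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	if (ret)
		return ret;

	/* CLFLUSH_BEFORE: partially written cachelines must be
	 * invalidated before reading them back; CLFLUSH_AFTER: the
	 * writes must be flushed out to memory for the GPU to see.
	 */

	/* ... write into the object via its shmem pages ... */

	if (needs_clflush & CLFLUSH_AFTER)
		i915_gem_chipset_flush(to_i915(obj->base.dev));

	i915_gem_object_unpin_pages(obj);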

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |   2 +-
 drivers/gpu/drm/i915/i915_drv.h        |   6 +-
 drivers/gpu/drm/i915/i915_gem.c        | 142 +++++++++++++++++++--------------
 3 files changed, 90 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 1db829c8b912..c21cab7d1d61 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -973,7 +973,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		       u32 batch_start_offset,
 		       u32 batch_len)
 {
-	int needs_clflush = 0;
+	unsigned int needs_clflush;
 	void *src_base, *src;
 	void *dst = NULL;
 	int ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6bb39301999e..4c5364e85023 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3102,7 +3102,11 @@ void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    int *needs_clflush);
+				    unsigned int *needs_clflush);
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+				     unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE 0x1
+#define CLFLUSH_AFTER 0x2
 
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8d0f70c22f9..370f1e053721 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -609,35 +609,95 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * flush the object from the CPU cache.
  */
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    int *needs_clflush)
+				    unsigned int *needs_clflush)
 {
 	int ret;
 
 	*needs_clflush = 0;
 
-	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-		return -EINVAL;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
 
 	ret = i915_gem_object_wait_rendering(obj, true);
 	if (ret)
 		return ret;
 
-	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
-		/* If we're not in the cpu read domain, set ourself into the gtt
-		 * read domain and manually flush cachelines (if required). This
-		 * optimizes for the case when the gpu will dirty the data
-		 * anyway again before the next pread happens. */
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 							obj->cache_level);
+
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
+	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
+			return ret;
+		}
+		*needs_clflush = 0;
 	}
 
+	return 0;
+}
+
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+				     unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+	/* Same trick applies to invalidate partially written cachelines read
+	 * before writing.
+	 */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+							 obj->cache_level);
+
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
 		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
-	return ret;
+	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
+			return ret;
+		}
+		*needs_clflush = 0;
+	}
+
+	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+		obj->cache_dirty = true;
+
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->dirty = 1;
+	return 0;
 }
 
 /* Per-page copy function for the shmem pread fastpath.
@@ -869,19 +929,14 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int needs_clflush = 0;
 	struct sg_page_iter sg_iter;
 
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	user_data = u64_to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
+	remain = args->size;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
@@ -1242,42 +1297,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int shmem_page_offset, page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int hit_slowpath = 0;
-	int needs_clflush_after = 0;
-	int needs_clflush_before = 0;
+	unsigned int needs_clflush;
 	struct sg_page_iter sg_iter;
 
-	user_data = u64_to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-		/* If we're not in the cpu write domain, set ourself into the gtt
-		 * write domain and manually flush cachelines (if required). This
-		 * optimizes for the case when the gpu will use the data
-		 * right away and we therefore have to clflush anyway. */
-		needs_clflush_after = cpu_write_needs_clflush(obj);
-	}
-	/* Same trick applies to invalidate partially written cachelines read
-	 * before writing. */
-	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
-		needs_clflush_before =
-			!cpu_cache_is_coherent(dev, obj->cache_level);
-
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
-
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
-	i915_gem_object_pin_pages(obj);
-
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
-	obj->dirty = 1;
+	remain = args->size;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
@@ -1301,7 +1331,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		/* If we don't overwrite a cacheline completely we need to be
 		 * careful to have up-to-date data by first clflushing. Don't
 		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
+		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
 
@@ -1311,7 +1341,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
-					needs_clflush_after);
+					needs_clflush & CLFLUSH_AFTER);
 		if (ret == 0)
 			goto next_page;
 
@@ -1320,7 +1350,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
-					needs_clflush_after);
+					needs_clflush & CLFLUSH_AFTER);
 
 		mutex_lock(&dev->struct_mutex);
 
@@ -1342,17 +1372,15 @@ out:
 		 * cachelines in-line while writing and the object moved
 		 * out of the cpu write domain while we've dropped the lock.
 		 */
-		if (!needs_clflush_after &&
+		if (!(needs_clflush & CLFLUSH_AFTER) &&
 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			if (i915_gem_clflush_object(obj, obj->pin_display))
-				needs_clflush_after = true;
+				needs_clflush |= CLFLUSH_AFTER;
 		}
 	}
 
-	if (needs_clflush_after)
+	if (needs_clflush & CLFLUSH_AFTER)
 		i915_gem_chipset_flush(to_i915(dev));
-	else
-		obj->cache_dirty = true;
 
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 	return ret;
@@ -1431,10 +1459,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
-		else if (i915_gem_object_has_struct_page(obj))
-			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 		else
-			ret = -ENODEV;
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 	}
 
 	i915_gem_object_put(obj);
-- 
2.8.1


* [PATCH 03/22] drm/i915: Before accessing an object via the cpu, flush GTT writes
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

If we want to read the pages directly via the CPU, we have to be sure
to first flush the writes via the GTT (as the CPU cannot see writes
made through the GTT alias).
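
The hazard, as an illustrative sketch (not taken from the patch; gtt
and cpu here are two mappings of the same page):

	iowrite32(v, gtt + x);	/* write through the GTT alias */
	y = *(u32 *)(cpu + x);	/* direct CPU read of the same page; it
				 * may miss the write unless the GTT
				 * write domain is flushed first */

Hence both prepare functions now call
i915_gem_object_flush_gtt_write_domain() before doing their clflush
bookkeeping.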

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 370f1e053721..e8c5f9748852 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -622,6 +622,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	i915_gem_object_flush_gtt_write_domain(obj);
+
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
 	 * optimizes for the case when the gpu will dirty the data
@@ -662,6 +664,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	i915_gem_object_flush_gtt_write_domain(obj);
+
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
 	 * This optimizes for the case when the gpu will use the data
-- 
2.8.1


* [PATCH 04/22] drm/i915: Wait for writes through the GTT to land before reading back
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

If we quickly switch from writing through the GTT to a read of the
physical page directly with the CPU (e.g. performing relocations through
the GTT and then running the command parser), we can observe that the
writes are not visible to the CPU. It is not a coherency problem, as
extensive investigations with clflush have demonstrated, but a mere
timing issue - we have to wait for the GTT to complete its write before
we start our read from the CPU.

The issue can be illustrated in userspace with:

	gtt = gem_mmap__gtt(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
	cpu = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);

	for (i = 0; i < OBJECT_SIZE / 64; i++) {
		int x = 16*i + (i%16);
		gtt[x] = i;
		clflush(&cpu[x], sizeof(cpu[x]));
		assert(cpu[x] == i);
	}

Experimenting with that shows that this behaviour is indeed limited to
recent Atom-class hardware.

Testcase: igt/gem_exec_flush/basic-batch-default-cmd #byt
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e8c5f9748852..a33c7cb0549a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3164,20 +3164,30 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	uint32_t old_write_domain;
 
 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
 		return;
 
 	/* No actual flushing is required for the GTT write domain.  Writes
-	 * to it immediately go to main memory as far as we know, so there's
+	 * to it "immediately" go to main memory as far as we know, so there's
 	 * no chipset flush.  It also doesn't land in render cache.
 	 *
 	 * However, we do have to enforce the order so that all writes through
 	 * the GTT land before any writes to the device, such as updates to
 	 * the GATT itself.
+	 *
+	 * We also have to wait a bit for the writes to land from the GTT.
+	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+	 * timing. This issue has only been observed when switching quickly
+	 * between GTT writes and CPU reads from inside the kernel on recent hw,
+	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
+	 * system agents we cannot reproduce this behaviour).
 	 */
 	wmb();
+	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
+		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
 
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
-- 
2.8.1


* [PATCH 05/22] drm/i915: Pin the pages first in shmem prepare read/write
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

There is an improbable, but not impossible, case that if we leave the
pages unpinned as we operate on the object, then somebody via the shrinker
may steal the lock (which lock? right now, it is struct_mutex, THE lock)
and change the cache domains after we have already inspected them.

(Whilst here, avail ourselves of the opportunity to take a couple of
steps to make the two functions look more similar.)
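
After this patch both prepare functions share the same shape (a
condensed sketch of the hunks below, error paths abbreviated):

	ret = i915_gem_object_wait_rendering(obj, readonly);
	if (ret)
		return ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	/* Pin before inspecting the cache domains, so nothing can
	 * invalidate what we are about to inspect if we drop the lock.
	 */
	i915_gem_object_pin_pages(obj);

	i915_gem_object_flush_gtt_write_domain(obj);

	/* ... compute *needs_clflush; on failure goto err_unpin ... */

	return 0; /* with the pages pinned */

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;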

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 48 ++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a33c7cb0549a..d91ecf64ed96 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -622,6 +622,12 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
@@ -633,22 +639,20 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 							obj->cache_level);
 
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_pin_pages(obj);
-
 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
-		if (ret) {
-			i915_gem_object_unpin_pages(obj);
-			return ret;
-		}
+		if (ret)
+			goto err_unpin;
+
 		*needs_clflush = 0;
 	}
 
+	/* return with the pages pinned */
 	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
 }
 
 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
@@ -664,6 +668,12 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	/* If we're not in the cpu write domain, set ourself into the
@@ -681,18 +691,11 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
 							 obj->cache_level);
 
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_pin_pages(obj);
-
 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret) {
-			i915_gem_object_unpin_pages(obj);
-			return ret;
-		}
+		if (ret)
+			goto err_unpin;
+
 		*needs_clflush = 0;
 	}
 
@@ -701,7 +704,12 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 	obj->dirty = 1;
+	/* return with the pages pinned */
 	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
 }
 
 /* Per-page copy function for the shmem pread fastpath.
-- 
2.8.1


* [PATCH 06/22] drm/i915: Tidy up flush cpu/gtt write domains
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

Since we know the write domain, we can drop the local variable and make
the code look a tiny bit simpler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d91ecf64ed96..2b7d9444f621 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3173,7 +3173,6 @@ static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	uint32_t old_write_domain;
 
 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
 		return;
@@ -3197,36 +3196,30 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
 
-	old_write_domain = obj->base.write_domain;
-	obj->base.write_domain = 0;
-
 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 
+	obj->base.write_domain = 0;
 	trace_i915_gem_object_change_domain(obj,
 					    obj->base.read_domains,
-					    old_write_domain);
+					    I915_GEM_DOMAIN_GTT);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 {
-	uint32_t old_write_domain;
-
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
 	if (i915_gem_clflush_object(obj, obj->pin_display))
 		i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-	old_write_domain = obj->base.write_domain;
-	obj->base.write_domain = 0;
-
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 
+	obj->base.write_domain = 0;
 	trace_i915_gem_object_change_domain(obj,
 					    obj->base.read_domains,
-					    old_write_domain);
+					    I915_GEM_DOMAIN_CPU);
 }
 
 /**
-- 
2.8.1


* [PATCH 07/22] drm/i915: Refactor execbuffer relocation writing
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

With the introduction of the reloc page cache, we are just one step away
from refactoring the relocation write functions into one. Not only does
it tidy the code (slightly), but it greatly simplifies the control logic,
much to gcc's satisfaction.
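
The heart of the refactor is a single writer that picks its mapping per
page rather than per entry (condensed from the diff below, error
handling elided):

	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
	/* reloc_vaddr() prefers an atomic iomap through the GTT and
	 * falls back to a kmap (+ clflush) of the shmem page; the
	 * choice and any clflush flags are remembered in cache->vaddr.
	 */
	clflush_write32(vaddr + offset_in_page(offset),
			lower_32_bits(target_offset),
			cache->vaddr);

	if (wide) {		/* gen8+: the upper 32 bits follow */
		offset += sizeof(u32);
		target_offset >>= 32;
		wide = false;
		goto repeat;	/* remaps if we crossed a page boundary */
	}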

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 292 +++++++++++++++--------------
 1 file changed, 151 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5b718d8a2b1c..8dae6333b60f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,6 +39,8 @@
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
+#define DBG_USE_CPU_RELOC 0 /* force relocations to use the CPU write path */
+
 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
@@ -59,6 +61,7 @@ struct i915_execbuffer_params {
 };
 
 struct eb_vmas {
+	struct drm_i915_private *i915;
 	struct list_head vmas;
 	int and;
 	union {
@@ -68,7 +71,8 @@ struct eb_vmas {
 };
 
 static struct eb_vmas *
-eb_create(struct drm_i915_gem_execbuffer2 *args)
+eb_create(struct drm_i915_private *i915,
+	  struct drm_i915_gem_execbuffer2 *args)
 {
 	struct eb_vmas *eb = NULL;
 
@@ -95,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
 	} else
 		eb->and = -args->buffer_count;
 
+	eb->i915 = i915;
 	INIT_LIST_HEAD(&eb->vmas);
 	return eb;
 }
@@ -278,6 +283,9 @@ static void eb_destroy(struct eb_vmas *eb)
 
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
+	if (DBG_USE_CPU_RELOC)
+		return DBG_USE_CPU_RELOC > 0;
+
 	return (HAS_LLC(obj->base.dev) ||
 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 		obj->cache_level != I915_CACHE_NONE);
@@ -302,37 +310,58 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
 }
 
 static inline uint64_t
-relocation_target(struct drm_i915_gem_relocation_entry *reloc,
+relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 		  uint64_t target_offset)
 {
 	return gen8_canonical_addr((int)reloc->delta + target_offset);
 }
 
 struct reloc_cache {
-	void *vaddr;
+	struct drm_i915_private *i915;
+	struct drm_mm_node node;
+	unsigned long vaddr;
 	unsigned int page;
-	enum { KMAP, IOMAP } type;
+	bool use_64bit_reloc;
 };
 
-static void reloc_cache_init(struct reloc_cache *cache)
+static void reloc_cache_init(struct reloc_cache *cache,
+			     struct drm_i915_private *i915)
 {
 	cache->page = -1;
-	cache->vaddr = NULL;
+	cache->vaddr = 0;
+	cache->i915 = i915;
+	cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
+}
+
+static inline void *unmask_page(unsigned long p)
+{
+	return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+	return p & ~PAGE_MASK;
 }
 
+#define KMAP 0x4
+
 static void reloc_cache_fini(struct reloc_cache *cache)
 {
+	void *vaddr;
+
 	if (!cache->vaddr)
 		return;
 
-	switch (cache->type) {
-	case KMAP:
-		kunmap_atomic(cache->vaddr);
-		break;
+	vaddr = unmask_page(cache->vaddr);
+	if (cache->vaddr & KMAP) {
+		if (cache->vaddr & CLFLUSH_AFTER)
+			mb();
 
-	case IOMAP:
-		io_mapping_unmap_atomic(cache->vaddr);
-		break;
+		kunmap_atomic(vaddr);
+		i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);
+	} else {
+		io_mapping_unmap_atomic(vaddr);
+		i915_vma_unpin((struct i915_vma *)cache->node.mm);
 	}
 }
 
@@ -340,147 +369,139 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 			struct reloc_cache *cache,
 			int page)
 {
-	if (cache->page == page)
-		return cache->vaddr;
+	void *vaddr;
+
+	if (cache->vaddr) {
+		kunmap_atomic(unmask_page(cache->vaddr));
+	} else {
+		unsigned int flushes;
+		int ret;
 
-	if (cache->vaddr)
-		kunmap_atomic(cache->vaddr);
+		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+		if (ret)
+			return ERR_PTR(ret);
+
+		cache->vaddr = flushes | KMAP;
+		cache->node.mm = (void *)obj;
+		if (flushes)
+			mb();
+	}
 
+	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 	cache->page = page;
-	cache->vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
-	cache->type = KMAP;
 
-	return cache->vaddr;
+	return vaddr;
 }
 
-static int
-relocate_entry_cpu(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc,
-		   struct reloc_cache *cache,
-		   uint64_t target_offset)
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 int page)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = relocation_target(reloc, target_offset);
-	char *vaddr;
-	int ret;
+	void *vaddr;
 
-	ret = i915_gem_object_set_to_cpu_domain(obj, true);
-	if (ret)
-		return ret;
+	if (cache->vaddr) {
+		io_mapping_unmap_atomic(unmask_page(cache->vaddr));
+	} else {
+		struct i915_vma *vma;
+		int ret;
 
-	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
-	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
+		if (use_cpu_reloc(obj))
+			return NULL;
 
-	if (INTEL_GEN(dev) >= 8) {
-		page_offset += sizeof(uint32_t);
-		if (page_offset == PAGE_SIZE) {
-			vaddr = reloc_kmap(obj, cache, cache->page + 1);
-			page_offset = 0;
-		}
-		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
-	}
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (ret)
+			return ERR_PTR(ret);
 
-	return 0;
-}
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE | PIN_NONBLOCK);
+		if (IS_ERR(vma))
+			return NULL;
 
-static void *reloc_iomap(struct drm_i915_private *i915,
-			 struct reloc_cache *cache,
-			 uint64_t offset)
-{
-	if (cache->page == offset >> PAGE_SHIFT)
-		return cache->vaddr;
+		ret = i915_gem_object_put_fence(obj);
+		if (ret) {
+			i915_vma_unpin(vma);
+			return ERR_PTR(ret);
+		}
 
-	if (cache->vaddr)
-		io_mapping_unmap_atomic(cache->vaddr);
+		cache->node.start = vma->node.start;
+		cache->node.mm = (void *)vma;
+	}
 
-	cache->page = offset >> PAGE_SHIFT;
-	cache->vaddr =
-		io_mapping_map_atomic_wc(i915->ggtt.mappable,
-					 offset & PAGE_MASK);
-	cache->type = IOMAP;
+	vaddr = io_mapping_map_atomic_wc(cache->i915->ggtt.mappable,
+					 cache->node.start + (page << PAGE_SHIFT));
+	cache->page = page;
+	cache->vaddr = (unsigned long)vaddr;
 
-	return cache->vaddr;
+	return vaddr;
 }
 
-static int
-relocate_entry_gtt(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc,
-		   struct reloc_cache *cache,
-		   uint64_t target_offset)
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 int page)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-	uint64_t delta = relocation_target(reloc, target_offset);
-	uint64_t offset;
-	void __iomem *reloc_page;
-	int ret;
-
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto unpin;
-
-	ret = i915_gem_object_put_fence(obj);
-	if (ret)
-		goto unpin;
-
-	/* Map the page containing the relocation we're going to perform.  */
-	offset = vma->node.start + reloc->offset;
-	reloc_page = reloc_iomap(dev_priv, cache, offset);
-	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
+	void *vaddr;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		offset += sizeof(uint32_t);
-		if (offset_in_page(offset) == 0)
-			reloc_page = reloc_iomap(dev_priv, cache, offset);
-		iowrite32(upper_32_bits(delta),
-			  reloc_page + offset_in_page(offset));
+	if (cache->page == page) {
+		vaddr = unmask_page(cache->vaddr);
+	} else {
+		vaddr = NULL;
+		if ((cache->vaddr & KMAP) == 0)
+			vaddr = reloc_iomap(obj, cache, page);
+		if (!vaddr)
+			vaddr = reloc_kmap(obj, cache, page);
 	}
 
-unpin:
-	__i915_vma_unpin(vma);
-	return ret;
+	return vaddr;
 }
 
-static void
-clflush_write32(void *addr, uint32_t value)
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 {
-	/* This is not a fast path, so KISS. */
-	drm_clflush_virt_range(addr, sizeof(uint32_t));
-	*(uint32_t *)addr = value;
-	drm_clflush_virt_range(addr, sizeof(uint32_t));
+	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+		if (flushes & CLFLUSH_BEFORE) {
+			clflushopt(addr);
+			mb();
+		}
+
+		*addr = value;
+
+		/* Writes to the same cacheline are serialised by the CPU
+		 * (including clflush). On the write path, we only require
+		 * that it hits memory in an orderly fashion and place
+		 * mb barriers at the start and end of the relocation phase
+		 * to ensure ordering of clflush wrt to the system.
+		 */
+		if (flushes & CLFLUSH_AFTER)
+			clflushopt(addr);
+	} else
+		*addr = value;
 }
 
 static int
-relocate_entry_clflush(struct drm_i915_gem_object *obj,
-		       struct drm_i915_gem_relocation_entry *reloc,
-		       struct reloc_cache *cache,
-		       uint64_t target_offset)
+relocate_entry(struct drm_i915_gem_object *obj,
+	       const struct drm_i915_gem_relocation_entry *reloc,
+	       struct reloc_cache *cache,
+	       u64 target_offset)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = relocation_target(reloc, target_offset);
-	char *vaddr;
-	int ret;
+	u64 offset = reloc->offset;
+	bool wide = cache->use_64bit_reloc;
+	void *vaddr;
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		return ret;
+	target_offset = relocation_target(reloc, target_offset);
+repeat:
+	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
 
-	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
-	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+	clflush_write32(vaddr + offset_in_page(offset),
+			lower_32_bits(target_offset),
+			cache->vaddr);
 
-	if (INTEL_GEN(dev) >= 8) {
-		page_offset += sizeof(uint32_t);
-		if (page_offset == PAGE_SIZE) {
-			vaddr = reloc_kmap(obj, cache, cache->page + 1);
-			page_offset = 0;
-		}
-		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
+	if (wide) {
+		offset += sizeof(u32);
+		target_offset >>= 32;
+		wide = false;
+		goto repeat;
 	}
 
 	return 0;
@@ -567,7 +588,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	/* Check that the relocation address is valid... */
 	if (unlikely(reloc->offset >
-		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
+		     obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
 		DRM_DEBUG("Relocation beyond object bounds: "
 			  "obj %p target %d offset %d size %d.\n",
 			  obj, reloc->target_handle,
@@ -587,23 +608,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	if (pagefault_disabled() && !object_is_idle(obj))
 		return -EFAULT;
 
-	if (use_cpu_reloc(obj))
-		ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
-	else if (obj->map_and_fenceable)
-		ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
-	else if (static_cpu_has(X86_FEATURE_CLFLUSH))
-		ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
-	else {
-		WARN_ONCE(1, "Impossible case in relocation handling\n");
-		ret = -ENODEV;
-	}
-
+	ret = relocate_entry(obj, reloc, cache, target_offset);
 	if (ret)
 		return ret;
 
 	/* and update the user's relocation entry */
 	reloc->presumed_offset = target_offset;
-
 	return 0;
 }
 
@@ -619,7 +629,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 	int remain, ret = 0;
 
 	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
-	reloc_cache_init(&cache);
+	reloc_cache_init(&cache, eb->i915);
 
 	remain = entry->relocation_count;
 	while (remain) {
@@ -668,7 +678,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 	struct reloc_cache cache;
 	int i, ret = 0;
 
-	reloc_cache_init(&cache);
+	reloc_cache_init(&cache, eb->i915);
 	for (i = 0; i < entry->relocation_count; i++) {
 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
 		if (ret)
@@ -1094,8 +1104,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	if (flush_chipset)
 		i915_gem_chipset_flush(req->engine->i915);
 
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
+	/* Make sure (untracked) CPU relocs/parsing are flushed */
+	wmb();
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	return req->engine->emit_flush(req, EMIT_INVALIDATE);
@@ -1654,7 +1664,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	memset(&params_master, 0x00, sizeof(params_master));
 
-	eb = eb_create(args);
+	eb = eb_create(dev_priv, args);
 	if (eb == NULL) {
 		i915_gem_context_put(ctx);
 		mutex_unlock(&dev->struct_mutex);
-- 
2.8.1


* [PATCH 08/22] drm/i915: Fallback to single page GTT mmappings for relocations
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

If we cannot pin the entire object into the mappable region of the GTT,
try to pin a single page instead. This is much more likely to succeed,
and prevents us falling back to the clflush slow path.
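
The fallback (condensed from the diff below) reserves a single page
worth of GTT address space and rebinds it, via insert_page(), to
whichever object page the next relocation touches:

	/* whole-object pin failed: grab any single page of the aperture */
	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
						  &cache->node,
						  4096, 0, 0,
						  0, ggtt->mappable_end,
						  DRM_MM_SEARCH_DEFAULT,
						  DRM_MM_CREATE_DEFAULT);

	/* ... then, for each new page the relocations walk onto: */
	ggtt->base.insert_page(&ggtt->base,
			       i915_gem_object_get_dma_address(obj, page),
			       cache->node.start, I915_CACHE_NONE, 0);
	vaddr = io_mapping_map_atomic_wc(ggtt->mappable, cache->node.start);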

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ++++++++++++++++++++++++------
 1 file changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8dae6333b60f..14963c9414ea 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -331,6 +331,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->vaddr = 0;
 	cache->i915 = i915;
 	cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
+	cache->node.allocated = false;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -360,8 +361,18 @@ static void reloc_cache_fini(struct reloc_cache *cache)
 		kunmap_atomic(vaddr);
 		i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);
 	} else {
+		wmb();
 		io_mapping_unmap_atomic(vaddr);
-		i915_vma_unpin((struct i915_vma *)cache->node.mm);
+		if (cache->node.allocated) {
+			struct i915_ggtt *ggtt = &cache->i915->ggtt;
+
+			ggtt->base.clear_range(&ggtt->base,
+					       cache->node.start,
+					       cache->node.size,
+					       true);
+			drm_mm_remove_node(&cache->node);
+		} else
+			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 	}
 }
 
@@ -398,8 +409,19 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 			 struct reloc_cache *cache,
 			 int page)
 {
+	struct i915_ggtt *ggtt = &cache->i915->ggtt;
+	unsigned long offset;
 	void *vaddr;
 
+	if (cache->node.allocated) {
+		wmb();
+		ggtt->base.insert_page(&ggtt->base,
+				       i915_gem_object_get_dma_address(obj, page),
+				       cache->node.start, I915_CACHE_NONE, 0);
+		cache->page = page;
+		return unmask_page(cache->vaddr);
+	}
+
 	if (cache->vaddr) {
 		io_mapping_unmap_atomic(unmask_page(cache->vaddr));
 	} else {
@@ -415,21 +437,38 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 
 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 					       PIN_MAPPABLE | PIN_NONBLOCK);
-		if (IS_ERR(vma))
-			return NULL;
+		if (IS_ERR(vma)) {
+			memset(&cache->node, 0, sizeof(cache->node));
+			ret = drm_mm_insert_node_in_range_generic
+				(&ggtt->base.mm, &cache->node,
+				 4096, 0, 0,
+				 0, ggtt->mappable_end,
+				 DRM_MM_SEARCH_DEFAULT,
+				 DRM_MM_CREATE_DEFAULT);
+			if (ret)
+				return ERR_PTR(ret);
+		} else {
+			ret = i915_gem_object_put_fence(obj);
+			if (ret) {
+				i915_vma_unpin(vma);
+				return ERR_PTR(ret);
+			}
 
-		ret = i915_gem_object_put_fence(obj);
-		if (ret) {
-			i915_vma_unpin(vma);
-			return ERR_PTR(ret);
+			cache->node.start = vma->node.start;
+			cache->node.mm = (void *)vma;
 		}
+	}
 
-		cache->node.start = vma->node.start;
-		cache->node.mm = (void *)vma;
+	offset = cache->node.start;
+	if (cache->node.allocated) {
+		ggtt->base.insert_page(&ggtt->base,
+				       i915_gem_object_get_dma_address(obj, page),
+				       offset, I915_CACHE_NONE, 0);
+	} else {
+		offset += page << PAGE_SHIFT;
 	}
 
-	vaddr = io_mapping_map_atomic_wc(cache->i915->ggtt.mappable,
-					 cache->node.start + (page << PAGE_SHIFT));
+	vaddr = io_mapping_map_atomic_wc(cache->i915->ggtt.mappable, offset);
 	cache->page = page;
 	cache->vaddr = (unsigned long)vaddr;
 
-- 
2.8.1


* [PATCH 09/22] drm/i915: Disallow direct CPU access to stolen pages for relocations
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

As we cannot access the backing pages behind stolen objects, we should
not attempt to do so for relocations.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 14963c9414ea..98f9945c5b1d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -283,6 +283,9 @@ static void eb_destroy(struct eb_vmas *eb)
 
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
+	if (!i915_gem_object_has_struct_page(obj))
+		return false;
+
 	if (DBG_USE_CPU_RELOC)
 		return DBG_USE_CPU_RELOC > 0;
 
-- 
2.8.1


* [PATCH 10/22] drm/i915: Move map-and-fenceable tracking to the VMA
From: Chris Wilson @ 2016-08-16 10:42 UTC
  To: intel-gfx

By moving map-and-fenceable tracking from the object to the VMA, we gain
fine-grained tracking and the ability to track individual fences on the VMA
(subsequent patch).
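
The obj->map_and_fenceable bit becomes a flag on the VMA, queried via a
helper (a sketch of the i915_gem_gtt.h change, whose hunk is not quoted
here):

	static inline bool
	i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
	{
		return vma->flags & I915_VMA_CAN_FENCE;
	}

The flag is recomputed in __i915_vma_set_map_and_fenceable() whenever
the VMA is bound and cleared again on unbind.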

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 -----
 drivers/gpu/drm/i915/i915_gem.c            | 37 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_fence.c      |  7 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 10 ++++++--
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  4 ++--
 drivers/gpu/drm/i915/intel_display.c       |  6 ++---
 8 files changed, 36 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4c5364e85023..712a2b1295c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2196,12 +2196,6 @@ struct drm_i915_gem_object {
 	unsigned int fence_dirty:1;
 
 	/**
-	 * Is the object at the current location in the gtt mappable and
-	 * fenceable? Used to avoid costly recalculations.
-	 */
-	unsigned int map_and_fenceable:1;
-
-	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2b7d9444f621..7f2c7dc72838 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2890,8 +2890,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(obj->bind_count == 0);
 	GEM_BUG_ON(!obj->pages);
 
-	if (i915_vma_is_ggtt(vma) &&
-	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
+	if (i915_vma_is_map_and_fenceable(vma)) {
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
@@ -2900,6 +2899,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 			return ret;
 
 		__i915_vma_iounmap(vma);
+		vma->flags &= ~I915_VMA_CAN_FENCE;
 	}
 
 	if (likely(!vma->vm->closed)) {
@@ -2911,13 +2911,10 @@ int i915_vma_unbind(struct i915_vma *vma)
 	drm_mm_remove_node(&vma->node);
 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 
-	if (i915_vma_is_ggtt(vma)) {
-		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
-			obj->map_and_fenceable = false;
-		} else if (vma->pages) {
-			sg_free_table(vma->pages);
-			kfree(vma->pages);
-		}
+	if (vma->pages != obj->pages) {
+		GEM_BUG_ON(!vma->pages);
+		sg_free_table(vma->pages);
+		kfree(vma->pages);
 	}
 	vma->pages = NULL;
 
@@ -3694,8 +3691,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 static bool
 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
-
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
@@ -3705,7 +3700,7 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
-	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
+	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
 		return true;
 
 	if (flags & PIN_OFFSET_BIAS &&
@@ -3727,10 +3722,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	u32 fence_size, fence_alignment;
 
 	fence_size = i915_gem_get_ggtt_size(dev_priv,
-					    obj->base.size,
+					    vma->size,
 					    i915_gem_object_get_tiling(obj));
 	fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
-						      obj->base.size,
+						      vma->size,
 						      i915_gem_object_get_tiling(obj),
 						      true);
 
@@ -3740,7 +3735,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	mappable = (vma->node.start + fence_size <=
 		    dev_priv->ggtt.mappable_end);
 
-	obj->map_and_fenceable = mappable && fenceable;
+	if (mappable && fenceable)
+		vma->flags |= I915_VMA_CAN_FENCE;
+	else
+		vma->flags &= ~I915_VMA_CAN_FENCE;
 }
 
 int __i915_vma_do_pin(struct i915_vma *vma,
@@ -3800,12 +3798,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
 		WARN(i915_vma_is_pinned(vma),
 		     "bo is already pinned in ggtt with incorrect alignment:"
-		     " offset=%08x, req.alignment=%llx, req.map_and_fenceable=%d,"
-		     " obj->map_and_fenceable=%d\n",
-		     i915_ggtt_offset(vma),
-		     alignment,
+		     " offset=%08x, req.alignment=%llx,"
+		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
+		     i915_ggtt_offset(vma), alignment,
 		     !!(flags & PIN_MAPPABLE),
-		     obj->map_and_fenceable);
+		     i915_vma_is_map_and_fenceable(vma));
 		ret = i915_vma_unbind(vma);
 		if (ret)
 			return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 98f9945c5b1d..572e6a4f9d7e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -853,7 +853,6 @@ static bool
 eb_vma_misplaced(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	struct drm_i915_gem_object *obj = vma->obj;
 
 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
 		!i915_vma_is_ggtt(vma));
@@ -874,7 +873,8 @@ eb_vma_misplaced(struct i915_vma *vma)
 		return true;
 
 	/* avoid costly ping-pong once a batch bo ended up non-mappable */
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
+	    !i915_vma_is_map_and_fenceable(vma))
 		return !only_mappable_for_reloc(entry->flags);
 
 	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 334c3c4e8357..f4facd8b475e 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -130,7 +130,9 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		     !is_power_of_2(vma->node.size) ||
 		     (vma->node.start & (vma->node.size - 1)),
 		     "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
-		     vma->node.start, obj->map_and_fenceable, vma->node.size);
+		     vma->node.start,
+		     i915_vma_is_map_and_fenceable(vma),
+		     vma->node.size);
 
 		if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 			tile_width = 128;
@@ -389,9 +391,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 			return 0;
 		}
 	} else if (enable) {
-		if (WARN_ON(!obj->map_and_fenceable))
-			return -EINVAL;
-
 		reg = i915_find_fence_reg(dev);
 		if (IS_ERR(reg))
 			return PTR_ERR(reg);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cbeec4cfe8a4..f658c9e28893 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3688,7 +3688,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	assert_rpm_wakelock_held(to_i915(vma->vm->dev));
 
 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
-	if (WARN_ON(!vma->obj->map_and_fenceable))
+	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
 		return IO_ERR_PTR(-ENODEV);
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 79a08a050487..ed0a6c7a1883 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -197,8 +197,9 @@ struct i915_vma {
 #define I915_VMA_LOCAL_BIND	BIT(7)
 #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
 
-#define I915_VMA_GGTT	BIT(8)
-#define I915_VMA_CLOSED BIT(9)
+#define I915_VMA_GGTT		BIT(8)
+#define I915_VMA_CAN_FENCE	BIT(9)
+#define I915_VMA_CLOSED		BIT(10)
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -239,6 +240,11 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_GGTT;
 }
 
+static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
+{
+	return vma->flags & I915_VMA_CAN_FENCE;
+}
+
 static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_CLOSED;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index bfefb63a55ef..af70d4460a9e 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -134,7 +134,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 	if (!vma)
 		return 0;
 
-	if (!obj->map_and_fenceable)
+	if (!i915_vma_is_map_and_fenceable(vma))
 		return 0;
 
 	if (IS_GEN3(dev_priv)) {
@@ -145,7 +145,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 			goto bad;
 	}
 
-	size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode);
+	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
 	if (vma->node.size < size)
 		goto bad;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index bb4473b26fd2..dab306dae018 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2224,7 +2224,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	 * framebuffer compression.  For simplicity, we always install
 	 * a fence as the cost is not that onerous.
 	 */
-	if (view.type == I915_GGTT_VIEW_NORMAL) {
+	if (i915_vma_is_map_and_fenceable(vma)) {
 		ret = i915_gem_object_get_fence(obj);
 		if (ret == -EDEADLK) {
 			/*
@@ -2262,11 +2262,11 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
 
 	intel_fill_fb_ggtt_view(&view, fb, rotation);
+	vma = i915_gem_object_to_ggtt(obj, &view);
 
-	if (view.type == I915_GGTT_VIEW_NORMAL)
+	if (i915_vma_is_map_and_fenceable(vma))
 		i915_gem_object_unpin_fence(obj);
 
-	vma = i915_gem_object_to_ggtt(obj, &view);
 	i915_gem_object_unpin_from_display_plane(vma);
 }
 
-- 
2.8.1

* [PATCH 11/22] drm/i915: Allow ringbuffers to be bound anywhere
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (9 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 10/22] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 12:52   ` Joonas Lahtinen
  2016-08-16 10:42 ` [PATCH 12/22] drm/i915: Allocate rings from stolen Chris Wilson
                   ` (11 subsequent siblings)
  22 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

Now that we have WC vmapping available, we can bind our rings anywhere
in the GGTT and do not need to restrict them to the mappable region.
The exception is stolen objects, to which direct CPU access is
forbidden, so those must still be bound through the mappable aperture.
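
In outline (editor's sketch of the logic introduced below, not extra
code): the mapping type is chosen per platform, and only stolen forces
the aperture:

/* WB via the kernel's cached mapping on LLC platforms, WC vmap elsewhere. */
map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;

/* Stolen memory cannot be accessed directly by the CPU, so it must be
 * bound into the mappable aperture and accessed through an iomap.
 */
if (vma->obj->stolen)
	flags |= PIN_MAPPABLE;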

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fa22bd87bab0..a96a470dfe03 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1889,17 +1889,20 @@ int intel_ring_pin(struct intel_ring *ring)
 {
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
 	unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
+	enum i915_map_type map;
 	struct i915_vma *vma = ring->vma;
 	void *addr;
 	int ret;
 
 	GEM_BUG_ON(ring->vaddr);
 
-	if (ring->needs_iomap)
+	map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
+
+	if (vma->obj->stolen)
 		flags |= PIN_MAPPABLE;
 
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-		if (flags & PIN_MAPPABLE)
+		if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
 			ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
 		else
 			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
@@ -1911,10 +1914,10 @@ int intel_ring_pin(struct intel_ring *ring)
 	if (unlikely(ret))
 		return ret;
 
-	if (flags & PIN_MAPPABLE)
+	if (i915_vma_is_map_and_fenceable(vma))
 		addr = (void __force *)i915_vma_pin_iomap(vma);
 	else
-		addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+		addr = i915_gem_object_pin_map(vma->obj, map);
 	if (IS_ERR(addr))
 		goto err;
 
@@ -1931,7 +1934,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
-	if (ring->needs_iomap)
+	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else
 		i915_gem_object_unpin_map(ring->vma->obj);
@@ -2002,8 +2005,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 		return ERR_CAST(vma);
 	}
 	ring->vma = vma;
-	if (!HAS_LLC(engine->i915) || vma->obj->stolen)
-		ring->needs_iomap = true;
 
 	list_add(&ring->link, &engine->buffers);
 	return ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e3777572c70e..107c2def5754 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -96,7 +96,6 @@ struct intel_ring {
 	int space;
 	int size;
 	int effective_size;
-	bool needs_iomap;
 
 	/** We track the position of the requests in the ring buffer, and
 	 * when each is retired we increment last_retired_head as the GPU
-- 
2.8.1

* [PATCH 12/22] drm/i915: Allocate rings from stolen
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (10 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 11/22] drm/i915: Allow ringbuffers to be bound anywhere Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 11:47   ` Joonas Lahtinen
  2016-08-16 10:42 ` [PATCH 13/22] drm/i915/userptr: Make gup errors stickier Chris Wilson
                   ` (10 subsequent siblings)
  22 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

If stolen memory is available, make use of it for ringbuffer allocation.
Previously this was restricted to !llc platforms, as writing to stolen
requires a GGTT mapping. Now that we have partial mappable support, the
mappable aperture isn't quite so precious, so we can use it more freely,
and ringbuffers are a good user for otherwise wasted stolen.
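
The allocation strategy reduces to a try-stolen-first fallback (editor's
sketch of the hunk below; note that i915_gem_object_create_stolen()
returns NULL, not an ERR_PTR, when stolen space is exhausted):

obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
if (obj == NULL)	/* no stolen space: fall back to shmemfs */
	obj = i915_gem_object_create(&dev_priv->drm, size);
if (IS_ERR(obj))
	return ERR_CAST(obj);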

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a96a470dfe03..5a5412f3db46 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1949,10 +1949,8 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 
-	obj = ERR_PTR(-ENODEV);
-	if (!HAS_LLC(dev_priv))
-		obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
-	if (IS_ERR(obj))
+	obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
+	if (obj == NULL)
 		obj = i915_gem_object_create(&dev_priv->drm, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
-- 
2.8.1

* [PATCH 13/22] drm/i915/userptr: Make gup errors stickier
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (11 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 12/22] drm/i915: Allocate rings from stolen Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 10:42 ` [PATCH 14/22] drm/i915: Rename fence.lru_list to link Chris Wilson
                   ` (9 subsequent siblings)
  22 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

Keep any error reported by the gup_worker until we are notified that the
arena has changed (via the mmu-notifier). This ensures that two
consecutive calls to i915_gem_object_get_pages() report the same error,
and curtails a loop of detecting a fault and requeueing a gup_worker.
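
Schematically (editor's sketch of the resulting flow): obj->userptr.work
doubles as the status word, so a parked error is replayed until the
mmu-notifier resets it:

if (obj->userptr.work) {
	/* A pending worker is a plain pointer; a failed one is an
	 * ERR_PTR that now stays put until the arena changes.
	 */
	if (IS_ERR(obj->userptr.work))
		return PTR_ERR(obj->userptr.work);	/* sticky error */
	else
		return -EAGAIN;				/* still in flight */
}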

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 57218cca7e05..be54825ef3e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			}
 		}
 		obj->userptr.work = ERR_PTR(ret);
-		if (ret)
-			__i915_gem_userptr_set_active(obj, false);
 	}
 
 	obj->userptr.workers--;
@@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	 * to the vma (discard or cloning) which should prevent the more
 	 * egregious cases from causing harm.
 	 */
-	if (IS_ERR(obj->userptr.work)) {
-		/* active flag will have been dropped already by the worker */
-		ret = PTR_ERR(obj->userptr.work);
-		obj->userptr.work = NULL;
-		return ret;
-	}
-	if (obj->userptr.work)
+
+	if (obj->userptr.work) {
 		/* active flag should still be held for the pending work */
-		return -EAGAIN;
+		if (IS_ERR(obj->userptr.work))
+			return PTR_ERR(obj->userptr.work);
+		else
+			return -EAGAIN;
+	}
 
 	/* Let the mmu-notifier know that we have begun and need cancellation */
 	ret = __i915_gem_userptr_set_active(obj, true);
-- 
2.8.1

* [PATCH 14/22] drm/i915: Rename fence.lru_list to link
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (12 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 13/22] drm/i915/userptr: Make gup errors stickier Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 11:39   ` Joonas Lahtinen
  2016-08-16 10:42 ` [PATCH 15/22] drm/i915: Move fence tracking from object to vma Chris Wilson
                   ` (8 subsequent siblings)
  22 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

Our current practice is to name only the actual list (here
dev_priv->mm.fence_list) using "list"; the elements upon that list are
referred to as "link". Further, the lru nature belongs to the list and
not to the node, so including it in the name does not disambiguate the
link from anything else.
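
The convention at a glance (editor's sketch, not patch code): the head
is named "*_list", while the node embedded in each element is "link":

struct drm_i915_fence_reg {
	struct list_head link;	/* node on dev_priv->mm.fence_list */
	/* ... */
};

list_for_each_entry(reg, &dev_priv->mm.fence_list, link)
	/* walk the fence LRU */;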

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 2 +-
 drivers/gpu/drm/i915/i915_gem.c            | 2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
 drivers/gpu/drm/i915/i915_gem_fence.c      | 9 ++++-----
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 712a2b1295c3..b1679c3ab9de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -461,7 +461,7 @@ struct intel_overlay_error_state;
 #define I915_MAX_NUM_FENCE_BITS 6
 
 struct drm_i915_fence_reg {
-	struct list_head lru_list;
+	struct list_head link;
 	struct drm_i915_gem_object *obj;
 	int pin_count;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f2c7dc72838..db66f9c7fd34 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4544,7 +4544,7 @@ i915_gem_load_init(struct drm_device *dev)
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_engine_lists(&dev_priv->engine[i]);
 	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
-		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
+		INIT_LIST_HEAD(&dev_priv->fence_regs[i].link);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 572e6a4f9d7e..b9ab4f46c27d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1313,7 +1313,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		if (flags & __EXEC_OBJECT_HAS_FENCE) {
 			struct drm_i915_private *dev_priv = req->i915;
 
-			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].link,
 				       &dev_priv->mm.fence_list);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index f4facd8b475e..f4b212e673e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -245,11 +245,11 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 	if (enable) {
 		obj->fence_reg = reg;
 		fence->obj = obj;
-		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+		list_move_tail(&fence->link, &dev_priv->mm.fence_list);
 	} else {
 		obj->fence_reg = I915_FENCE_REG_NONE;
 		fence->obj = NULL;
-		list_del_init(&fence->lru_list);
+		list_del_init(&fence->link);
 	}
 	obj->fence_dirty = false;
 }
@@ -331,7 +331,7 @@ i915_find_fence_reg(struct drm_device *dev)
 		goto deadlock;
 
 	/* None available, try to steal one or wait for a user to finish */
-	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
+	list_for_each_entry(reg, &dev_priv->mm.fence_list, link) {
 		if (reg->pin_count)
 			continue;
 
@@ -386,8 +386,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
 		reg = &dev_priv->fence_regs[obj->fence_reg];
 		if (!obj->fence_dirty) {
-			list_move_tail(&reg->lru_list,
-				       &dev_priv->mm.fence_list);
+			list_move_tail(&reg->link, &dev_priv->mm.fence_list);
 			return 0;
 		}
 	} else if (enable) {
-- 
2.8.1

* [PATCH 15/22] drm/i915: Move fence tracking from object to vma
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (13 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 14/22] drm/i915: Rename fence.lru_list to link Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 11:38   ` Joonas Lahtinen
  2016-08-16 10:42 ` [PATCH 16/22] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
                   ` (7 subsequent siblings)
  22 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

In order to handle tiled partial GTT mmappings, we need to associate the
fence with an individual vma.

v2: Replaced a couple of silly drops spotted by Joonas
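
In brief (editor's sketch of the new ownership model, drawn from the
hunks below): the vma and the fence register point at each other, and
pinning happens on the vma:

/* Each vma may own one register slot, and each slot knows its owner:
 *   vma->fence  -> struct drm_i915_fence_reg * (NULL if unfenced)
 *   fence->vma  -> struct i915_vma *           (NULL if the slot is free)
 * Typical user (execbuf) lifecycle:
 */
ret = i915_vma_get_fence(vma);		/* find/steal + program a register */
if (ret)
	return ret;
if (i915_vma_pin_fence(vma))		/* true if a fence is attached */
	entry->flags |= __EXEC_OBJECT_HAS_FENCE;
/* ...GPU work queued and retired... */
i915_vma_unpin_fence(vma);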

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  16 +-
 drivers/gpu/drm/i915/i915_drv.h            |  82 ++++--
 drivers/gpu/drm/i915/i915_gem.c            |  30 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  20 +-
 drivers/gpu/drm/i915/i915_gem_fence.c      | 422 +++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   8 +
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  67 +++--
 drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
 drivers/gpu/drm/i915/intel_display.c       |  57 ++--
 drivers/gpu/drm/i915/intel_fbc.c           |  10 +-
 drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
 12 files changed, 330 insertions(+), 388 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4de76049e1e4..7e34f8320a31 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -152,11 +152,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, "%x ",
 			   i915_gem_active_get_seqno(&obj->last_read[id],
 						     &obj->base.dev->struct_mutex));
-	seq_printf(m, "] %x %x%s%s%s",
+	seq_printf(m, "] %x %s%s%s",
 		   i915_gem_active_get_seqno(&obj->last_write,
 					     &obj->base.dev->struct_mutex),
-		   i915_gem_active_get_seqno(&obj->last_fence,
-					     &obj->base.dev->struct_mutex),
 		   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -169,8 +167,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->pin_display)
 		seq_printf(m, " (display)");
-	if (obj->fence_reg != I915_FENCE_REG_NONE)
-		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -180,6 +176,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			   vma->node.start, vma->node.size);
 		if (i915_vma_is_ggtt(vma))
 			seq_printf(m, ", type: %u", vma->ggtt_view.type);
+		if (vma->fence)
+			seq_printf(m, " , fence: %d%s",
+				   vma->fence->id,
+				   i915_gem_active_isset(&vma->last_fence) ? "*" : "");
 		seq_puts(m, ")");
 	}
 	if (obj->stolen)
@@ -938,14 +938,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 
 	seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
+		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
 			   i, dev_priv->fence_regs[i].pin_count);
-		if (obj == NULL)
+		if (!vma)
 			seq_puts(m, "unused");
 		else
-			describe_obj(m, obj);
+			describe_obj(m, vma->obj);
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b1679c3ab9de..05efc0501f3c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -455,15 +455,21 @@ struct intel_opregion {
 struct intel_overlay;
 struct intel_overlay_error_state;
 
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
 struct drm_i915_fence_reg {
 	struct list_head link;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_private *i915;
+	struct i915_vma *vma;
 	int pin_count;
+	int id;
+	/**
+	 * Whether the tiling parameters for the currently
+	 * associated fence register have changed. Note that
+	 * for the purposes of tracking tiling changes we also
+	 * treat the unfenced register, the register slot that
+	 * the object occupies whilst it executes a fenced
+	 * command (such as BLT on gen2/3), as a "fence".
+	 */
+	bool dirty;
 };
 
 struct sdvo_device_mapping {
@@ -2175,27 +2181,11 @@ struct drm_i915_gem_object {
 	unsigned int dirty:1;
 
 	/**
-	 * Fence register bits (if any) for this object.  Will be set
-	 * as needed when mapped into the GTT.
-	 * Protected by dev->struct_mutex.
-	 */
-	signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
-
-	/**
 	 * Advice: are the backing pages purgeable?
 	 */
 	unsigned int madv:2;
 
 	/**
-	 * Whether the tiling parameters for the currently associated fence
-	 * register have changed. Note that for the purposes of tracking
-	 * tiling changes we also treat the unfenced register, the register
-	 * slot that the object occupies whilst it executes a fenced
-	 * command (such as BLT on gen2/3), as a "fence".
-	 */
-	unsigned int fence_dirty:1;
-
-	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
@@ -2243,7 +2233,6 @@ struct drm_i915_gem_object {
 	 */
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
 	struct i915_gem_active last_write;
-	struct i915_gem_active last_fence;
 
 	/** References from framebuffers, locks out tiling changes. */
 	unsigned long framebuffer_references;
@@ -3339,11 +3328,50 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
 }
 
 /* i915_gem_fence.c */
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+int __must_check i915_vma_get_fence(struct i915_vma *vma);
+int __must_check i915_vma_put_fence(struct i915_vma *vma);
+
+/**
+ * i915_vma_pin_fence - pin fencing state
+ * @vma: vma to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * vma (and its object) is ready to be used as a scanout target. Fencing
+ * status must be synchronized first by calling i915_vma_get_fence():
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_vma_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the vma has a fence, false otherwise.
+ */
+static inline bool
+i915_vma_pin_fence(struct i915_vma *vma)
+{
+	if (vma->fence) {
+		vma->fence->pin_count++;
+		return true;
+	} else
+		return false;
+}
 
-bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
+/**
+ * i915_vma_unpin_fence - unpin fencing state
+ * @vma: vma to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_vma_pin_fence. It will handle both objects with and without an
+ * attached fence correctly, callers do not need to distinguish this.
+ */
+static inline void
+i915_vma_unpin_fence(struct i915_vma *vma)
+{
+	if (vma->fence) {
+		GEM_BUG_ON(vma->fence->pin_count <= 0);
+		vma->fence->pin_count--;
+	}
+}
 
 void i915_gem_restore_fences(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index db66f9c7fd34..e7a193a91863 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -841,7 +841,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
 	} else {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			goto out_unpin;
 	}
@@ -1140,7 +1140,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 	} else {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			goto out_unpin;
 	}
@@ -1745,7 +1745,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
-	ret = i915_gem_object_get_fence(obj);
+	ret = i915_vma_get_fence(vma);
 	if (ret)
 		goto err_unpin;
 
@@ -2894,7 +2894,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			return ret;
 
@@ -3376,9 +3376,11 @@ restart:
 			 * dropped the fence as all snoopable access is
 			 * supposed to be linear.
 			 */
-			ret = i915_gem_object_put_fence(obj);
-			if (ret)
-				return ret;
+			list_for_each_entry(vma, &obj->vma_list, obj_link) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
 		} else {
 			/* We either have incoherent backing store and
 			 * so no GTT access or the architecture is fully
@@ -4056,14 +4058,12 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 				    i915_gem_object_retire__read);
 	init_request_active(&obj->last_write,
 			    i915_gem_object_retire__write);
-	init_request_active(&obj->last_fence, NULL);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
-	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@ -4493,6 +4493,7 @@ void
 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
+	int i;
 
 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
 	    !IS_CHERRYVIEW(dev_priv))
@@ -4508,6 +4509,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 				I915_READ(vgtif_reg(avail_rs.fence_num));
 
 	/* Initialize fence registers to zero */
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+		fence->i915 = dev_priv;
+		fence->id = i;
+		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+	}
 	i915_gem_restore_fences(dev);
 
 	i915_gem_detect_bit_6_swizzle(dev);
@@ -4543,8 +4551,6 @@ i915_gem_load_init(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_engine_lists(&dev_priv->engine[i]);
-	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
-		INIT_LIST_HEAD(&dev_priv->fence_regs[i].link);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4554,8 +4560,6 @@ i915_gem_load_init(struct drm_device *dev)
 
 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
 
-	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-
 	init_waitqueue_head(&dev_priv->pending_flip_queue);
 
 	dev_priv->mm.interruptible = true;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b9ab4f46c27d..c012a0d94878 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -250,7 +250,6 @@ static void
 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry;
-	struct drm_i915_gem_object *obj = vma->obj;
 
 	if (!drm_mm_node_allocated(&vma->node))
 		return;
@@ -258,7 +257,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 	entry = vma->exec_entry;
 
 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
-		i915_gem_object_unpin_fence(obj);
+		i915_vma_unpin_fence(vma);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		__i915_vma_unpin(vma);
@@ -451,7 +450,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 			if (ret)
 				return ERR_PTR(ret);
 		} else {
-			ret = i915_gem_object_put_fence(obj);
+			ret = i915_vma_put_fence(vma);
 			if (ret) {
 				i915_vma_unpin(vma);
 				return ERR_PTR(ret);
@@ -807,11 +806,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-		ret = i915_gem_object_get_fence(obj);
+		ret = i915_vma_get_fence(vma);
 		if (ret)
 			return ret;
 
-		if (i915_gem_object_pin_fence(obj))
+		if (i915_vma_pin_fence(vma))
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
@@ -1308,15 +1307,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 	}
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
-		i915_gem_active_set(&obj->last_fence, req);
-		if (flags & __EXEC_OBJECT_HAS_FENCE) {
-			struct drm_i915_private *dev_priv = req->i915;
-
-			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].link,
-				       &dev_priv->mm.fence_list);
-		}
-	}
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		i915_gem_active_set(&vma->last_fence, req);
 
 	i915_vma_set_active(vma, idx);
 	i915_gem_active_set(&vma->last_read[idx], req);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index f4b212e673e0..397c177fbc70 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -55,74 +55,73 @@
  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
  */
 
-static void i965_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+#define pipelined 0
+
+static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
+	u64 val;
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-		fence_reg_lo = FENCE_REG_GEN6_LO(reg);
-		fence_reg_hi = FENCE_REG_GEN6_HI(reg);
+	if (INTEL_INFO(fence->i915)->gen >= 6) {
+		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
 	} else {
-		fence_reg_lo = FENCE_REG_965_LO(reg);
-		fence_reg_hi = FENCE_REG_965_HI(reg);
+		fence_reg_lo = FENCE_REG_965_LO(fence->id);
+		fence_reg_hi = FENCE_REG_965_HI(fence->id);
 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
 	}
 
-	/* To w/a incoherency with non-atomic 64-bit register updates,
-	 * we split the 64-bit update into two 32-bit writes. In order
-	 * for a partial fence not to be evaluated between writes, we
-	 * precede the update with write to turn off the fence register,
-	 * and only enable the fence as the last step.
-	 *
-	 * For extra levels of paranoia, we make sure each step lands
-	 * before applying the next step.
-	 */
-	I915_WRITE(fence_reg_lo, 0);
-	POSTING_READ(fence_reg_lo);
-
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		unsigned int tiling = i915_gem_object_get_tiling(obj);
-		unsigned int stride = i915_gem_object_get_stride(obj);
-		u32 size = vma->node.size;
-		u32 row_size = stride * (tiling == I915_TILING_Y ? 32 : 8);
-		u64 val;
-
-		/* Adjust fence size to match tiled area */
-		size = rounddown(size, row_size);
+	val = 0;
+	if (vma) {
+		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+		bool is_y_tiled = tiling == I915_TILING_Y;
+		unsigned int stride = i915_gem_object_get_stride(vma->obj);
+		u32 row_size = stride * (is_y_tiled ? 32 : 8);
+		u32 size = rounddown((u32)vma->node.size, row_size);
 
 		val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
 		val |= vma->node.start & 0xfffff000;
 		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
-		if (tiling == I915_TILING_Y)
-			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+		if (is_y_tiled)
+			val |= BIT(I965_FENCE_TILING_Y_SHIFT);
 		val |= I965_FENCE_REG_VALID;
+	}
 
-		I915_WRITE(fence_reg_hi, val >> 32);
-		POSTING_READ(fence_reg_hi);
+	if (!pipelined) {
+		struct drm_i915_private *dev_priv = fence->i915;
 
-		I915_WRITE(fence_reg_lo, val);
+		/* To w/a incoherency with non-atomic 64-bit register updates,
+		 * we split the 64-bit update into two 32-bit writes. In order
+		 * for a partial fence not to be evaluated between writes, we
+		 * precede the update with write to turn off the fence register,
+		 * and only enable the fence as the last step.
+		 *
+		 * For extra levels of paranoia, we make sure each step lands
+		 * before applying the next step.
+		 */
+		I915_WRITE(fence_reg_lo, 0);
+		POSTING_READ(fence_reg_lo);
+
+		I915_WRITE(fence_reg_hi, upper_32_bits(val));
+		I915_WRITE(fence_reg_lo, lower_32_bits(val));
 		POSTING_READ(fence_reg_lo);
-	} else {
-		I915_WRITE(fence_reg_hi, 0);
-		POSTING_READ(fence_reg_hi);
 	}
 }
 
-static void i915_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 val;
 
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		unsigned int tiling = i915_gem_object_get_tiling(obj);
-		unsigned int stride = i915_gem_object_get_stride(obj);
+	val = 0;
+	if (vma) {
+		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+		bool is_y_tiled = tiling == I915_TILING_Y;
+		unsigned int stride = i915_gem_object_get_stride(vma->obj);
 		int pitch_val;
 		int tile_width;
 
@@ -134,7 +133,7 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		     i915_vma_is_map_and_fenceable(vma),
 		     vma->node.size);
 
-		if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
 			tile_width = 128;
 		else
 			tile_width = 512;
@@ -144,28 +143,32 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		pitch_val = ffs(pitch_val) - 1;
 
 		val = vma->node.start;
-		if (tiling == I915_TILING_Y)
-			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+		if (is_y_tiled)
+			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
 		val |= I915_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 		val |= I830_FENCE_REG_VALID;
-	} else
-		val = 0;
+	}
 
-	I915_WRITE(FENCE_REG(reg), val);
-	POSTING_READ(FENCE_REG(reg));
+	if (!pipelined) {
+		struct drm_i915_private *dev_priv = fence->i915;
+		i915_reg_t reg = FENCE_REG(fence->id);
+
+		I915_WRITE(reg, val);
+		POSTING_READ(reg);
+	}
 }
 
-static void i830_write_fence_reg(struct drm_device *dev, int reg,
-				struct drm_i915_gem_object *obj)
+static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 val;
 
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		unsigned int tiling = i915_gem_object_get_tiling(obj);
-		unsigned int stride = i915_gem_object_get_stride(obj);
+	val = 0;
+	if (vma) {
+		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+		bool is_y_tiled = tiling == I915_TILING_Y;
+		unsigned int stride = i915_gem_object_get_stride(vma->obj);
 		u32 pitch_val;
 
 		WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
@@ -178,104 +181,102 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
 		pitch_val = ffs(pitch_val) - 1;
 
 		val = vma->node.start;
-		if (tiling == I915_TILING_Y)
-			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+		if (is_y_tiled)
+			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
 		val |= I830_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 		val |= I830_FENCE_REG_VALID;
-	} else
-		val = 0;
+	}
 
-	I915_WRITE(FENCE_REG(reg), val);
-	POSTING_READ(FENCE_REG(reg));
-}
+	if (!pipelined) {
+		struct drm_i915_private *dev_priv = fence->i915;
+		i915_reg_t reg = FENCE_REG(fence->id);
 
-inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
-{
-	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+		I915_WRITE(reg, val);
+		POSTING_READ(reg);
+	}
 }
 
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void fence_write(struct drm_i915_fence_reg *fence,
+			struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	/* Ensure that all CPU reads are completed before installing a fence
-	 * and all writes before removing the fence.
+	/* Previous access through the fence register is marshalled by
+	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
+	 * and explicitly managed for internal users.
 	 */
-	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
-		mb();
-
-	WARN(obj &&
-	     (!i915_gem_object_get_stride(obj) ||
-	      !i915_gem_object_get_tiling(obj)),
-	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-	     i915_gem_object_get_stride(obj),
-	     i915_gem_object_get_tiling(obj));
-
-	if (IS_GEN2(dev))
-		i830_write_fence_reg(dev, reg, obj);
-	else if (IS_GEN3(dev))
-		i915_write_fence_reg(dev, reg, obj);
-	else if (INTEL_INFO(dev)->gen >= 4)
-		i965_write_fence_reg(dev, reg, obj);
-
-	/* And similarly be paranoid that no direct access to this region
-	 * is reordered to before the fence is installed.
+
+	if (IS_GEN2(fence->i915))
+		i830_write_fence_reg(fence, vma);
+	else if (IS_GEN3(fence->i915))
+		i915_write_fence_reg(fence, vma);
+	else
+		i965_write_fence_reg(fence, vma);
+
+	/* Access through the fenced region afterwards is
+	 * ordered by the posting reads whilst writing the registers.
 	 */
-	if (i915_gem_object_needs_mb(obj))
-		mb();
-}
 
-static inline int fence_number(struct drm_i915_private *dev_priv,
-			       struct drm_i915_fence_reg *fence)
-{
-	return fence - dev_priv->fence_regs;
+	fence->dirty = false;
 }
 
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-					 struct drm_i915_fence_reg *fence,
-					 bool enable)
+static int fence_update(struct drm_i915_fence_reg *fence,
+			struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	int reg = fence_number(dev_priv, fence);
+	int ret;
 
-	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+	if (vma) {
+		if (!i915_vma_is_map_and_fenceable(vma))
+			return -EINVAL;
 
-	if (enable) {
-		obj->fence_reg = reg;
-		fence->obj = obj;
-		list_move_tail(&fence->link, &dev_priv->mm.fence_list);
-	} else {
-		obj->fence_reg = I915_FENCE_REG_NONE;
-		fence->obj = NULL;
-		list_del_init(&fence->link);
+		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
+			 !i915_gem_object_get_tiling(vma->obj),
+			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+			 i915_gem_object_get_stride(vma->obj),
+			 i915_gem_object_get_tiling(vma->obj)))
+			return -EINVAL;
+
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->obj->base.dev->struct_mutex);
+		if (ret)
+			return ret;
 	}
-	obj->fence_dirty = false;
-}
 
-static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
-{
-	if (i915_gem_object_is_tiled(obj))
-		i915_gem_release_mmap(obj);
+	if (fence->vma) {
+		ret = i915_gem_active_retire(&fence->vma->last_fence,
+				      &fence->vma->obj->base.dev->struct_mutex);
+		if (ret)
+			return ret;
+	}
 
-	/* As we do not have an associated fence register, we will force
-	 * a tiling change if we ever need to acquire one.
-	 */
-	obj->fence_dirty = false;
-	obj->fence_reg = I915_FENCE_REG_NONE;
-}
+	if (fence->vma && fence->vma != vma) {
+		/* Ensure that all userspace CPU access is completed before
+		 * stealing the fence.
+		 */
+		i915_gem_release_mmap(fence->vma->obj);
 
-static int
-i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
-{
-	return i915_gem_active_retire(&obj->last_fence,
-				      &obj->base.dev->struct_mutex);
+		fence->vma->fence = NULL;
+		fence->vma = NULL;
+
+		list_move(&fence->link, &fence->i915->mm.fence_list);
+	}
+
+	fence_write(fence, vma);
+
+	if (vma) {
+		if (fence->vma != vma) {
+			vma->fence = fence;
+			fence->vma = vma;
+		}
+
+		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+	}
+
+	return 0;
 }
 
 /**
- * i915_gem_object_put_fence - force-remove fence for an object
- * @obj: object to map through a fence reg
+ * i915_vma_put_fence - force-remove fence for a VMA
+ * @vma: vma to map linearly (not through a fence reg)
  *
  * This function force-removes any fence from the given object, which is useful
  * if the kernel wants to do untiled GTT access.
@@ -285,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
  * 0 on success, negative error code on failure.
  */
 int
-i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+i915_vma_put_fence(struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct drm_i915_fence_reg *fence;
-	int ret;
-
-	ret = i915_gem_object_wait_fence(obj);
-	if (ret)
-		return ret;
+	struct drm_i915_fence_reg *fence = vma->fence;
 
-	if (obj->fence_reg == I915_FENCE_REG_NONE)
+	if (!fence)
 		return 0;
 
-	fence = &dev_priv->fence_regs[obj->fence_reg];
-
 	if (WARN_ON(fence->pin_count))
 		return -EBUSY;
 
-	i915_gem_object_fence_lost(obj);
-	i915_gem_object_update_fence(obj, fence, false);
-
-	return 0;
+	return fence_update(fence, NULL);
 }
 
-static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev)
+static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_fence_reg *reg, *avail;
-	int i;
-
-	/* First try to find a free reg */
-	avail = NULL;
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		reg = &dev_priv->fence_regs[i];
-		if (!reg->obj)
-			return reg;
-
-		if (!reg->pin_count)
-			avail = reg;
-	}
-
-	if (avail == NULL)
-		goto deadlock;
+	struct drm_i915_fence_reg *fence;
 
-	/* None available, try to steal one or wait for a user to finish */
-	list_for_each_entry(reg, &dev_priv->mm.fence_list, link) {
-		if (reg->pin_count)
+	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+		if (fence->pin_count)
 			continue;
 
-		return reg;
+		return fence;
 	}
 
-deadlock:
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev))
+	if (intel_has_pending_fb_unpin(&dev_priv->drm))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
 }
 
 /**
- * i915_gem_object_get_fence - set up fencing for an object
- * @obj: object to map through a fence reg
+ * i915_vma_get_fence - set up fencing for a vma
+ * @vma: vma to map through a fence reg
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -365,93 +336,27 @@ deadlock:
  * 0 on success, negative error code on failure.
  */
 int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
+i915_vma_get_fence(struct i915_vma *vma)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	bool enable = i915_gem_object_is_tiled(obj);
-	struct drm_i915_fence_reg *reg;
-	int ret;
-
-	/* Have we updated the tiling parameters upon the object and so
-	 * will need to serialise the write to the associated fence register?
-	 */
-	if (obj->fence_dirty) {
-		ret = i915_gem_object_wait_fence(obj);
-		if (ret)
-			return ret;
-	}
+	struct drm_i915_fence_reg *fence;
+	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 
 	/* Just update our place in the LRU if our fence is getting reused. */
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		reg = &dev_priv->fence_regs[obj->fence_reg];
-		if (!obj->fence_dirty) {
-			list_move_tail(&reg->link, &dev_priv->mm.fence_list);
+	if (vma->fence) {
+		fence = vma->fence;
+		if (!fence->dirty) {
+			list_move_tail(&fence->link,
+				       &fence->i915->mm.fence_list);
 			return 0;
 		}
-	} else if (enable) {
-		reg = i915_find_fence_reg(dev);
-		if (IS_ERR(reg))
-			return PTR_ERR(reg);
-
-		if (reg->obj) {
-			struct drm_i915_gem_object *old = reg->obj;
-
-			ret = i915_gem_object_wait_fence(old);
-			if (ret)
-				return ret;
-
-			i915_gem_object_fence_lost(old);
-		}
+	} else if (set) {
+		fence = fence_find(to_i915(vma->vm->dev));
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
 	} else
 		return 0;
 
-	i915_gem_object_update_fence(obj, reg, enable);
-
-	return 0;
-}
-
-/**
- * i915_gem_object_pin_fence - pin fencing state
- * @obj: object to pin fencing for
- *
- * This pins the fencing state (whether tiled or untiled) to make sure the
- * object is ready to be used as a scanout target. Fencing status must be
- * synchronize first by calling i915_gem_object_get_fence():
- *
- * The resulting fence pin reference must be released again with
- * i915_gem_object_unpin_fence().
- *
- * Returns:
- *
- * True if the object has a fence, false otherwise.
- */
-bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		to_i915(obj->base.dev)->fence_regs[obj->fence_reg].pin_count++;
-		return true;
-	} else
-		return false;
-}
-
-/**
- * i915_gem_object_unpin_fence - unpin fencing state
- * @obj: object to unpin fencing for
- *
- * This releases the fence pin reference acquired through
- * i915_gem_object_pin_fence. It will handle both objects with and without an
- * attached fence correctly, callers do not need to distinguish this.
- */
-void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
-		dev_priv->fence_regs[obj->fence_reg].pin_count--;
-	}
+	return fence_update(fence, set);
 }
 
 /**
@@ -473,12 +378,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		 * Commit delayed tiling changes if we have an object still
 		 * attached to the fence, otherwise just clear the fence.
 		 */
-		if (reg->obj) {
-			i915_gem_object_update_fence(reg->obj, reg,
-						     i915_gem_object_get_tiling(reg->obj));
-		} else {
-			i915_gem_write_fence(dev, i, NULL);
-		}
+		fence_write(reg, reg->vma);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f658c9e28893..d03f9180ce76 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3339,6 +3339,7 @@ void i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(i915_vma_is_active(vma));
 	GEM_BUG_ON(!i915_vma_is_closed(vma));
+	GEM_BUG_ON(vma->fence);
 
 	list_del(&vma->vm_link);
 	if (!i915_vma_is_ggtt(vma))
@@ -3374,6 +3375,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
+	init_request_active(&vma->last_fence, NULL);
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
 	vma->obj = obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ed0a6c7a1883..4b67347834b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,7 +38,13 @@
 
 #include "i915_gem_request.h"
 
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
 struct drm_i915_file_private;
+struct drm_i915_fence_reg;
 
 typedef uint32_t gen6_pte_t;
 typedef uint64_t gen8_pte_t;
@@ -174,6 +180,7 @@ struct i915_vma {
 	struct drm_mm_node node;
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
+	struct drm_i915_fence_reg *fence;
 	struct sg_table *pages;
 	void __iomem *iomap;
 	u64 size;
@@ -203,6 +210,7 @@ struct i915_vma {
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	struct i915_gem_active last_fence;
 
 	/**
 	 * Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af70d4460a9e..a14b1e3d4c78 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -116,13 +116,39 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 	return true;
 }
 
+static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
+{
+	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
+	u32 size;
+
+	if (!i915_vma_is_map_and_fenceable(vma))
+		return true;
+
+	if (INTEL_GEN(dev_priv) == 3) {
+		if (vma->node.start & ~I915_FENCE_START_MASK)
+			return false;
+	} else {
+		if (vma->node.start & ~I830_FENCE_START_MASK)
+			return false;
+	}
+
+	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
+	if (vma->node.size < size)
+		return false;
+
+	if (vma->node.start & (size - 1))
+		return false;
+
+	return true;
+}
+
 /* Make the current GTT allocation valid for the change in tiling. */
 static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_vma *vma;
-	u32 size;
+	int ret;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
@@ -130,32 +156,16 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
 	if (INTEL_GEN(dev_priv) >= 4)
 		return 0;
 
-	vma = i915_gem_object_to_ggtt(obj, NULL);
-	if (!vma)
-		return 0;
-
-	if (!i915_vma_is_map_and_fenceable(vma))
-		return 0;
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (i915_vma_fence_prepare(vma, tiling_mode))
+			continue;
 
-	if (IS_GEN3(dev_priv)) {
-		if (vma->node.start & ~I915_FENCE_START_MASK)
-			goto bad;
-	} else {
-		if (vma->node.start & ~I830_FENCE_START_MASK)
-			goto bad;
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
 	}
 
-	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
-	if (vma->node.size < size)
-		goto bad;
-
-	if (vma->node.start & (size - 1))
-		goto bad;
-
 	return 0;
-
-bad:
-	return i915_vma_unbind(vma);
 }
 
 /**
@@ -248,6 +258,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
 		if (!err) {
+			struct i915_vma *vma;
+
 			if (obj->pages &&
 			    obj->madv == I915_MADV_WILLNEED &&
 			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -257,11 +269,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 					i915_gem_object_pin_pages(obj);
 			}
 
-			obj->fence_dirty =
-				!i915_gem_active_is_idle(&obj->last_fence,
-							 &dev->struct_mutex) ||
-				obj->fence_reg != I915_FENCE_REG_NONE;
+			list_for_each_entry(vma, &obj->vma_list, obj_link) {
+				if (!vma->fence)
+					continue;
 
+				vma->fence->dirty = true;
+			}
 			obj->tiling_and_stride =
 				args->stride | args->tiling_mode;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 404ae3356beb..0bed4ac63720 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -842,7 +842,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
-	err->fence_reg = obj->fence_reg;
+	err->fence_reg = vma->fence ? vma->fence->id : -1;
 	err->tiling = i915_gem_object_get_tiling(obj);
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index dab306dae018..bf575d24a535 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2188,7 +2188,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	struct i915_ggtt_view view;
 	struct i915_vma *vma;
 	u32 alignment;
-	int ret;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -2214,43 +2213,33 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	intel_runtime_pm_get(dev_priv);
 
 	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_pm;
-	}
+	if (IS_ERR(vma))
+		goto err;
 
-	/* Install a fence for tiled scan-out. Pre-i965 always needs a
-	 * fence, whereas 965+ only requires a fence if using
-	 * framebuffer compression.  For simplicity, we always install
-	 * a fence as the cost is not that onerous.
-	 */
 	if (i915_vma_is_map_and_fenceable(vma)) {
-		ret = i915_gem_object_get_fence(obj);
-		if (ret == -EDEADLK) {
-			/*
-			 * -EDEADLK means there are no free fences
-			 * no pending flips.
-			 *
-			 * This is propagated to atomic, but it uses
-			 * -EDEADLK to force a locking recovery, so
-			 * change the returned error to -EBUSY.
-			 */
-			ret = -EBUSY;
-			goto err_unpin;
-		} else if (ret)
-			goto err_unpin;
-
-		i915_gem_object_pin_fence(obj);
+		/* Install a fence for tiled scan-out. Pre-i965 always needs a
+		 * fence, whereas 965+ only requires a fence if using
+		 * framebuffer compression.  For simplicity, we always, when
+		 * possible, install a fence as the cost is not that onerous.
+		 *
+		 * If we fail to fence the tiled scanout, then either the
+		 * modeset will reject the change (which is highly unlikely as
+		 * the affected systems, all but one, do not have unmappable
+		 * space) or we will not be able to enable full powersaving
+		 * techniques (also likely not to apply due to various limits
+		 * FBC and the like impose on the size of the buffer, which
+		 * presumably we violated anyway with this unmappable buffer).
+		 * Anyway, it is presumably better to stumble onwards with
+		 * something and try to run the system in a "less than optimal"
+		 * mode that matches the user configuration.
+		 */
+		if (i915_vma_get_fence(vma) == 0)
+			i915_vma_pin_fence(vma);
 	}
 
+err:
 	intel_runtime_pm_put(dev_priv);
 	return vma;
-
-err_unpin:
-	i915_gem_object_unpin_from_display_plane(vma);
-err_pm:
-	intel_runtime_pm_put(dev_priv);
-	return ERR_PTR(ret);
 }
 
 void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
@@ -2264,9 +2253,7 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	intel_fill_fb_ggtt_view(&view, fb, rotation);
 	vma = i915_gem_object_to_ggtt(obj, &view);
 
-	if (i915_vma_is_map_and_fenceable(vma))
-		i915_gem_object_unpin_fence(obj);
-
+	i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index e122052c4081..40bf2e4c804d 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -709,6 +709,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
+/* XXX replace me when we have VMA tracking for intel_plane_state */
+static int get_fence_id(struct drm_framebuffer *fb)
+{
+	struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
+
+	return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
+}
+
 static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 					 struct intel_crtc_state *crtc_state,
 					 struct intel_plane_state *plane_state)
@@ -740,7 +748,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 		cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
 	cache->fb.pixel_format = fb->pixel_format;
 	cache->fb.stride = fb->pitches[0];
-	cache->fb.fence_reg = obj->fence_reg;
+	cache->fb.fence_reg = get_fence_id(fb);
 	cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 87689df7f514..a480323446fe 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -760,7 +760,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	ret = i915_gem_object_put_fence(new_bo);
+	ret = i915_vma_put_fence(vma);
 	if (ret)
 		goto out_unpin;
 
-- 
2.8.1

* [PATCH 16/22] drm/i915: Choose partial chunksize based on tile row size
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (14 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 15/22] drm/i915: Move fence tracking from object to vma Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 10:42 ` [PATCH 17/22] drm/i915: Fix partial GGTT faulting Chris Wilson
                   ` (6 subsequent siblings)
  22 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

In order to support setting up fences for partial mappings of an object,
we have to align those mappings with the fence. The minimum chunksize we
choose is at least the size of a single tile row.
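
As a worked example (illustrative numbers, assuming 4 KiB pages; not
part of the patch): an X-tiled object with a 4096-byte stride has tile
rows of 4096 * 8 = 32 KiB, i.e. 8 pages, whereas with Y-tiling a tile
row spans 4096 * 32 = 128 KiB, i.e. 32 pages. The chunk size then
becomes the larger of the 1 MiB minimum and that tile row size.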

v2: Make minimum chunk size a define for later use

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e7a193a91863..48e4756e89cc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1663,6 +1663,16 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
+{
+	u64 size;
+
+	size = i915_gem_object_get_stride(obj);
+	size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
+
+	return size >> PAGE_SHIFT;
+}
+
 /**
  * i915_gem_fault - fault a page into the GTT
  * @area: CPU VMA in question
@@ -1681,6 +1691,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
  */
 int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 {
+#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
 	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -1722,7 +1733,11 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	/* Use a partial view if the object is bigger than the aperture. */
 	if (obj->base.size >= ggtt->mappable_end &&
 	    !i915_gem_object_is_tiled(obj)) {
-		static const unsigned int chunk_size = 256; // 1 MiB
+		unsigned int chunk_size;
+
+		chunk_size = MIN_CHUNK_PAGES;
+		if (i915_gem_object_is_tiled(obj))
+			chunk_size = max(chunk_size, tile_row_pages(obj));
 
 		memset(&view, 0, sizeof(view));
 		view.type = I915_GGTT_VIEW_PARTIAL;
-- 
2.8.1

* [PATCH 17/22] drm/i915: Fix partial GGTT faulting
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (15 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 16/22] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 10:42 ` [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
                   ` (5 subsequent siblings)
  22 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

We want to always use the partial VMA as a fallback for a failure to
bind the object into the GGTT. This extends the support for partial
objects in the GGTT to cover everything, not just objects too large.
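
For illustration (example numbers only, not from the patch): a fault at
page 1000 of a 4096-page object with chunk_size = 256 gives

	offset = rounddown(1000, 256) = 768
	size   = min(256, 4096 - 768) = 256

so the partial view binds pages [768, 1024) around the faulting address.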

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 70 +++++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 48e4756e89cc..74ea0926ad70 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1696,7 +1696,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct i915_ggtt_view view = i915_ggtt_view_normal;
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 	struct i915_vma *vma;
 	pgoff_t page_offset;
@@ -1730,27 +1729,31 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
-	/* Use a partial view if the object is bigger than the aperture. */
-	if (obj->base.size >= ggtt->mappable_end &&
-	    !i915_gem_object_is_tiled(obj)) {
+	/* Now pin it into the GTT as needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE | PIN_NONBLOCK);
+	if (IS_ERR(vma)) {
+		struct i915_ggtt_view partial;
 		unsigned int chunk_size;
 
+		/* Use a partial view if it is bigger than available space */
 		chunk_size = MIN_CHUNK_PAGES;
 		if (i915_gem_object_is_tiled(obj))
 			chunk_size = max(chunk_size, tile_row_pages(obj));
 
-		memset(&view, 0, sizeof(view));
-		view.type = I915_GGTT_VIEW_PARTIAL;
-		view.params.partial.offset = rounddown(page_offset, chunk_size);
-		view.params.partial.size =
+		memset(&partial, 0, sizeof(partial));
+		partial.type = I915_GGTT_VIEW_PARTIAL;
+		partial.params.partial.offset =
+			rounddown(page_offset, chunk_size);
+		partial.params.partial.size =
 			min_t(unsigned int,
 			      chunk_size,
 			      (area->vm_end - area->vm_start) / PAGE_SIZE -
-			      view.params.partial.offset);
-	}
+			      partial.params.partial.offset);
 
-	/* Now pin it into the GTT if needed */
-	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		vma = i915_gem_object_ggtt_pin(obj, &partial, 0, 0,
+					       PIN_MAPPABLE);
+	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unlock;
@@ -1768,26 +1771,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	pfn = ggtt->mappable_base + i915_ggtt_offset(vma);
 	pfn >>= PAGE_SHIFT;
 
-	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
-		/* Overriding existing pages in partial view does not cause
-		 * us any trouble as TLBs are still valid because the fault
-		 * is due to userspace losing part of the mapping or never
-		 * having accessed it before (at this partials' range).
-		 */
-		unsigned long base = area->vm_start +
-				     (view.params.partial.offset << PAGE_SHIFT);
-		unsigned int i;
-
-		for (i = 0; i < view.params.partial.size; i++) {
-			ret = vm_insert_pfn(area,
-					    base + i * PAGE_SIZE,
-					    pfn + i);
-			if (ret)
-				break;
-		}
-
-		obj->fault_mappable = true;
-	} else {
+	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 		if (!obj->fault_mappable) {
 			unsigned long size =
 				min_t(unsigned long,
@@ -1803,13 +1787,31 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 				if (ret)
 					break;
 			}
-
-			obj->fault_mappable = true;
 		} else
 			ret = vm_insert_pfn(area,
 					    (unsigned long)vmf->virtual_address,
 					    pfn + page_offset);
+	} else {
+		/* Overriding existing pages in partial view does not cause
+		 * us any trouble as TLBs are still valid because the fault
+		 * is due to userspace losing part of the mapping or never
+		 * having accessed it before (at this partial's range).
+		 */
+		const struct i915_ggtt_view *view = &vma->ggtt_view;
+		unsigned long base = area->vm_start +
+			(view->params.partial.offset << PAGE_SHIFT);
+		unsigned int i;
+
+		for (i = 0; i < view->params.partial.size; i++) {
+			ret = vm_insert_pfn(area,
+					    base + i * PAGE_SIZE,
+					    pfn + i);
+			if (ret)
+				break;
+		}
 	}
+
+	obj->fault_mappable = true;
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
-- 
2.8.1

* [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (16 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 17/22] drm/i915: Fix partial GGTT faulting Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 11:31   ` Joonas Lahtinen
  2016-08-16 10:42 ` [PATCH 19/22] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
                   ` (4 subsequent siblings)
  22 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

Oftentimes we do not want to evict mapped objects from the GGTT as
these are quite expensive to tear down and frequently reused (causing an
equally, if not more so, expensive setup). In particular, when faulting
in a new object we want to avoid evicting an active object, or else we
may trigger a page-fault-of-doom as we ping-pong between evicting two
objects.
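
With the 1 MiB minimum chunk, this means (assuming 4 KiB pages) that any
object larger than 2 * MIN_CHUNK_PAGES << PAGE_SHIFT = 2 MiB is first
pinned with PIN_NONBLOCK | PIN_NONFAULT, so the fault handler prefers
falling back to a partial view over evicting another client's faulted
buffer.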

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c       | 18 ++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_evict.c |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  1 +
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 74ea0926ad70..e28d283256f0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1700,6 +1700,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	struct i915_vma *vma;
 	pgoff_t page_offset;
 	unsigned long pfn;
+	unsigned int flags;
 	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset */
@@ -1729,9 +1730,16 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
+	/* If the object is smaller than a couple of partial vma, it is
+	 * not worth only creating a single partial vma - we may as well
+	 * clear enough space for the full object.
+	 */
+	flags = PIN_MAPPABLE;
+	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
+		flags |= PIN_NONBLOCK | PIN_NONFAULT;
+
 	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE | PIN_NONBLOCK);
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
 	if (IS_ERR(vma)) {
 		struct i915_ggtt_view partial;
 		unsigned int chunk_size;
@@ -1751,6 +1759,12 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 			      (area->vm_end - area->vm_start) / PAGE_SIZE -
 			      partial.params.partial.offset);
 
+		/* If the partial covers the entire object, just create a
+		 * normal VMA.
+		 */
+		if (chunk_size <= obj->base.size >> PAGE_SHIFT)
+			partial.type = I915_GGTT_VIEW_NORMAL;
+
 		vma = i915_gem_object_ggtt_pin(obj, &partial, 0, 0,
 					       PIN_MAPPABLE);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f76c06e92677..815d5fbe07ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -47,7 +47,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
 }
 
 static bool
-mark_free(struct i915_vma *vma, struct list_head *unwind)
+mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
 {
 	if (i915_vma_is_pinned(vma))
 		return false;
@@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 	if (WARN_ON(!list_empty(&vma->exec_list)))
 		return false;
 
+	if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
+		return false;
+
 	list_add(&vma->exec_list, unwind);
 	return drm_mm_scan_add_block(&vma->node);
 }
@@ -129,7 +132,7 @@ search_again:
 	phase = phases;
 	do {
 		list_for_each_entry(vma, *phase, vm_link)
-			if (mark_free(vma, &eviction_list))
+			if (mark_free(vma, flags, &eviction_list))
 				goto found;
 	} while (*++phase);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 4b67347834b5..52fded446390 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -639,6 +639,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 #define PIN_NONBLOCK		BIT(0)
 #define PIN_MAPPABLE		BIT(1)
 #define PIN_ZONE_4G		BIT(2)
+#define PIN_NONFAULT		BIT(3)
 
 #define PIN_MBZ			BIT(5) /* I915_VMA_PIN_OVERFLOW */
 #define PIN_GLOBAL		BIT(6) /* I915_VMA_GLOBAL_BIND */
-- 
2.8.1

* [PATCH 19/22] drm/i915: Fallback to using unmappable memory for scanout
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (17 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 10:42 ` [PATCH 20/22] drm/i915: Track display alignment on VMA Chris Wilson
                   ` (3 subsequent siblings)
  22 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

The existing ABI says that scanouts are pinned into the mappable region
so that legacy clients (e.g. old Xorg or plymouthd) can write directly
into the scanout through a GTT mapping. However, if the surface does not
fit into the mappable region, we are better off just trying to fit it
anywhere and hoping for the best. (Any userspace that is capable of
using ginormous scanouts is also likely not to rely on pure GTT
updates.) With the partial vma fault support, we are no longer
restricted to only using scanouts that we can pin (though it is still
preferred for performance reasons and for powersaving features like
FBC).
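
In code terms this is just a two-step pin (see the hunk below): try
PIN_MAPPABLE | PIN_NONBLOCK for the normal view first and, only if that
fails, retry with no placement restriction.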

v2: Skip fence pinning when not mappable.
v3: Add a comment to explain the possible ramifications of not being
    able to use fences for unmappable scanouts.
v4: Rebase to skip over some local patches
v5: Rebase to defer until after we have unmappable GTT fault support

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Damien Lespiau <damien.lespiau@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c  | 14 ++++++++++----
 drivers/gpu/drm/i915/intel_fbc.c |  4 ++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e28d283256f0..9455e110fb1b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3570,11 +3570,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	/* As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
-	 * always use map_and_fenceable for all scanout buffers.
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
 	 */
-	vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-				       view->type == I915_GGTT_VIEW_NORMAL ?
-				       PIN_MAPPABLE : 0);
+	vma = ERR_PTR(-ENOSPC);
+	if (view->type == I915_GGTT_VIEW_NORMAL)
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       PIN_MAPPABLE | PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
 	if (IS_ERR(vma))
 		goto err_unpin_display;
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 40bf2e4c804d..37415f96f906 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -776,6 +776,10 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 
 	/* The use of a CPU fence is mandatory in order to detect writes
 	 * by the CPU to the scanout and trigger updates to the FBC.
+	 *
+	 * Note that it is possible for a tiled surface to be unmappable (and
+	 * so have no fence associated with it) due to aperture constraints
+	 * at the time of pinning.
 	 */
 	if (cache->fb.tiling_mode != I915_TILING_X ||
 	    cache->fb.fence_reg == I915_FENCE_REG_NONE) {
-- 
2.8.1

* [PATCH 20/22] drm/i915: Track display alignment on VMA
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (18 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 19/22] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 10:42 ` [PATCH 21/22] drm/i915: Bump the inactive tracking for all VMA accessed Chris Wilson
                   ` (2 subsequent siblings)
  22 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

When using the aliasing ppgtt and pageflipping with the shrinker/eviction
active, we note that we often have to rebind the backbuffer before
flipping onto the scanout because it has an invalid alignment. If we
store the worst-case alignment required for a VMA, we can avoid having
to rebind at critical junctures.
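
As a concrete illustration (values invented for the example): if a
scanout pin once required a 256 KiB alignment for a VMA, recording that
in vma->display_alignment means a later bind request passing
alignment == 0 is raised back to 256 KiB, so the buffer does not need
rebinding when it is next flipped onto the display.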

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c     | 21 ++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9455e110fb1b..a76dffd3fbd7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3039,7 +3039,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
 	struct drm_i915_gem_object *obj = vma->obj;
 	u64 start, end;
-	u64 min_alignment;
 	int ret;
 
 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
@@ -3050,17 +3049,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		size = i915_gem_get_ggtt_size(dev_priv, size,
 					      i915_gem_object_get_tiling(obj));
 
-	min_alignment =
-		i915_gem_get_ggtt_alignment(dev_priv, size,
-					    i915_gem_object_get_tiling(obj),
-					    flags & PIN_MAPPABLE);
-	if (alignment == 0)
-		alignment = min_alignment;
-	if (alignment & (min_alignment - 1)) {
-		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
-			  alignment, min_alignment);
-		return -EINVAL;
-	}
+	alignment = max(max(alignment, vma->display_alignment),
+			i915_gem_get_ggtt_alignment(dev_priv, size,
+						    i915_gem_object_get_tiling(obj),
+						    flags & PIN_MAPPABLE));
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 
@@ -3584,6 +3576,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin_display;
 
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
 	WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
 
 	i915_gem_object_flush_cpu_write_domain(obj);
@@ -3614,7 +3608,8 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 	if (WARN_ON(vma->obj->pin_display == 0))
 		return;
 
-	vma->obj->pin_display--;
+	if (--vma->obj->pin_display == 0)
+		vma->display_alignment = 0;
 
 	i915_vma_unpin(vma);
 	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 52fded446390..396c467e4323 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -184,6 +184,7 @@ struct i915_vma {
 	struct sg_table *pages;
 	void __iomem *iomap;
 	u64 size;
+	u64 display_alignment;
 
 	unsigned int flags;
 	/**
-- 
2.8.1

* [PATCH 21/22] drm/i915: Bump the inactive tracking for all VMA accessed
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (19 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 20/22] drm/i915: Track display alignment on VMA Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 11:20   ` Joonas Lahtinen
  2016-08-16 10:42 ` [PATCH 22/22] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
  2016-08-16 11:25 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Cache kmap between relocations Patchwork
  22 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx

We track the LRU access for eviction and bump the last access for the
user GGTT on set-to-gtt. When we do so, we need to not only bump the
primary GGTT VMA but all partials as well. Similarly, we want to
bump the last-access tracking when unpinning an object from the
scanout so that it does not get promptly evicted and hopefully remains
available for reuse on the next frame.
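
For example (a hypothetical layout): an object with a normal GGTT view
plus two partial views has three GGTT VMAs; on set-to-gtt all three are
moved to the tail of the inactive list, not just the normal view.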

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a76dffd3fbd7..ab3c96f09050 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3242,6 +3242,24 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 					    I915_GEM_DOMAIN_CPU);
 }
 
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!i915_vma_is_ggtt(vma))
+			continue;
+
+		if (i915_vma_is_active(vma))
+			continue;
+
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	}
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3254,7 +3272,6 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	uint32_t old_write_domain, old_read_domains;
-	struct i915_vma *vma;
 	int ret;
 
 	ret = i915_gem_object_wait_rendering(obj, !write);
@@ -3304,11 +3321,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	vma = i915_gem_object_to_ggtt(obj, NULL);
-	if (vma &&
-	    drm_mm_node_allocated(&vma->node) &&
-	    !i915_vma_is_active(vma))
-		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	i915_gem_object_bump_inactive_ggtt(obj);
 
 	return 0;
 }
@@ -3611,6 +3624,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 	if (--vma->obj->pin_display == 0)
 		vma->display_alignment = 0;
 
+	/* Bump the LRU to try to avoid premature eviction whilst flipping */
+	if (!i915_vma_is_active(vma))
+		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+
 	i915_vma_unpin(vma);
 	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
 }
-- 
2.8.1

* [PATCH 22/22] drm/i915: Stop discarding GTT cache-domain on unbind vma
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (20 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 21/22] drm/i915: Bump the inactive tracking for all VMA accessed Chris Wilson
@ 2016-08-16 10:42 ` Chris Wilson
  2016-08-16 11:25 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Cache kmap between relocations Patchwork
  22 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 10:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

Since commit 43566dedde54 ("drm/i915: Broaden application of
set-domain(GTT)") we allowed objects to be in the GTT domain, but unbound.
Therefore removing the GTT cache domain when removing the GGTT vma is no
longer semantically correct.
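
For instance (an illustrative sequence, not from the patch): userspace
may call set-domain(GTT) on an object, have it unbound by the shrinker,
and then fault it back in later; the object legitimately remains in the
GTT domain throughout, so clearing that domain on unbind would force a
spurious domain transition on the next access.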

An unfortunate side-effect is that we lose the wondrously named
i915_gem_object_finish_gtt(), not to be confused with
i915_gem_gtt_finish_object()!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ab3c96f09050..aef87ee05cc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2846,27 +2846,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
-{
-	u32 old_write_domain, old_read_domains;
-
-	/* Force a pagefault for domain tracking on next user access */
-	i915_gem_release_mmap(obj);
-
-	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
-		return;
-
-	old_read_domains = obj->base.read_domains;
-	old_write_domain = obj->base.write_domain;
-
-	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
-	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
-
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
@@ -2922,13 +2901,14 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(!obj->pages);
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
-		i915_gem_object_finish_gtt(obj);
-
 		/* release the fence reg _after_ flushing */
 		ret = i915_vma_put_fence(vma);
 		if (ret)
 			return ret;
 
+		/* Force a pagefault for domain tracking on next user access */
+		i915_gem_release_mmap(obj);
+
 		__i915_vma_iounmap(vma);
 		vma->flags &= ~I915_VMA_CAN_FENCE;
 	}
-- 
2.8.1

* Re: [PATCH 21/22] drm/i915: Bump the inactive tracking for all VMA accessed
  2016-08-16 10:42 ` [PATCH 21/22] drm/i915: Bump the inactive tracking for all VMA accessed Chris Wilson
@ 2016-08-16 11:20   ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> We track the LRU access for eviction and bump the last access for the
> user GGTT on set-to-gtt. When we do so, we need to not only bump the
> primary GGTT VMA but all partials as well. Similarly, we want to
> bump the last-access tracking when unpinning an object from the
> scanout so that it does not get promptly evicted and hopefully remains
> available for reuse on the next frame.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Cache kmap between relocations
  2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
                   ` (21 preceding siblings ...)
  2016-08-16 10:42 ` [PATCH 22/22] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
@ 2016-08-16 11:25 ` Patchwork
  22 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2016-08-16 11:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/22] drm/i915: Cache kmap between relocations
URL   : https://patchwork.freedesktop.org/series/11156/
State : failure

== Summary ==

Series 11156v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/11156/revisions/1/mbox

Test drv_module_reload_basic:
                pass       -> FAIL       (ro-ilk1-i5-650)
Test gem_exec_parallel:
        Subgroup basic:
                pass       -> FAIL       (ro-ilk1-i5-650)
Test kms_cursor_legacy:
        Subgroup basic-flip-vs-cursor-legacy:
                pass       -> FAIL       (ro-skl3-i5-6260u)
                pass       -> FAIL       (ro-bdw-i5-5250u)
        Subgroup basic-flip-vs-cursor-varying-size:
                fail       -> PASS       (ro-bdw-i5-5250u)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-c:
                pass       -> DMESG-WARN (ro-bdw-i7-5600u)

fi-kbl-qkkr      total:244  pass:185  dwarn:28  dfail:0   fail:3   skip:28 
fi-skl-i7-6700k  total:244  pass:208  dwarn:4   dfail:2   fail:2   skip:28 
fi-snb-i7-2600   total:244  pass:202  dwarn:0   dfail:0   fail:0   skip:42 
ro-bdw-i5-5250u  total:240  pass:219  dwarn:1   dfail:0   fail:1   skip:19 
ro-bdw-i7-5600u  total:240  pass:206  dwarn:1   dfail:0   fail:1   skip:32 
ro-bsw-n3050     total:240  pass:31   dwarn:0   dfail:0   fail:0   skip:209
ro-hsw-i3-4010u  total:240  pass:214  dwarn:0   dfail:0   fail:0   skip:26 
ro-hsw-i7-4770r  total:240  pass:185  dwarn:0   dfail:0   fail:0   skip:55 
ro-ilk1-i5-650   total:235  pass:172  dwarn:0   dfail:0   fail:3   skip:60 
ro-ivb2-i7-3770  total:240  pass:209  dwarn:0   dfail:0   fail:0   skip:31 
ro-skl3-i5-6260u total:240  pass:222  dwarn:0   dfail:0   fail:4   skip:14 
ro-bdw-i7-5557U failed to connect after reboot
ro-byt-n2820 failed to connect after reboot

Results at /archive/results/CI_IGT_test/RO_Patchwork_1888/

ab22a8a drm-intel-nightly: 2016y-08m-16d-09h-55m-52s UTC integration manifest
ab7f8c5 drm/i915: Stop discarding GTT cache-domain on unbind vma
53ba198 drm/i915: Bump the inactive tracking for all VMA accessed
4e177d6 drm/i915: Track display alignment on VMA
062a78e drm/i915: Fallback to using unmappable memory for scanout
18fdfac drm/i915: Choose not to evict faultable objects from the GGTT
3baf057 drm/i915: Fix partial GGTT faulting
9c046f2b drm/i915: Choose partial chunksize based on tile row size
2fc7859 drm/i915: Move fence tracking from object to vma
0ded174 drm/i915: Rename fence.lru_list to link
49d370b drm/i915/userptr: Make gup errors stickier
63341dd drm/i915: Allocate rings from stolen
f8a981c drm/i915: Allow ringbuffers to be bound anywhere
dae7096 drm/i915: Move map-and-fenceable tracking to the VMA
263585f drm/i915: Disallow direct CPU access to stolen pages for relocations
19d22b8 drm/i915: Fallback to single page GTT mmappings for relocations
a85f8ac drm/i915: Refactor execbuffer relocation writing
1f60d9e drm/i915: Tidy up flush cpu/gtt write domains
4b5bf9f drm/i915: Pin the pages first in shmem prepare read/write
5ca92e8 drm/i915: Wait for writes through the GTT to land before reading back
f7bf8f0 drm/i915: Before accessing an object via the cpu, flush GTT writes
95fbe56 drm/i915: Extract i915_gem_obj_prepare_shmem_write()
76d9a19 drm/i915: Cache kmap between relocations

* Re: [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-16 10:42 ` [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
@ 2016-08-16 11:31   ` Joonas Lahtinen
  2016-08-16 11:46     ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> @@ -1751,6 +1759,12 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  			      (area->vm_end - area->vm_start) / PAGE_SIZE -
>  			      partial.params.partial.offset);
>  
> +		/* If the partial covers the entire object, just create a
> +		 * normal VMA.
> +		 */
> +		if (chunk_size <= obj->base.size >> PAGE_SHIFT)

Surely you meant >= ?

> +			partial.type = I915_GGTT_VIEW_NORMAL;
> +

The 'partial' name is now even worse; at least this is close to the
call site.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 15/22] drm/i915: Move fence tracking from object to vma
  2016-08-16 10:42 ` [PATCH 15/22] drm/i915: Move fence tracking from object to vma Chris Wilson
@ 2016-08-16 11:38   ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> In order to handle tiled partial GTT mmappings, we need to associate the
> fence with an individual vma.
> 
> v2: A couple of silly drops replaced spotted by Joonas

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 14/22] drm/i915: Rename fence.lru_list to link
  2016-08-16 10:42 ` [PATCH 14/22] drm/i915: Rename fence.lru_list to link Chris Wilson
@ 2016-08-16 11:39   ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> Our current practice is to only name the actual list (here
> dev_priv->fence_list) using "list", and elements upon that list are
> referred to as "link". Further, the lru nature is of the list, not of
> the node, and including it in the name does not disambiguate the link
> from anything else.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT
  2016-08-16 11:31   ` Joonas Lahtinen
@ 2016-08-16 11:46     ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 11:46 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Tue, Aug 16, 2016 at 02:31:00PM +0300, Joonas Lahtinen wrote:
> On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> > @@ -1751,6 +1759,12 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
> >  			      (area->vm_end - area->vm_start) / PAGE_SIZE -
> >  			      partial.params.partial.offset);
> >  
> > +		/* If the partial covers the entire object, just create a
> > +		 * normal VMA.
> > +		 */
> > +		if (chunk_size <= obj->base.size >> PAGE_SHIFT)
> 
> Surely you meant >= ?

Sigh. Yes.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 12/22] drm/i915: Allocate rings from stolen
  2016-08-16 10:42 ` [PATCH 12/22] drm/i915: Allocate rings from stolen Chris Wilson
@ 2016-08-16 11:47   ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> @@ -1949,10 +1949,8 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
>  	struct drm_i915_gem_object *obj;
>  	struct i915_vma *vma;
>  
> -	obj = ERR_PTR(-ENODEV);
> -	if (!HAS_LLC(dev_priv))
> -		obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
> -	if (IS_ERR(obj))

This previous test should have been IS_ERR_OR_NULL(), so the fallback
to the normal path has not worked on LLC platforms.

> +	obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
> +	if (obj == NULL)

!obj

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 09/22] drm/i915: Disallow direct CPU access to stolen pages for relocations
  2016-08-16 10:42 ` [PATCH 09/22] drm/i915: Disallow direct CPU access to stolen pages " Chris Wilson
@ 2016-08-16 11:49   ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> As we cannot access the backing pages behind stolen objects, we should
> not attempt to do so for relocations.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 05/22] drm/i915: Pin the pages first in shmem prepare read/write
  2016-08-16 10:42 ` [PATCH 05/22] drm/i915: Pin the pages first in shmem prepare read/write Chris Wilson
@ 2016-08-16 11:57   ` Joonas Lahtinen
  2016-08-16 12:02     ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 11:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> @@ -622,6 +622,12 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>  	if (ret)
>  		return ret;
>  
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_pin_pages(obj);

The pin_pages function is now __must_check (or I'm lost between the
series), so:

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

>  int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> @@ -664,6 +668,12 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>  	if (ret)
>  		return ret;
>  
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_pin_pages(obj);
> +

Ditto.

With those (or me proved wrong on this);

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 05/22] drm/i915: Pin the pages first in shmem prepare read/write
  2016-08-16 11:57   ` Joonas Lahtinen
@ 2016-08-16 12:02     ` Chris Wilson
  2016-08-16 12:46       ` Joonas Lahtinen
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-16 12:02 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Tue, Aug 16, 2016 at 02:57:15PM +0300, Joonas Lahtinen wrote:
> On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> > @@ -622,6 +622,12 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
> >  	if (ret)
> >  		return ret;
> >  
> > +	ret = i915_gem_object_get_pages(obj);
> > +	if (ret)
> > +		return ret;
> > +
> > +	i915_gem_object_pin_pages(obj);
> 
> The pin_pages function is now __must_check (or I'm lost between the
> series), so:

We haven't applied the get/pin pages API changes yet.

I've penciled those in for later when splitting out a lock for the
backing storage. Other people may want the changes earlier?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 05/22] drm/i915: Pin the pages first in shmem prepare read/write
  2016-08-16 12:02     ` Chris Wilson
@ 2016-08-16 12:46       ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 12:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On ti, 2016-08-16 at 13:02 +0100, Chris Wilson wrote:
> On Tue, Aug 16, 2016 at 02:57:15PM +0300, Joonas Lahtinen wrote:
> > 
> > On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> > > 
> > > @@ -622,6 +622,12 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > +	ret = i915_gem_object_get_pages(obj);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	i915_gem_object_pin_pages(obj);
> > The pin_pages function is now __must_check (or I'm lost between the
> > series), so:
> We haven't applied the get/pin pages API changes yet.
> 
> I've penciled those in for later when splitting out a lock for the
> backing storage. Other people may want the changes earlier?

Then the code should be fine as is. You can add my R-b.

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 11/22] drm/i915: Allow ringbuffers to be bound anywhere
  2016-08-16 10:42 ` [PATCH 11/22] drm/i915: Allow ringbuffers to be bound anywhere Chris Wilson
@ 2016-08-16 12:52   ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-16 12:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> Now that we have WC vmapping available, we can bind our rings anywhere
> in the GGTT and do not need to restrict them to the mappable region.
> Except for stolen objects, for which direct access is verbatim and we
> must use the mappable aperture.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

This could use some Tested-by tags on affected generations.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 08/22] drm/i915: Fallback to single page GTT mmappings for relocations
  2016-08-16 10:42 ` [PATCH 08/22] drm/i915: Fallback to single page GTT mmappings for relocations Chris Wilson
@ 2016-08-17  7:19   ` Joonas Lahtinen
  2016-08-17  7:57     ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-17  7:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> @@ -360,8 +361,18 @@ static void reloc_cache_fini(struct reloc_cache *cache)
>  		kunmap_atomic(vaddr);
>  		i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);
>  	} else {
> +		wmb();

Why wmb() here? In the other spot it's inside if (cache->node.allocated).
This alters the original code path, too.

>  		io_mapping_unmap_atomic(vaddr);
> -		i915_vma_unpin((struct i915_vma *)cache->node.mm);
> +		if (cache->node.allocated) {
> +			struct i915_ggtt *ggtt = &cache->i915->ggtt;
> +
> +			ggtt->base.clear_range(&ggtt->base,
> +					       cache->node.start,
> +					       cache->node.size,
> +					       true);
> +			drm_mm_remove_node(&cache->node);
> +		} else
> +			i915_vma_unpin((struct i915_vma *)cache->node.mm);

Pretty sure this needs braces too; "This does not apply if only one
branch of a conditional statement is a single statement; in the latter
case use braces in both branches:"

> @@ -415,21 +437,38 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>  
>  		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>  					       PIN_MAPPABLE | PIN_NONBLOCK);
> -		if (IS_ERR(vma))
> -			return NULL;
> +		if (IS_ERR(vma)) {
> +			memset(&cache->node, 0, sizeof(cache->node));
> +			ret = drm_mm_insert_node_in_range_generic
> +				(&ggtt->base.mm, &cache->node,

That brace sure needs to be on the previous line. With above fixes;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 08/22] drm/i915: Fallback to single page GTT mmappings for relocations
  2016-08-17  7:19   ` Joonas Lahtinen
@ 2016-08-17  7:57     ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-08-17  7:57 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Wed, Aug 17, 2016 at 10:19:50AM +0300, Joonas Lahtinen wrote:
> On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> > @@ -360,8 +361,18 @@ static void reloc_cache_fini(struct reloc_cache *cache)
> >  		kunmap_atomic(vaddr);
> >  		i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);
> >  	} else {
> > +		wmb();
> 
> Why wmb() here? In the other spot it's inside if (cache->node.allocated).
> This alters the original code path, too.

Because it's required for the single-page path, and it's not a huge cost
for the whole mapping since we are required to flush the write-combining
buffer anyway. It is simpler to write and reason about with just a
single path.
 
> >  		io_mapping_unmap_atomic(vaddr);
> > -		i915_vma_unpin((struct i915_vma *)cache->node.mm);
> > +		if (cache->node.allocated) {
> > +			struct i915_ggtt *ggtt = &cache->i915->ggtt;
> > +
> > +			ggtt->base.clear_range(&ggtt->base,
> > +					       cache->node.start,
> > +					       cache->node.size,
> > +					       true);
> > +			drm_mm_remove_node(&cache->node);
> > +		} else
> > +			i915_vma_unpin((struct i915_vma *)cache->node.mm);
> 
> Pretty sure this needs braces too; "This does not apply if only one
> branch of a conditional statement is a single statement; in the latter
> case use braces in both branches:"
> 
> > @@ -415,21 +437,38 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
> >  
> >  		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> >  					       PIN_MAPPABLE | PIN_NONBLOCK);
> > -		if (IS_ERR(vma))
> > -			return NULL;
> > +		if (IS_ERR(vma)) {
> > +			memset(&cache->node, 0, sizeof(cache->node));
> > +			ret = drm_mm_insert_node_in_range_generic
> > +				(&ggtt->base.mm, &cache->node,
> 
> That brace sure needs to be on the previous line. With above fixes;

What brace? The bracket? No, it doesn't fit.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 07/22] drm/i915: Refactor execbuffer relocation writing
  2016-08-16 10:42 ` [PATCH 07/22] drm/i915: Refactor execbuffer relocation writing Chris Wilson
@ 2016-08-17  8:47   ` Joonas Lahtinen
  2016-08-17  8:57     ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-17  8:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> @@ -278,6 +283,9 @@ static void eb_destroy(struct eb_vmas *eb)
>  
>  static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
>  {
> +	if (DBG_USE_CPU_RELOC)
> +		return DBG_USE_CPU_RELOC > 0;

If DBG_USE_CPU_RELOC == 0, this path is never taken. So it would have
to be set to -1 to yield false, which is unexpected when defining it; 0
and 1 are the de facto values. So drop a comment at the defining site.
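
Something like this sketch of the suggested comment (not in the patch):

	/* -1 = never use CPU relocations, 0 = heuristic, 1 = always */
	#define DBG_USE_CPU_RELOC 0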

> +#define KMAP 0x4

At least make a comment that CLFLUSH_AFTER and CLFLUSH_BEFORE are also
in the flag set.

> +
>  static void reloc_cache_fini(struct reloc_cache *cache)
>  {
> +	void *vaddr;
> +
>  	if (!cache->vaddr)
>  		return;
>  
> -	switch (cache->type) {
> -	case KMAP:
> -		kunmap_atomic(cache->vaddr);
> -		break;
> +	vaddr = unmask_page(cache->vaddr);
> +	if (cache->vaddr & KMAP) {
> +		if (cache->vaddr & CLFLUSH_AFTER)
> +			mb();
>  
> -	case IOMAP:
> -		io_mapping_unmap_atomic(cache->vaddr);
> -		break;
> +		kunmap_atomic(vaddr);
> +		i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);

I'd prefer i915_gem_obj_cleanup_shmem_write() for symmetry or a comment
here.

> +	} else {
> +		io_mapping_unmap_atomic(vaddr);
> +		i915_vma_unpin((struct i915_vma *)cache->node.mm);

This does have a clear counterpart.

> -	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
> +	clflush_write32(vaddr + offset_in_page(offset),
> +			lower_32_bits(target_offset),
> +			cache->vaddr);

unmask_flags(cache->vaddr) for clarity

This could use another set of eyes; the patch is horribly mangled.

But with my eyes, with couple of comments added;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
* Re: [PATCH 07/22] drm/i915: Refactor execbuffer relocation writing
  2016-08-17  8:47   ` Joonas Lahtinen
@ 2016-08-17  8:57     ` Chris Wilson
  2016-08-17  9:26       ` Joonas Lahtinen
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-08-17  8:57 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Wed, Aug 17, 2016 at 11:47:07AM +0300, Joonas Lahtinen wrote:
> On ti, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> > @@ -278,6 +283,9 @@ static void eb_destroy(struct eb_vmas *eb)
> >  
> >  static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> >  {
> > +	if (DBG_USE_CPU_RELOC)
> > +		return DBG_USE_CPU_RELOC > 0;
> 
> If DBG_USE_CPU_RELOC == 0, this path is never taken, so the macro
> would have to be set to -1 to force false. That is surprising when
> defining it; 0 and 1 are the de facto values. So leave a comment at
> the defining site.
> 
> > +#define KMAP 0x4
> 
> At least add a comment noting that CLFLUSH_AFTER and CLFLUSH_BEFORE
> are also part of this flag set.

#define CLFLUSH_FLAGS (BEFORE | AFTER)

BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS)
BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
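
Putting Chris's suggestion together, a compilable sketch of the flag
layout (the CLFLUSH_BEFORE/CLFLUSH_AFTER values are assumed; KMAP 0x4
is from the quoted patch). The build-time asserts guarantee the flags
neither alias each other nor spill out of the page-offset bits of the
address they are packed into:

	#define CLFLUSH_BEFORE	0x1	/* assumed value */
	#define CLFLUSH_AFTER	0x2	/* assumed value */
	#define KMAP		0x4	/* from the quoted patch */
	#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)

	static void reloc_cache_flags_selftest(void)
	{
		/* The flags must occupy distinct bits... */
		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
		/* ...and all fit below the page-aligned address bits. */
		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
	}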

* Re: [PATCH 07/22] drm/i915: Refactor execbuffer relocation writing
  2016-08-17  8:57     ` Chris Wilson
@ 2016-08-17  9:26       ` Joonas Lahtinen
  0 siblings, 0 replies; 40+ messages in thread
From: Joonas Lahtinen @ 2016-08-17  9:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, 2016-08-17 at 09:57 +0100, Chris Wilson wrote:
> On Wed, Aug 17, 2016 at 11:47:07AM +0300, Joonas Lahtinen wrote:
> > 
> > On Tue, 2016-08-16 at 11:42 +0100, Chris Wilson wrote:
> > > 
> > > @@ -278,6 +283,9 @@ static void eb_destroy(struct eb_vmas *eb)
> > >  
> > >  static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> > >  {
> > > +	if (DBG_USE_CPU_RELOC)
> > > +		return DBG_USE_CPU_RELOC > 0;
> > If DBG_USE_CPU_RELOC == 0, this path is never taken, so the macro
> > would have to be set to -1 to force false. That is surprising when
> > defining it; 0 and 1 are the de facto values. So leave a comment at
> > the defining site.
> > 
> > > 
> > > +#define KMAP 0x4
> > At least add a comment noting that CLFLUSH_AFTER and CLFLUSH_BEFORE
> > are also part of this flag set.
> #define CLFLUSH_FLAGS (BEFORE | AFTER)
> 
> BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS)
> BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK)

That's even better!

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

Thread overview: 40+ messages
2016-08-16 10:42 An almost complete set of reviewed patches for tiled partial Chris Wilson
2016-08-16 10:42 ` [PATCH 01/22] drm/i915: Cache kmap between relocations Chris Wilson
2016-08-16 10:42 ` [PATCH 02/22] drm/i915: Extract i915_gem_obj_prepare_shmem_write() Chris Wilson
2016-08-16 10:42 ` [PATCH 03/22] drm/i915: Before accessing an object via the cpu, flush GTT writes Chris Wilson
2016-08-16 10:42 ` [PATCH 04/22] drm/i915: Wait for writes through the GTT to land before reading back Chris Wilson
2016-08-16 10:42 ` [PATCH 05/22] drm/i915: Pin the pages first in shmem prepare read/write Chris Wilson
2016-08-16 11:57   ` Joonas Lahtinen
2016-08-16 12:02     ` Chris Wilson
2016-08-16 12:46       ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 06/22] drm/i915: Tidy up flush cpu/gtt write domains Chris Wilson
2016-08-16 10:42 ` [PATCH 07/22] drm/i915: Refactor execbuffer relocation writing Chris Wilson
2016-08-17  8:47   ` Joonas Lahtinen
2016-08-17  8:57     ` Chris Wilson
2016-08-17  9:26       ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 08/22] drm/i915: Fallback to single page GTT mmappings for relocations Chris Wilson
2016-08-17  7:19   ` Joonas Lahtinen
2016-08-17  7:57     ` Chris Wilson
2016-08-16 10:42 ` [PATCH 09/22] drm/i915: Disallow direct CPU access to stolen pages " Chris Wilson
2016-08-16 11:49   ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 10/22] drm/i915: Move map-and-fenceable tracking to the VMA Chris Wilson
2016-08-16 10:42 ` [PATCH 11/22] drm/i915: Allow ringbuffers to be bound anywhere Chris Wilson
2016-08-16 12:52   ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 12/22] drm/i915: Allocate rings from stolen Chris Wilson
2016-08-16 11:47   ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 13/22] drm/i915/userptr: Make gup errors stickier Chris Wilson
2016-08-16 10:42 ` [PATCH 14/22] drm/i915: Rename fence.lru_list to link Chris Wilson
2016-08-16 11:39   ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 15/22] drm/i915: Move fence tracking from object to vma Chris Wilson
2016-08-16 11:38   ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 16/22] drm/i915: Choose partial chunksize based on tile row size Chris Wilson
2016-08-16 10:42 ` [PATCH 17/22] drm/i915: Fix partial GGTT faulting Chris Wilson
2016-08-16 10:42 ` [PATCH 18/22] drm/i915: Choose not to evict faultable objects from the GGTT Chris Wilson
2016-08-16 11:31   ` Joonas Lahtinen
2016-08-16 11:46     ` Chris Wilson
2016-08-16 10:42 ` [PATCH 19/22] drm/i915: Fallback to using unmappable memory for scanout Chris Wilson
2016-08-16 10:42 ` [PATCH 20/22] drm/i915: Track display alignment on VMA Chris Wilson
2016-08-16 10:42 ` [PATCH 21/22] drm/i915: Bump the inactive tracking for all VMA accessed Chris Wilson
2016-08-16 11:20   ` Joonas Lahtinen
2016-08-16 10:42 ` [PATCH 22/22] drm/i915: Stop discarding GTT cache-domain on unbind vma Chris Wilson
2016-08-16 11:25 ` ✗ Ro.CI.BAT: failure for series starting with [01/22] drm/i915: Cache kmap between relocations Patchwork
