All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only"
@ 2020-06-23 14:28 Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 02/26] drm/i915: Revert relocation chaining commits Maarten Lankhorst
                   ` (30 more replies)
  0 siblings, 31 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

This reverts commit 9e0f9464e2ab36b864359a59b0e9058fdef0ce47,
and related commit 7ac2d2536dfa7 ("drm/i915/gem: Delete unused code").

The reverted commits break the execbuf ww locking series.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 314 ++++++++++++++++--
 .../i915/gem/selftests/i915_gem_execbuffer.c  |  21 +-
 2 files changed, 308 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c38ab51e82f0..ef488acf44db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -45,6 +45,13 @@ struct eb_vma_array {
 	struct eb_vma vma[];
 };
 
+enum {
+	FORCE_CPU_RELOC = 1,
+	FORCE_GTT_RELOC,
+	FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
 #define __EXEC_OBJECT_HAS_PIN		BIT(31)
 #define __EXEC_OBJECT_HAS_FENCE		BIT(30)
 #define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
@@ -253,6 +260,8 @@ struct i915_execbuffer {
 	 */
 	struct reloc_cache {
 		struct drm_mm_node node; /** temporary GTT binding */
+		unsigned long vaddr; /** Current kmap address */
+		unsigned long page; /** Currently mapped page index */
 		unsigned int gen; /** Cached value of INTEL_GEN */
 		bool use_64bit_reloc : 1;
 		bool has_llc : 1;
@@ -596,6 +605,23 @@ eb_add_vma(struct i915_execbuffer *eb,
 	}
 }
 
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+				const struct drm_i915_gem_object *obj)
+{
+	if (!i915_gem_object_has_struct_page(obj))
+		return false;
+
+	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+		return true;
+
+	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+		return false;
+
+	return (cache->has_llc ||
+		obj->cache_dirty ||
+		obj->cache_level != I915_CACHE_NONE);
+}
+
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			  struct eb_vma *ev,
 			  u64 pin_flags)
@@ -919,6 +945,8 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 static void reloc_cache_init(struct reloc_cache *cache,
 			     struct drm_i915_private *i915)
 {
+	cache->page = -1;
+	cache->vaddr = 0;
 	/* Must be a variable in the struct to allow GCC to unroll. */
 	cache->gen = INTEL_GEN(i915);
 	cache->has_llc = HAS_LLC(i915);
@@ -930,6 +958,25 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->target = NULL;
 }
 
+static inline void *unmask_page(unsigned long p)
+{
+	return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+	return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+	struct drm_i915_private *i915 =
+		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+	return &i915->ggtt;
+}
+
 #define RELOC_TAIL 4
 
 static int reloc_gpu_chain(struct reloc_cache *cache)
@@ -1042,6 +1089,181 @@ static int reloc_gpu_flush(struct reloc_cache *cache)
 	return err;
 }
 
+static void reloc_cache_reset(struct reloc_cache *cache)
+{
+	void *vaddr;
+
+	if (!cache->vaddr)
+		return;
+
+	vaddr = unmask_page(cache->vaddr);
+	if (cache->vaddr & KMAP) {
+		if (cache->vaddr & CLFLUSH_AFTER)
+			mb();
+
+		kunmap_atomic(vaddr);
+		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+	} else {
+		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+		io_mapping_unmap_atomic((void __iomem *)vaddr);
+
+		if (drm_mm_node_allocated(&cache->node)) {
+			ggtt->vm.clear_range(&ggtt->vm,
+					     cache->node.start,
+					     cache->node.size);
+			mutex_lock(&ggtt->vm.mutex);
+			drm_mm_remove_node(&cache->node);
+			mutex_unlock(&ggtt->vm.mutex);
+		} else {
+			i915_vma_unpin((struct i915_vma *)cache->node.mm);
+		}
+	}
+
+	cache->vaddr = 0;
+	cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+			struct reloc_cache *cache,
+			unsigned long page)
+{
+	void *vaddr;
+
+	if (cache->vaddr) {
+		kunmap_atomic(unmask_page(cache->vaddr));
+	} else {
+		unsigned int flushes;
+		int err;
+
+		err = i915_gem_object_prepare_write(obj, &flushes);
+		if (err)
+			return ERR_PTR(err);
+
+		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
+
+		cache->vaddr = flushes | KMAP;
+		cache->node.mm = (void *)obj;
+		if (flushes)
+			mb();
+	}
+
+	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+	cache->page = page;
+
+	return vaddr;
+}
+
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 unsigned long page)
+{
+	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+	unsigned long offset;
+	void *vaddr;
+
+	if (cache->vaddr) {
+		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
+	} else {
+		struct i915_vma *vma;
+		int err;
+
+		if (i915_gem_object_is_tiled(obj))
+			return ERR_PTR(-EINVAL);
+
+		if (use_cpu_reloc(cache, obj))
+			return NULL;
+
+		i915_gem_object_lock(obj);
+		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
+		if (err)
+			return ERR_PTR(err);
+
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK /* NOWARN */ |
+					       PIN_NOEVICT);
+		if (IS_ERR(vma)) {
+			memset(&cache->node, 0, sizeof(cache->node));
+			mutex_lock(&ggtt->vm.mutex);
+			err = drm_mm_insert_node_in_range
+				(&ggtt->vm.mm, &cache->node,
+				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+				 0, ggtt->mappable_end,
+				 DRM_MM_INSERT_LOW);
+			mutex_unlock(&ggtt->vm.mutex);
+			if (err) /* no inactive aperture space, use cpu reloc */
+				return NULL;
+		} else {
+			cache->node.start = vma->node.start;
+			cache->node.mm = (void *)vma;
+		}
+	}
+
+	offset = cache->node.start;
+	if (drm_mm_node_allocated(&cache->node)) {
+		ggtt->vm.insert_page(&ggtt->vm,
+				     i915_gem_object_get_dma_address(obj, page),
+				     offset, I915_CACHE_NONE, 0);
+	} else {
+		offset += page << PAGE_SHIFT;
+	}
+
+	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
+							 offset);
+	cache->page = page;
+	cache->vaddr = (unsigned long)vaddr;
+
+	return vaddr;
+}
+
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 unsigned long page)
+{
+	void *vaddr;
+
+	if (cache->page == page) {
+		vaddr = unmask_page(cache->vaddr);
+	} else {
+		vaddr = NULL;
+		if ((cache->vaddr & KMAP) == 0)
+			vaddr = reloc_iomap(obj, cache, page);
+		if (!vaddr)
+			vaddr = reloc_kmap(obj, cache, page);
+	}
+
+	return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+		if (flushes & CLFLUSH_BEFORE) {
+			clflushopt(addr);
+			mb();
+		}
+
+		*addr = value;
+
+		/*
+		 * Writes to the same cacheline are serialised by the CPU
+		 * (including clflush). On the write path, we only require
+		 * that it hits memory in an orderly fashion and place
+		 * mb barriers at the start and end of the relocation phase
+		 * to ensure ordering of clflush wrt to the system.
+		 */
+		if (flushes & CLFLUSH_AFTER)
+			clflushopt(addr);
+	} else
+		*addr = value;
+}
+
 static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
@@ -1207,6 +1429,17 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 	return cmd;
 }
 
+static inline bool use_reloc_gpu(struct i915_vma *vma)
+{
+	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+		return true;
+
+	if (DBG_FORCE_RELOC)
+		return false;
+
+	return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
+
 static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
 {
 	struct page *page;
@@ -1221,10 +1454,10 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
 	return addr + offset_in_page(offset);
 }
 
-static int __reloc_entry_gpu(struct i915_execbuffer *eb,
-			     struct i915_vma *vma,
-			     u64 offset,
-			     u64 target_addr)
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+			      struct i915_vma *vma,
+			      u64 offset,
+			      u64 target_addr)
 {
 	const unsigned int gen = eb->reloc_cache.gen;
 	unsigned int len;
@@ -1240,7 +1473,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 
 	batch = reloc_gpu(eb, vma, len);
 	if (IS_ERR(batch))
-		return PTR_ERR(batch);
+		return false;
 
 	addr = gen8_canonical_addr(vma->node.start + offset);
 	if (gen >= 8) {
@@ -1289,21 +1522,55 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 		*batch++ = target_addr;
 	}
 
-	return 0;
+	return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+			    struct i915_vma *vma,
+			    u64 offset,
+			    u64 target_addr)
+{
+	if (eb->reloc_cache.vaddr)
+		return false;
+
+	if (!use_reloc_gpu(vma))
+		return false;
+
+	return __reloc_entry_gpu(eb, vma, offset, target_addr);
 }
 
 static u64
-relocate_entry(struct i915_execbuffer *eb,
-	       struct i915_vma *vma,
+relocate_entry(struct i915_vma *vma,
 	       const struct drm_i915_gem_relocation_entry *reloc,
+	       struct i915_execbuffer *eb,
 	       const struct i915_vma *target)
 {
 	u64 target_addr = relocation_target(reloc, target);
-	int err;
-
-	err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
-	if (err)
-		return err;
+	u64 offset = reloc->offset;
+
+	if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+		bool wide = eb->reloc_cache.use_64bit_reloc;
+		void *vaddr;
+
+repeat:
+		vaddr = reloc_vaddr(vma->obj,
+				    &eb->reloc_cache,
+				    offset >> PAGE_SHIFT);
+		if (IS_ERR(vaddr))
+			return PTR_ERR(vaddr);
+
+		GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+		clflush_write32(vaddr + offset_in_page(offset),
+				lower_32_bits(target_addr),
+				eb->reloc_cache.vaddr);
+
+		if (wide) {
+			offset += sizeof(u32);
+			target_addr >>= 32;
+			wide = false;
+			goto repeat;
+		}
+	}
 
 	return target->node.start | UPDATE;
 }
@@ -1368,7 +1635,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * If the relocation already has the right value in it, no
 	 * more work needs to be done.
 	 */
-	if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+	if (!DBG_FORCE_RELOC &&
+	    gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
@@ -1400,7 +1668,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	ev->flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
-	return relocate_entry(eb, ev->vma, reloc, target->vma);
+	return relocate_entry(ev->vma, reloc, eb, target->vma);
 }
 
 static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
@@ -1438,8 +1706,10 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 		 * this is bad and so lockdep complains vehemently.
 		 */
 		copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
-		if (unlikely(copied))
-			return -EFAULT;
+		if (unlikely(copied)) {
+			remain = -EFAULT;
+			goto out;
+		}
 
 		remain -= count;
 		do {
@@ -1447,7 +1717,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 
 			if (likely(offset == 0)) {
 			} else if ((s64)offset < 0) {
-				return (int)offset;
+				remain = (int)offset;
+				goto out;
 			} else {
 				/*
 				 * Note that reporting an error now
@@ -1477,8 +1748,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 		} while (r++, --count);
 		urelocs += ARRAY_SIZE(stack);
 	} while (remain);
-
-	return 0;
+out:
+	reloc_cache_reset(&eb->reloc_cache);
+	return remain;
 }
 
 static int eb_relocate(struct i915_execbuffer *eb)
@@ -2386,7 +2658,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.i915 = i915;
 	eb.file = file;
 	eb.args = args;
-	if (!(args->flags & I915_EXEC_NO_RELOC))
+	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
 
 	eb.exec = exec;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 57c14d3340cd..a49016f8ee0d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -37,14 +37,20 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 		return err;
 
 	/* 8-Byte aligned */
-	err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
-	if (err)
+	if (!__reloc_entry_gpu(eb, vma,
+			       offsets[0] * sizeof(u32),
+			       0)) {
+		err = -EIO;
 		goto unpin_vma;
+	}
 
 	/* !8-Byte aligned */
-	err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
-	if (err)
+	if (!__reloc_entry_gpu(eb, vma,
+			       offsets[1] * sizeof(u32),
+			       1)) {
+		err = -EIO;
 		goto unpin_vma;
+	}
 
 	/* Skip to the end of the cmd page */
 	i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
@@ -54,9 +60,12 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	eb->reloc_cache.rq_size += i;
 
 	/* Force batch chaining */
-	err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
-	if (err)
+	if (!__reloc_entry_gpu(eb, vma,
+			       offsets[2] * sizeof(u32),
+			       2)) {
+		err = -EIO;
 		goto unpin_vma;
+	}
 
 	GEM_BUG_ON(!eb->reloc_cache.rq);
 	rq = i915_request_get(eb->reloc_cache.rq);

base-commit: 24b806b0a1dd38c734e771ece9dd1ab6492bbb96
prerequisite-patch-id: e6315738715ac4ffccaeb4c4bf5a94651fb8da1d
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 02/26] drm/i915: Revert relocation chaining commits.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 03/26] Revert "drm/i915/gem: Drop relocation slowpath" Maarten Lankhorst
                   ` (29 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

This reverts commit 964a9b0f611ee ("drm/i915/gem: Use chained reloc batches")
and commit 0e97fbb080553 ("drm/i915/gem: Use a single chained reloc batches
for a single execbuf").

Relocation chaining breaks ww mutex -EDEADLK handling, and we can deal with
relocations fine without it.  The ww mutexes protect concurrent access to
the BOs.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 171 ++++--------------
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   8 +-
 2 files changed, 35 insertions(+), 144 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index ef488acf44db..ea8c668d76e0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -268,9 +268,7 @@ struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
-		struct i915_vma *target;
 		struct i915_request *rq;
-		struct i915_vma *rq_vma;
 		u32 *rq_cmd;
 		unsigned int rq_size;
 	} reloc_cache;
@@ -955,7 +953,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.flags = 0;
 	cache->rq = NULL;
-	cache->target = NULL;
+	cache->rq_size = 0;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -977,122 +975,29 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
-#define RELOC_TAIL 4
-
-static int reloc_gpu_chain(struct reloc_cache *cache)
+static void reloc_gpu_flush(struct reloc_cache *cache)
 {
-	struct intel_gt_buffer_pool_node *pool;
-	struct i915_request *rq = cache->rq;
-	struct i915_vma *batch;
-	u32 *cmd;
-	int err;
-
-	pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
-	if (IS_ERR(pool))
-		return PTR_ERR(pool);
-
-	batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_pool;
-	}
-
-	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
-	if (err)
-		goto out_pool;
-
-	GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE  / sizeof(u32));
-	cmd = cache->rq_cmd + cache->rq_size;
-	*cmd++ = MI_ARB_CHECK;
-	if (cache->gen >= 8)
-		*cmd++ = MI_BATCH_BUFFER_START_GEN8;
-	else if (cache->gen >= 6)
-		*cmd++ = MI_BATCH_BUFFER_START;
-	else
-		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	*cmd++ = lower_32_bits(batch->node.start);
-	*cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
-	i915_gem_object_flush_map(cache->rq_vma->obj);
-	i915_gem_object_unpin_map(cache->rq_vma->obj);
-	cache->rq_vma = NULL;
-
-	err = intel_gt_buffer_pool_mark_active(pool, rq);
-	if (err == 0) {
-		i915_vma_lock(batch);
-		err = i915_request_await_object(rq, batch->obj, false);
-		if (err == 0)
-			err = i915_vma_move_to_active(batch, rq, 0);
-		i915_vma_unlock(batch);
-	}
-	i915_vma_unpin(batch);
-	if (err)
-		goto out_pool;
+	struct drm_i915_gem_object *obj = cache->rq->batch->obj;
 
-	cmd = i915_gem_object_pin_map(batch->obj,
-				      cache->has_llc ?
-				      I915_MAP_FORCE_WB :
-				      I915_MAP_FORCE_WC);
-	if (IS_ERR(cmd)) {
-		err = PTR_ERR(cmd);
-		goto out_pool;
-	}
+	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 
-	/* Return with batch mapping (cmd) still pinned */
-	cache->rq_cmd = cmd;
-	cache->rq_size = 0;
-	cache->rq_vma = batch;
+	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+	i915_gem_object_unpin_map(obj);
 
-out_pool:
-	intel_gt_buffer_pool_put(pool);
-	return err;
-}
+	intel_gt_chipset_flush(cache->rq->engine->gt);
 
-static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
-{
-	return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
-}
-
-static int reloc_gpu_flush(struct reloc_cache *cache)
-{
-	struct i915_request *rq;
-	int err;
-
-	rq = fetch_and_zero(&cache->rq);
-	if (!rq)
-		return 0;
-
-	if (cache->rq_vma) {
-		struct drm_i915_gem_object *obj = cache->rq_vma->obj;
-
-		GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
-		cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
-
-		__i915_gem_object_flush_map(obj,
-					    0, sizeof(u32) * cache->rq_size);
-		i915_gem_object_unpin_map(obj);
-	}
-
-	err = 0;
-	if (rq->engine->emit_init_breadcrumb)
-		err = rq->engine->emit_init_breadcrumb(rq);
-	if (!err)
-		err = rq->engine->emit_bb_start(rq,
-						rq->batch->node.start,
-						PAGE_SIZE,
-						reloc_bb_flags(cache));
-	if (err)
-		i915_request_set_error_once(rq, err);
-
-	intel_gt_chipset_flush(rq->engine->gt);
-	i915_request_add(rq);
-
-	return err;
+	i915_request_add(cache->rq);
+	cache->rq = NULL;
 }
 
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
+	if (cache->rq)
+		reloc_gpu_flush(cache);
+
 	if (!cache->vaddr)
 		return;
 
@@ -1286,6 +1191,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct intel_engine_cs *engine,
+			     struct i915_vma *vma,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
@@ -1308,7 +1214,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto out_pool;
 	}
 
-	batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
+	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto err_unmap;
@@ -1344,6 +1250,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
+	err = reloc_move_to_gpu(rq, vma);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_bb_start(rq,
+					batch->node.start, PAGE_SIZE,
+					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+	if (err)
+		goto skip_request;
+
 	i915_vma_lock(batch);
 	err = i915_request_await_object(rq, batch->obj, false);
 	if (err == 0)
@@ -1358,7 +1274,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
 	cache->rq_size = 0;
-	cache->rq_vma = batch;
 
 	/* Return with batch mapping (cmd) still pinned */
 	goto out_pool;
@@ -1387,9 +1302,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
 	u32 *cmd;
-	int err;
+
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+		reloc_gpu_flush(cache);
 
 	if (unlikely(!cache->rq)) {
+		int err;
 		struct intel_engine_cs *engine = eb->engine;
 
 		if (!reloc_can_use_engine(engine)) {
@@ -1398,31 +1316,11 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 				return ERR_PTR(-ENODEV);
 		}
 
-		err = __reloc_gpu_alloc(eb, engine, len);
+		err = __reloc_gpu_alloc(eb, engine, vma, len);
 		if (unlikely(err))
 			return ERR_PTR(err);
 	}
 
-	if (vma != cache->target) {
-		err = reloc_move_to_gpu(cache->rq, vma);
-		if (unlikely(err)) {
-			i915_request_set_error_once(cache->rq, err);
-			return ERR_PTR(err);
-		}
-
-		cache->target = vma;
-	}
-
-	if (unlikely(cache->rq_size + len >
-		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
-		err = reloc_gpu_chain(cache);
-		if (unlikely(err)) {
-			i915_request_set_error_once(cache->rq, err);
-			return ERR_PTR(err);
-		}
-	}
-
-	GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE  / sizeof(u32));
 	cmd = cache->rq_cmd + cache->rq_size;
 	cache->rq_size += len;
 
@@ -1770,20 +1668,15 @@ static int eb_relocate(struct i915_execbuffer *eb)
 	/* The objects are in their final locations, apply the relocations. */
 	if (eb->args->flags & __EXEC_HAS_RELOC) {
 		struct eb_vma *ev;
-		int flush;
 
 		list_for_each_entry(ev, &eb->relocs, reloc_link) {
 			err = eb_relocate_vma(eb, ev);
 			if (err)
-				break;
+				return err;
 		}
-
-		flush = reloc_gpu_flush(&eb->reloc_cache);
-		if (!err)
-			err = flush;
 	}
 
-	return err;
+	return 0;
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index a49016f8ee0d..580884cffec3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -53,13 +53,13 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	}
 
 	/* Skip to the end of the cmd page */
-	i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+	i = PAGE_SIZE / sizeof(u32) - 1;
 	i -= eb->reloc_cache.rq_size;
 	memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
 		 MI_NOOP, i);
 	eb->reloc_cache.rq_size += i;
 
-	/* Force batch chaining */
+	/* Force next batch */
 	if (!__reloc_entry_gpu(eb, vma,
 			       offsets[2] * sizeof(u32),
 			       2)) {
@@ -69,9 +69,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 
 	GEM_BUG_ON(!eb->reloc_cache.rq);
 	rq = i915_request_get(eb->reloc_cache.rq);
-	err = reloc_gpu_flush(&eb->reloc_cache);
-	if (err)
-		goto put_rq;
+	reloc_gpu_flush(&eb->reloc_cache);
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
 	err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 03/26] Revert "drm/i915/gem: Drop relocation slowpath".
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 02/26] drm/i915: Revert relocation chaining commits Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
                   ` (28 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld, Chris Wilson

This reverts commit 7dc8f1143778 ("drm/i915/gem: Drop relocation
slowpath"). We need the slowpath relocation for taking ww-mutex
inside the page fault handler, and we will take this mutex when
pinning all objects.

[mlankhorst: Adjusted for reloc_gpu_flush() changes]

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 249 +++++++++++++++++-
 1 file changed, 248 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index ea8c668d76e0..2b4c210638c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1603,7 +1603,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 		 * we would try to acquire the struct mutex again. Obviously
 		 * this is bad and so lockdep complains vehemently.
 		 */
-		copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
+		pagefault_disable();
+		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+		pagefault_enable();
 		if (unlikely(copied)) {
 			remain = -EFAULT;
 			goto out;
@@ -1651,6 +1653,248 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 	return remain;
 }
 
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
+{
+	const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+	struct drm_i915_gem_relocation_entry *relocs =
+		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < entry->relocation_count; i++) {
+		u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
+
+		if ((s64)offset < 0) {
+			err = (int)offset;
+			goto err;
+		}
+	}
+	err = 0;
+err:
+	reloc_cache_reset(&eb->reloc_cache);
+	return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+	const char __user *addr, *end;
+	unsigned long size;
+	char __maybe_unused c;
+
+	size = entry->relocation_count;
+	if (size == 0)
+		return 0;
+
+	if (size > N_RELOC(ULONG_MAX))
+		return -EINVAL;
+
+	addr = u64_to_user_ptr(entry->relocs_ptr);
+	size *= sizeof(struct drm_i915_gem_relocation_entry);
+	if (!access_ok(addr, size))
+		return -EFAULT;
+
+	end = addr + size;
+	for (; addr < end; addr += PAGE_SIZE) {
+		int err = __get_user(c, addr);
+		if (err)
+			return err;
+	}
+	return __get_user(c, end - 1);
+}
+
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+	struct drm_i915_gem_relocation_entry *relocs;
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < count; i++) {
+		const unsigned int nreloc = eb->exec[i].relocation_count;
+		struct drm_i915_gem_relocation_entry __user *urelocs;
+		unsigned long size;
+		unsigned long copied;
+
+		if (nreloc == 0)
+			continue;
+
+		err = check_relocations(&eb->exec[i]);
+		if (err)
+			goto err;
+
+		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+		size = nreloc * sizeof(*relocs);
+
+		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+		if (!relocs) {
+			err = -ENOMEM;
+			goto err;
+		}
+
+		/* copy_from_user is limited to < 4GiB */
+		copied = 0;
+		do {
+			unsigned int len =
+				min_t(u64, BIT_ULL(31), size - copied);
+
+			if (__copy_from_user((char *)relocs + copied,
+					     (char __user *)urelocs + copied,
+					     len))
+				goto end;
+
+			copied += len;
+		} while (copied < size);
+
+		/*
+		 * As we do not update the known relocation offsets after
+		 * relocating (due to the complexities in lock handling),
+		 * we need to mark them as invalid now so that we force the
+		 * relocation processing next time. Just in case the target
+		 * object is evicted and then rebound into its old
+		 * presumed_offset before the next execbuffer - if that
+		 * happened we would make the mistake of assuming that the
+		 * relocations were valid.
+		 */
+		if (!user_access_begin(urelocs, size))
+			goto end;
+
+		for (copied = 0; copied < nreloc; copied++)
+			unsafe_put_user(-1,
+					&urelocs[copied].presumed_offset,
+					end_user);
+		user_access_end();
+
+		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+	}
+
+	return 0;
+
+end_user:
+	user_access_end();
+end:
+	kvfree(relocs);
+	err = -EFAULT;
+err:
+	while (i--) {
+		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+		if (eb->exec[i].relocation_count)
+			kvfree(relocs);
+	}
+	return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		int err;
+
+		err = check_relocations(&eb->exec[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+{
+	bool have_copy = false;
+	struct eb_vma *ev;
+	int err = 0;
+
+repeat:
+	if (signal_pending(current)) {
+		err = -ERESTARTSYS;
+		goto out;
+	}
+
+	/*
+	 * We take 3 passes through the slowpath.
+	 *
+	 * 1 - we try to just prefault all the user relocation entries and
+	 * then attempt to reuse the atomic pagefault disabled fast path again.
+	 *
+	 * 2 - we copy the user entries to a local buffer here outside of the
+	 * lock and allow ourselves to wait upon any rendering before
+	 * relocations
+	 *
+	 * 3 - we already have a local copy of the relocation entries, but
+	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+	 */
+	if (!err) {
+		err = eb_prefault_relocations(eb);
+	} else if (!have_copy) {
+		err = eb_copy_relocations(eb);
+		have_copy = err == 0;
+	} else {
+		cond_resched();
+		err = 0;
+	}
+	if (err)
+		goto out;
+
+	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	if (err)
+		goto out;
+
+	list_for_each_entry(ev, &eb->relocs, reloc_link) {
+		if (!have_copy) {
+			pagefault_disable();
+			err = eb_relocate_vma(eb, ev);
+			pagefault_enable();
+			if (err)
+				break;
+		} else {
+			err = eb_relocate_vma_slow(eb, ev);
+			if (err)
+				break;
+		}
+	}
+
+	reloc_gpu_flush(&eb->reloc_cache);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+	if (err && !have_copy)
+		goto repeat;
+
+	if (err)
+		goto err;
+
+	/*
+	 * Leave the user relocations as are, this is the painfully slow path,
+	 * and we want to avoid the complication of dropping the lock whilst
+	 * having buffers reserved in the aperture and so causing spurious
+	 * ENOSPC for random operations.
+	 */
+
+err:
+	if (err == -EAGAIN)
+		goto repeat;
+
+out:
+	if (have_copy) {
+		const unsigned int count = eb->buffer_count;
+		unsigned int i;
+
+		for (i = 0; i < count; i++) {
+			const struct drm_i915_gem_exec_object2 *entry =
+				&eb->exec[i];
+			struct drm_i915_gem_relocation_entry *relocs;
+
+			if (!entry->relocation_count)
+				continue;
+
+			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+			kvfree(relocs);
+		}
+	}
+
+	return err;
+}
+
 static int eb_relocate(struct i915_execbuffer *eb)
 {
 	int err;
@@ -1674,6 +1918,9 @@ static int eb_relocate(struct i915_execbuffer *eb)
 			if (err)
 				return err;
 		}
+
+		if (err)
+			return eb_relocate_slow(eb);
 	}
 
 	return 0;
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 02/26] drm/i915: Revert relocation chaining commits Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 03/26] Revert "drm/i915/gem: Drop relocation slowpath" Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-24  7:10   ` Thomas Hellström (Intel)
                     ` (2 more replies)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write Maarten Lankhorst
                   ` (27 subsequent siblings)
  30 siblings, 3 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

i915_gem_ww_ctx is used to lock all GEM BOs for pinning and memory
eviction. We don't use it yet, but let's start by adding the definition
first.

To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
unlock directly. It is done in i915_gem_ww_ctx_fini.

Changes since v1:
- Change ww_ctx and obj order in locking functions (Joonas Lahtinen)

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
 .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
 .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
 .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
 drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
 drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
 .../drm/i915/selftests/intel_memory_region.c  |  2 +-
 24 files changed, 173 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 7457813ef273..e909ccc37a54 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2309,7 +2309,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-	i915_gem_object_lock(vma->obj);
+	i915_gem_object_lock(vma->obj, NULL);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
@@ -17112,7 +17112,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	if (!intel_fb->frontbuffer)
 		return -ENOMEM;
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	tiling = i915_gem_object_get_tiling(obj);
 	stride = i915_gem_object_get_stride(obj);
 	i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index d3a86a4d5c04..c182091c00ff 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -286,7 +286,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
 	dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
 	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_sw_fence_await_reservation(&work->wait,
 					      obj->base.resv, NULL, true, 0,
 					      I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 30c229fcb404..a996583640ee 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -113,7 +113,7 @@ static void lut_close(struct i915_gem_context *ctx)
 			continue;
 
 		rcu_read_unlock();
-		i915_gem_object_lock(obj);
+		i915_gem_object_lock(obj, NULL);
 		list_for_each_entry(lut, &obj->lut_list, obj_link) {
 			if (lut->ctx != ctx)
 				continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 2679380159fc..27fddc22a7c6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	if (err)
 		return err;
 
-	err = i915_gem_object_lock_interruptible(obj);
+	err = i915_gem_object_lock_interruptible(obj, NULL);
 	if (err)
 		goto out;
 
@@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
 	if (err)
 		return err;
 
-	err = i915_gem_object_lock_interruptible(obj);
+	err = i915_gem_object_lock_interruptible(obj, NULL);
 	if (err)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 7f76fc68f498..c0acfc97fae3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -32,7 +32,7 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	if (!i915_gem_object_is_framebuffer(obj))
 		return;
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	__i915_gem_object_flush_for_display(obj);
 	i915_gem_object_unlock(obj);
 }
@@ -197,7 +197,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		return ret;
 
@@ -536,7 +536,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
-	err = i915_gem_object_lock_interruptible(obj);
+	err = i915_gem_object_lock_interruptible(obj, NULL);
 	if (err)
 		goto out_unpin;
 
@@ -576,7 +576,7 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		return ret;
 
@@ -630,7 +630,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 2b4c210638c1..391d22051b20 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -813,7 +813,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
 		if (err == 0) { /* And nor has this handle */
 			struct drm_i915_gem_object *obj = vma->obj;
 
-			i915_gem_object_lock(obj);
+			i915_gem_object_lock(obj, NULL);
 			if (idr_find(&eb->file->object_idr, handle) == obj) {
 				list_add(&lut->obj_link, &obj->lut_list);
 			} else {
@@ -1083,7 +1083,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
-		i915_gem_object_lock(obj);
+		i915_gem_object_lock(obj, NULL);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
 		i915_gem_object_unlock(obj);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index b6ec5b50d93b..b59e2d40c347 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -108,7 +108,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	struct i915_lut_handle *lut, *ln;
 	LIST_HEAD(close);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
 		struct i915_gem_context *ctx = lut->ctx;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 2faa481cc18f..5103067269b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
 
 #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
 
-static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+					 struct i915_gem_ww_ctx *ww,
+					 bool intr) /* intr: use the interruptible lock variant */
 {
-	dma_resv_lock(obj->base.resv, NULL);
+	int ret;
+
+	if (intr)
+		ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+	else
+		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+
+	if (!ret && ww) /* newly locked under a ww ctx: track it so the ctx can unlock it */
+		list_add_tail(&obj->obj_link, &ww->obj_list);
+	if (ret == -EALREADY) /* reported as success; the object is not added to the list again */
+		ret = 0;
+
+	if (ret == -EDEADLK) /* NOTE(review): ww is dereferenced unconditionally; presumably -EDEADLK only occurs with a ctx */
+		ww->contended = obj;
+
+	return ret;
 }
 
-static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+				       struct i915_gem_ww_ctx *ww)
 {
-	return dma_resv_trylock(obj->base.resv);
+	return __i915_gem_object_lock(obj, ww, ww && ww->intr); /* interruptible only if the ctx asks for it; NULL ww locks uninterruptibly */
 }
 
-static inline int
-i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+						     struct i915_gem_ww_ctx *ww)
 {
-	return dma_resv_lock_interruptible(obj->base.resv, NULL);
+	WARN_ON(ww && !ww->intr); /* interruptible lock requested on a ctx initialised with intr == false */
+	return __i915_gem_object_lock(obj, ww, true);
+}
+
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+	return dma_resv_trylock(obj->base.resv); /* takes no ww ctx; the object is not put on any obj_list */
 }
 
 static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index b1f82a11aef2..3740c0080e38 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -122,6 +122,15 @@ struct drm_i915_gem_object {
 	 */
 	struct list_head lut_list;
 
+	/**
+	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
+	 *
+	 * When we lock this object through i915_gem_object_lock() with a
+	 * context, we add it to the list to ensure we can unlock everything
+	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
+	 */
+	struct list_head obj_link;
+
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	union {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3d215164dd5a..40d3e40500fa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 
 			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
-			i915_gem_object_lock(obj);
+			i915_gem_object_lock(obj, NULL);
 			drm_WARN_ON(&i915->drm,
 			    i915_gem_object_set_to_gtt_domain(obj, false));
 			i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 0158e49bf9bb..65fbf29c4852 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	 * whilst executing a fenced command for an untiled object.
 	 */
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	if (i915_gem_object_is_framebuffer(obj)) {
 		i915_gem_object_unlock(obj);
 		return -EBUSY;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 8291ede6902c..eb2011ccb92b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
 {
 	int err;
 
-	i915_gem_object_lock(vma->obj);
+	i915_gem_object_lock(vma->obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
 	i915_gem_object_unlock(vma->obj);
 	if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 299c29e9ad86..4e36d4897ea6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
 		if (err)
 			goto err_unpin;
 
-		i915_gem_object_lock(obj);
+		i915_gem_object_lock(obj, NULL);
 		err = i915_gem_object_set_to_cpu_domain(obj, false);
 		i915_gem_object_unlock(obj);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 87d7d8aa080f..1de2959b153c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -82,7 +82,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	u32 __iomem *map;
 	int err = 0;
 
-	i915_gem_object_lock(ctx->obj);
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
 	i915_gem_object_unlock(ctx->obj);
 	if (err)
@@ -115,7 +115,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	u32 __iomem *map;
 	int err = 0;
 
-	i915_gem_object_lock(ctx->obj);
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
 	i915_gem_object_unlock(ctx->obj);
 	if (err)
@@ -147,7 +147,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
 	u32 *map;
 	int err;
 
-	i915_gem_object_lock(ctx->obj);
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
 	i915_gem_object_unlock(ctx->obj);
 	if (err)
@@ -170,7 +170,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
 	u32 *map;
 	int err;
 
-	i915_gem_object_lock(ctx->obj);
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
 	i915_gem_object_unlock(ctx->obj);
 	if (err)
@@ -193,7 +193,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
 	u32 *cs;
 	int err;
 
-	i915_gem_object_lock(ctx->obj);
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
 	i915_gem_object_unlock(ctx->obj);
 	if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index b81978890641..438c15ef2184 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -950,7 +950,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
 	i915_gem_object_unlock(obj);
 	if (err)
@@ -1706,7 +1706,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
 	i915_request_add(rq);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_cpu_domain(obj, false);
 	i915_gem_object_unlock(obj);
 	if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 9c7402ce5bf9..9fb95a45bcad 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
 	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
 	i915_gem_object_unlock(obj);
 	if (err) {
@@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
 	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
 	i915_gem_object_unlock(obj);
 	if (err) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 34932871b3a5..a94243dc4c5c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
 	}
 
 	/* Make the object dirty so that put_pages must do copy back the data */
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
 	i915_gem_object_unlock(obj);
 	if (err) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index febc9e6692ba..61a0532d0f3d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
 		return PTR_ERR(results);
 
 	err = 0;
-	i915_gem_object_lock(results);
+	i915_gem_object_lock(results, NULL);
 	intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
 		err = i915_gem_object_set_to_cpu_domain(results, false);
 	i915_gem_object_unlock(results);
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index f1940939260a..943c8d232703 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2982,7 +2982,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		goto put_obj;
 	}
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 	i915_gem_object_unlock(obj);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9aa3066cb75d..1e06752835e5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -420,7 +420,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		goto out_unpin;
 
@@ -619,7 +619,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		goto out_unpin;
 
@@ -1290,7 +1290,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 	i915_gem_drain_freed_objects(i915);
 
 	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
-		i915_gem_object_lock(obj);
+		i915_gem_object_lock(obj, NULL);
 		drm_WARN_ON(&i915->drm,
 			    i915_gem_object_set_to_cpu_domain(obj, true));
 		i915_gem_object_unlock(obj);
@@ -1344,6 +1344,52 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	return ret;
 }
 
+void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
+{
+	ww_acquire_init(&ww->ctx, &reservation_ww_class);
+	INIT_LIST_HEAD(&ww->obj_list); /* will hold every object locked through this ctx */
+	ww->intr = intr; /* selects interruptible locking in the lock and backoff paths */
+	ww->contended = NULL; /* set when a lock returns -EDEADLK, consumed by backoff */
+}
+
+static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
+{
+	struct drm_i915_gem_object *obj;
+
+	while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { /* pop until the list is empty */
+		list_del(&obj->obj_link); /* unlink from the ctx list before releasing the lock */
+		i915_gem_object_unlock(obj);
+	}
+}
+
+void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
+{
+	i915_gem_ww_ctx_unlock_all(ww); /* release every object still on ww->obj_list */
+	WARN_ON(ww->contended); /* a recorded -EDEADLK was never handled by i915_gem_ww_ctx_backoff() */
+	ww_acquire_fini(&ww->ctx);
+}
+
+int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
+{
+	int ret = 0;
+
+	if (WARN_ON(!ww->contended)) /* only meaningful after a lock call recorded a contended object */
+		return -EINVAL;
+
+	i915_gem_ww_ctx_unlock_all(ww); /* drop every held lock before slow-locking the contended object */
+	if (ww->intr)
+		ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
+	else
+		dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); /* result not captured; ret stays 0 */
+
+	if (!ret) /* contended object is now locked: track it like any other locked object */
+		list_add_tail(&ww->contended->obj_link, &ww->obj_list);
+
+	ww->contended = NULL; /* cleared on success and on failure alike */
+
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_gem_device.c"
 #include "selftests/i915_gem.c"
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 1753c84d6c0d..988755dbf4be 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -116,4 +116,15 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
 	return test_bit(TASKLET_STATE_SCHED, &t->state);
 }
 
+struct i915_gem_ww_ctx {
+	struct ww_acquire_ctx ctx; /* acquire context handed to the dma_resv lock calls */
+	struct list_head obj_list; /* objects locked through this ctx, linked via obj_link */
+	bool intr; /* true: lock and backoff use the interruptible variants */
+	struct drm_i915_gem_object *contended; /* object whose lock returned -EDEADLK, if any */
+};
+
+void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
+void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
+int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 88d400b9df88..23a6132c5f4e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -199,11 +199,52 @@ static int igt_gem_hibernate(void *arg)
 	return err;
 }
 
+static int igt_gem_ww_ctx(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj, *obj2;
+	struct i915_gem_ww_ctx ww;
+	int err = 0;
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj2)) { /* obj is known valid here; it is obj2 that may be an ERR_PTR */
+		err = PTR_ERR(obj2);
+		goto put1;
+	}
+
+	i915_gem_ww_ctx_init(&ww, true); /* interruptible ww context */
+retry:
+	/* Lock the objects, twice for good measure (-EALREADY handling) */
+	err = i915_gem_object_lock(obj, &ww);
+	if (!err)
+		err = i915_gem_object_lock_interruptible(obj, &ww);
+	if (!err)
+		err = i915_gem_object_lock_interruptible(obj2, &ww);
+	if (!err)
+		err = i915_gem_object_lock(obj2, &ww);
+
+	if (err == -EDEADLK) { /* drop all locks, slow-lock the contended object, then retry */
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww); /* unlocks everything still held through ww */
+	i915_gem_object_put(obj2);
+put1:
+	i915_gem_object_put(obj);
+	return err;
+}
+
 int i915_gem_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_suspend),
 		SUBTEST(igt_gem_hibernate),
+		SUBTEST(igt_gem_ww_ctx),
 	};
 
 	if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index af89c7fc8f59..88c5e9acb84c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
 			unsigned int x, y;
 			int err;
 
-			i915_gem_object_lock(obj);
+			i915_gem_object_lock(obj, NULL);
 			err = i915_gem_object_set_to_gtt_domain(obj, true);
 			i915_gem_object_unlock(obj);
 			if (err)
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 6e80d99048e4..957a7a52def7 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
 	if (err)
 		goto out_unpin;
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
 	i915_gem_object_unlock(obj);
 	if (err)
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (2 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-26 13:32   ` Thomas Hellström (Intel)
  2020-06-29 12:56   ` Tvrtko Ursulin
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow) Maarten Lankhorst
                   ` (26 subsequent siblings)
  30 siblings, 2 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Execbuffer submission will perform its own WW locking, and we
cannot rely on the implicit lock there.

This also makes it clear that the GVT code will get a lockdep splat when
multiple batchbuffer shadows need to be performed in the same instance;
fix that up.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 20 ++++++-------------
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 13 ++++++++++--
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  1 -
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  5 ++++-
 .../i915/gem/selftests/i915_gem_coherency.c   | 14 +++++++++----
 .../drm/i915/gem/selftests/i915_gem_context.c | 12 ++++++++---
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 20 +++++++++++++++++--
 8 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index c0acfc97fae3..8ebceebd11b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -576,19 +576,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj, NULL);
-	if (ret)
-		return ret;
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		goto err_unlock;
+		return ret;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		goto err_unlock;
+		return ret;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -616,8 +614,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-err_unlock:
-	i915_gem_object_unlock(obj);
 	return ret;
 }
 
@@ -630,20 +626,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj, NULL);
-	if (ret)
-		return ret;
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		goto err_unlock;
+		return ret;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		goto err_unlock;
+		return ret;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -680,7 +674,5 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-err_unlock:
-	i915_gem_object_unlock(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 391d22051b20..f896b1a4b38a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1003,11 +1003,14 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 
 	vaddr = unmask_page(cache->vaddr);
 	if (cache->vaddr & KMAP) {
+		struct drm_i915_gem_object *obj =
+			(struct drm_i915_gem_object *)cache->node.mm;
 		if (cache->vaddr & CLFLUSH_AFTER)
 			mb();
 
 		kunmap_atomic(vaddr);
-		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+		i915_gem_object_finish_access(obj);
+		i915_gem_object_unlock(obj);
 	} else {
 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 
@@ -1042,10 +1045,16 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int err;
 
-		err = i915_gem_object_prepare_write(obj, &flushes);
+		err = i915_gem_object_lock_interruptible(obj, NULL);
 		if (err)
 			return ERR_PTR(err);
 
+		err = i915_gem_object_prepare_write(obj, &flushes);
+		if (err) {
+			i915_gem_object_unlock(obj);
+			return ERR_PTR(err);
+		}
+
 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 5103067269b0..11b8e2735071 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -434,7 +434,6 @@ static inline void
 i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 {
 	i915_gem_object_unpin_pages(obj);
-	i915_gem_object_unlock(obj);
 }
 
 static inline struct intel_engine_cs *
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index eb2011ccb92b..fff11327a8da 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	unsigned long n;
 	int err;
 
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
-		return err;
+		goto err_unlock;
 
 	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
 		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
@@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	}
 
 	i915_gem_object_finish_access(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 1de2959b153c..dcdfc396f2f8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 	u32 *cpu;
 	int err;
 
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
 	if (err)
-		return err;
+		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
 	map = kmap_atomic(page);
@@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 	kunmap_atomic(map);
 	i915_gem_object_finish_access(ctx->obj);
 
-	return 0;
+out:
+	i915_gem_object_unlock(ctx->obj);
+	return err;
 }
 
 static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
@@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 	u32 *cpu;
 	int err;
 
+	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
 	if (err)
-		return err;
+		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
 	map = kmap_atomic(page);
@@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 	kunmap_atomic(map);
 	i915_gem_object_finish_access(ctx->obj);
 
-	return 0;
+out:
+	i915_gem_object_unlock(ctx->obj);
+	return err;
 }
 
 static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 438c15ef2184..76671f587b9d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	unsigned int n, m, need_flush;
 	int err;
 
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_prepare_write(obj, &need_flush);
 	if (err)
-		return err;
+		goto out;
 
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map;
@@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	i915_gem_object_finish_access(obj);
 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
 	obj->write_domain = 0;
-	return 0;
+out:
+	i915_gem_object_unlock(obj);
+	return err;
 }
 
 static noinline int cpu_check(struct drm_i915_gem_object *obj,
@@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	unsigned int n, m, needs_flush;
 	int err;
 
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
-		return err;
+		goto out_unlock;
 
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map;
@@ -527,6 +531,8 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	}
 
 	i915_gem_object_finish_access(obj);
+out_unlock:
+	i915_gem_object_unlock(obj);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 943c8d232703..d0a599b51bfe 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1923,6 +1923,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	if (ret)
 		goto err_unmap;
 
+	i915_gem_object_unlock(bb->obj);
 	INIT_LIST_HEAD(&bb->list);
 	list_add(&bb->list, &s->workload->shadow_bb);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1e06752835e5..33f6f88c8b08 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -335,12 +335,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 	u64 remain;
 	int ret;
 
-	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		return ret;
+	}
+
 	fence = i915_gem_object_lock_fence(obj);
 	i915_gem_object_finish_access(obj);
+	i915_gem_object_unlock(obj);
+
 	if (!fence)
 		return -ENOMEM;
 
@@ -734,12 +742,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	u64 remain;
 	int ret;
 
-	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		return ret;
+	}
+
 	fence = i915_gem_object_lock_fence(obj);
 	i915_gem_object_finish_access(obj);
+	i915_gem_object_unlock(obj);
+
 	if (!fence)
 		return -ENOMEM;
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (3 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-26 14:41   ` Thomas Hellström (Intel)
  2020-06-29 14:42   ` Tvrtko Ursulin
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation" Maarten Lankhorst
                   ` (25 subsequent siblings)
  30 siblings, 2 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

We want to introduce backoff logic, but command parsing also needs to
lock the engine pool object, so that object has to take part in the
backoff logic as well. To prepare for this, move the batch validation
up slightly into eb_lookup_vmas(), and do the actual command parsing
in a separate function that can be called from both the fast path and
the slow path of execbuf relocation.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 66 ++++++++++---------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f896b1a4b38a..7cb44915cfc7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -290,6 +290,8 @@ struct i915_execbuffer {
 	struct eb_vma_array *array;
 };
 
+static int eb_parse(struct i915_execbuffer *eb);
+
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
 	return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -873,6 +875,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
+	struct drm_i915_private *i915 = eb->i915;
 	unsigned int batch = eb_batch_index(eb);
 	unsigned int i;
 	int err = 0;
@@ -886,18 +889,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		vma = eb_lookup_vma(eb, eb->exec[i].handle);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
-			break;
+			goto err;
 		}
 
 		err = eb_validate_vma(eb, &eb->exec[i], vma);
 		if (unlikely(err)) {
 			i915_vma_put(vma);
-			break;
+			goto err;
 		}
 
 		eb_add_vma(eb, i, batch, vma);
 	}
 
+	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
+		drm_dbg(&i915->drm,
+			"Attempting to use self-modifying batch buffer\n");
+		return -EINVAL;
+	}
+
+	if (range_overflows_t(u64,
+			      eb->batch_start_offset, eb->batch_len,
+			      eb->batch->vma->size)) {
+		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+		return -EINVAL;
+	}
+
+	if (eb->batch_len == 0)
+		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+
+	return 0;
+
+err:
 	eb->vma[i].vma = NULL;
 	return err;
 }
@@ -1809,7 +1831,7 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
 	return 0;
 }
 
-static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 {
 	bool have_copy = false;
 	struct eb_vma *ev;
@@ -1872,6 +1894,11 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	if (err)
 		goto err;
 
+	/* as last step, parse the command buffer */
+	err = eb_parse(eb);
+	if (err)
+		goto err;
+
 	/*
 	 * Leave the user relocations as are, this is the painfully slow path,
 	 * and we want to avoid the complication of dropping the lock whilst
@@ -1904,7 +1931,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	return err;
 }
 
-static int eb_relocate(struct i915_execbuffer *eb)
+static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
 	int err;
 
@@ -1932,7 +1959,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
 			return eb_relocate_slow(eb);
 	}
 
-	return 0;
+	return eb_parse(eb);
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -2870,7 +2897,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_context;
 
-	err = eb_relocate(&eb);
+	err = eb_relocate_parse(&eb);
 	if (err) {
 		/*
 		 * If the user expects the execobject.offset and
@@ -2883,33 +2910,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
-	if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
-		drm_dbg(&i915->drm,
-			"Attempting to use self-modifying batch buffer\n");
-		err = -EINVAL;
-		goto err_vma;
-	}
-
-	if (range_overflows_t(u64,
-			      eb.batch_start_offset, eb.batch_len,
-			      eb.batch->vma->size)) {
-		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
-		err = -EINVAL;
-		goto err_vma;
-	}
-
-	if (eb.batch_len == 0)
-		eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
-
-	err = eb_parse(&eb);
-	if (err)
-		goto err_vma;
-
 	/*
 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	batch = eb.batch->vma;
 	if (eb.batch_flags & I915_DISPATCH_SECURE) {
 		struct i915_vma *vma;
 
@@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		 *   fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
+		vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
 			goto err_parse;
 		}
 
 		batch = vma;
+	} else {
+		batch = eb.batch->vma;
 	}
 
 	/* All GPU relocation batches must be submitted prior to the user rq */
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation"
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (4 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow) Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-29 15:08   ` Tvrtko Ursulin
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock Maarten Lankhorst
                   ` (24 subsequent siblings)
  30 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

This reverts commit 0f1dd02295f35dcdcbaafcbcbbec0753884ab974.
The reverted commit conflicts with the ww mutex handling, which needs
to drop the references after GPU submission anyway, because otherwise
we risk unlocking a BO after it has already been freed.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 124 +++++++-----------
 1 file changed, 51 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 7cb44915cfc7..2636a130fb57 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -40,11 +40,6 @@ struct eb_vma {
 	u32 handle;
 };
 
-struct eb_vma_array {
-	struct kref kref;
-	struct eb_vma vma[];
-};
-
 enum {
 	FORCE_CPU_RELOC = 1,
 	FORCE_GTT_RELOC,
@@ -57,6 +52,7 @@ enum {
 #define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
 #define __EXEC_OBJECT_NEEDS_BIAS	BIT(28)
 #define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 28) /* all of the above */
+#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
 #define __EXEC_INTERNAL_FLAGS	(~0u << 31)
@@ -287,7 +283,6 @@ struct i915_execbuffer {
 	 */
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
-	struct eb_vma_array *array;
 };
 
 static int eb_parse(struct i915_execbuffer *eb);
@@ -299,62 +294,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 		 eb->args->batch_len);
 }
 
-static struct eb_vma_array *eb_vma_array_create(unsigned int count)
-{
-	struct eb_vma_array *arr;
-
-	arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
-	if (!arr)
-		return NULL;
-
-	kref_init(&arr->kref);
-	arr->vma[0].vma = NULL;
-
-	return arr;
-}
-
-static inline void eb_unreserve_vma(struct eb_vma *ev)
-{
-	struct i915_vma *vma = ev->vma;
-
-	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
-		__i915_vma_unpin_fence(vma);
-
-	if (ev->flags & __EXEC_OBJECT_HAS_PIN)
-		__i915_vma_unpin(vma);
-
-	ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
-		       __EXEC_OBJECT_HAS_FENCE);
-}
-
-static void eb_vma_array_destroy(struct kref *kref)
-{
-	struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
-	struct eb_vma *ev = arr->vma;
-
-	while (ev->vma) {
-		eb_unreserve_vma(ev);
-		i915_vma_put(ev->vma);
-		ev++;
-	}
-
-	kvfree(arr);
-}
-
-static void eb_vma_array_put(struct eb_vma_array *arr)
-{
-	kref_put(&arr->kref, eb_vma_array_destroy);
-}
-
 static int eb_create(struct i915_execbuffer *eb)
 {
-	/* Allocate an extra slot for use by the command parser + sentinel */
-	eb->array = eb_vma_array_create(eb->buffer_count + 2);
-	if (!eb->array)
-		return -ENOMEM;
-
-	eb->vma = eb->array->vma;
-
 	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
 		unsigned int size = 1 + ilog2(eb->buffer_count);
 
@@ -388,10 +329,8 @@ static int eb_create(struct i915_execbuffer *eb)
 				break;
 		} while (--size);
 
-		if (unlikely(!size)) {
-			eb_vma_array_put(eb->array);
+		if (unlikely(!size))
 			return -ENOMEM;
-		}
 
 		eb->lut_size = size;
 	} else {
@@ -502,6 +441,26 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	return !eb_vma_misplaced(entry, vma, ev->flags);
 }
 
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
+{
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
+
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
+		__i915_vma_unpin_fence(vma);
+
+	__i915_vma_unpin(vma);
+}
+
+static inline void
+eb_unreserve_vma(struct eb_vma *ev)
+{
+	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
+		return;
+
+	__eb_unreserve_vma(ev->vma, ev->flags);
+	ev->flags &= ~__EXEC_OBJECT_RESERVED;
+}
+
 static int
 eb_validate_vma(struct i915_execbuffer *eb,
 		struct drm_i915_gem_exec_object2 *entry,
@@ -944,13 +903,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	}
 }
 
+static void eb_release_vmas(const struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		struct eb_vma *ev = &eb->vma[i];
+		struct i915_vma *vma = ev->vma;
+
+		if (!vma)
+			break;
+
+		eb->vma[i].vma = NULL;
+
+		if (ev->flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, ev->flags);
+
+		i915_vma_put(vma);
+	}
+}
+
 static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
-	if (eb->array)
-		eb_vma_array_put(eb->array);
-
 	if (eb->lut_size > 0)
 		kfree(eb->buckets);
 }
@@ -2039,12 +2016,9 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 			err = i915_vma_move_to_active(vma, eb->request, flags);
 
 		i915_vma_unlock(vma);
-		eb_unreserve_vma(ev);
 	}
 	ww_acquire_fini(&acquire);
 
-	eb_vma_array_put(fetch_and_zero(&eb->array));
-
 	if (unlikely(err))
 		goto err_skip;
 
@@ -2340,7 +2314,6 @@ static int eb_parse(struct i915_execbuffer *eb)
 	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
 	eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
 	eb->batch = &eb->vma[eb->buffer_count++];
-	eb->vma[eb->buffer_count].vma = NULL;
 
 	eb->trampoline = trampoline;
 	eb->batch_start_offset = 0;
@@ -2838,6 +2811,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 
 	eb.exec = exec;
+	eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
+	eb.vma[0].vma = NULL;
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -3014,6 +2989,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (batch->private)
 		intel_gt_buffer_pool_put(batch->private);
 err_vma:
+	if (eb.exec)
+		eb_release_vmas(&eb);
 	if (eb.trampoline)
 		i915_vma_unpin(eb.trampoline);
 	eb_unpin_engine(&eb);
@@ -3031,7 +3008,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 static size_t eb_element_size(void)
 {
-	return sizeof(struct drm_i915_gem_exec_object2);
+	return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
 }
 
 static bool check_buffer_count(size_t count)
@@ -3087,7 +3064,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 	/* Copy in the exec list from userland */
 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
 				   __GFP_NOWARN | GFP_KERNEL);
-	exec2_list = kvmalloc_array(count, eb_element_size(),
+	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec_list == NULL || exec2_list == NULL) {
 		drm_dbg(&i915->drm,
@@ -3165,7 +3142,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		return err;
 
-	exec2_list = kvmalloc_array(count, eb_element_size(),
+	/* Allocate an extra slot for use by the command parser */
+	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec2_list == NULL) {
 		drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (5 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation" Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-26 13:52   ` Thomas Hellström (Intel)
  2020-06-29 15:14   ` Tvrtko Ursulin
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 09/26] drm/i915: Use per object locking in execbuf, v12 Maarten Lankhorst
                   ` (23 subsequent siblings)
  30 siblings, 2 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

The lock here should be interruptible, so we can back off if needed.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 2636a130fb57..aa441af81431 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -774,7 +774,12 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
 		if (err == 0) { /* And nor has this handle */
 			struct drm_i915_gem_object *obj = vma->obj;
 
-			i915_gem_object_lock(obj, NULL);
+			err = i915_gem_object_lock_interruptible(obj, NULL);
+			if (err) {
+				radix_tree_delete(&ctx->handles_vma, handle);
+				goto unlock;
+			}
+
 			if (idr_find(&eb->file->object_idr, handle) == obj) {
 				list_add(&lut->obj_link, &obj->lut_list);
 			} else {
@@ -783,6 +788,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
 			}
 			i915_gem_object_unlock(obj);
 		}
+unlock:
 		mutex_unlock(&ctx->mutex);
 	}
 	if (unlikely(err))
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 09/26] drm/i915: Use per object locking in execbuf, v12.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (6 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 10/26] drm/i915: Use ww locking in intel_renderstate Maarten Lankhorst
                   ` (22 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Now that we have changed execbuf submission slightly to allow us to do
all pinning in one place, we can simply add ww versions on top of
struct_mutex. All we have to do is add a separate path for -EDEADLK
handling, which needs to unpin all GEM BOs before dropping the lock,
and then start over.

This finally allows us to do parallel submission, but because not
all of the pinning code uses the ww ctx yet, we cannot completely
drop struct_mutex yet.

Changes since v1:
- Keep struct_mutex for now. :(
Changes since v2:
- Make sure we always lock the ww context in slowpath.
Changes since v3:
- Don't call __eb_unreserve_vma in eb_move_to_gpu now; this can be
  done on normal unlock path.
- Unconditionally release vmas and context.
Changes since v4:
- Rebased on top of struct_mutex reduction.
Changes since v5:
- Remove training wheels.
Changes since v6:
- Fix accidentally broken -ENOSPC handling.
Changes since v7:
- Handle gt buffer pool better.
Changes since v8:
- Properly clear variables, to make -EDEADLK handling not BUG.
Changes since v9:
- Fix unpinning fence on pnv and below.
Changes since v10:
- Make relocation gpu chaining work again.
Changes since v11:
- Remove relocation chaining; it is a pain to make it work.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 378 ++++++++++--------
 .../i915/gem/selftests/i915_gem_execbuffer.c  |  62 +--
 drivers/gpu/drm/i915/i915_gem.c               |   6 +
 drivers/gpu/drm/i915/i915_gem.h               |   1 +
 4 files changed, 265 insertions(+), 182 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index aa441af81431..e00ae140e4d0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -249,6 +249,8 @@ struct i915_execbuffer {
 	/** list of vma that have execobj.relocation_count */
 	struct list_head relocs;
 
+	struct i915_gem_ww_ctx ww;
+
 	/**
 	 * Track the most recently used object for relocations, as we
 	 * frequently have to perform multiple relocations within the same
@@ -267,14 +269,18 @@ struct i915_execbuffer {
 		struct i915_request *rq;
 		u32 *rq_cmd;
 		unsigned int rq_size;
+		struct intel_gt_buffer_pool_node *pool;
 	} reloc_cache;
 
+	struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
 	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 
 	u32 batch_start_offset; /** Location within object of batch */
 	u32 batch_len; /** Length of batch within object */
 	u32 batch_flags; /** Flags composed for emit_bb_start() */
+	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
 
 	/**
 	 * Indicate either the size of the hastable used to resolve
@@ -441,23 +447,16 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	return !eb_vma_misplaced(entry, vma, ev->flags);
 }
 
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
-	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
-	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
-		__i915_vma_unpin_fence(vma);
-
-	__i915_vma_unpin(vma);
-}
-
 static inline void
 eb_unreserve_vma(struct eb_vma *ev)
 {
 	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
 		return;
 
-	__eb_unreserve_vma(ev->vma, ev->flags);
+	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+		__i915_vma_unpin_fence(ev->vma);
+
+	__i915_vma_unpin(ev->vma);
 	ev->flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
@@ -552,16 +551,6 @@ eb_add_vma(struct i915_execbuffer *eb,
 
 		eb->batch = ev;
 	}
-
-	if (eb_pin_vma(eb, entry, ev)) {
-		if (entry->offset != vma->node.start) {
-			entry->offset = vma->node.start | UPDATE;
-			eb->args->flags |= __EXEC_HAS_RELOC;
-		}
-	} else {
-		eb_unreserve_vma(ev);
-		list_add_tail(&ev->bind_link, &eb->unbound);
-	}
 }
 
 static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -646,10 +635,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	 * This avoid unnecessary unbinding of later objects in order to make
 	 * room for the earlier objects *unless* we need to defragment.
 	 */
-
-	if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
-		return -EINTR;
-
 	pass = 0;
 	do {
 		list_for_each_entry(ev, &eb->unbound, bind_link) {
@@ -657,8 +642,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
 			if (err)
 				break;
 		}
-		if (!(err == -ENOSPC || err == -EAGAIN))
-			break;
+		if (err != -ENOSPC)
+			return err;
 
 		/* Resort *all* the objects into priority order */
 		INIT_LIST_HEAD(&eb->unbound);
@@ -688,13 +673,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		}
 		list_splice_tail(&last, &eb->unbound);
 
-		if (err == -EAGAIN) {
-			mutex_unlock(&eb->i915->drm.struct_mutex);
-			flush_workqueue(eb->i915->mm.userptr_wq);
-			mutex_lock(&eb->i915->drm.struct_mutex);
-			continue;
-		}
-
 		switch (pass++) {
 		case 0:
 			break;
@@ -705,20 +683,15 @@ static int eb_reserve(struct i915_execbuffer *eb)
 			err = i915_gem_evict_vm(eb->context->vm);
 			mutex_unlock(&eb->context->vm->mutex);
 			if (err)
-				goto unlock;
+				return err;
 			break;
 
 		default:
-			err = -ENOSPC;
-			goto unlock;
+			return -ENOSPC;
 		}
 
 		pin_flags = PIN_USER;
 	} while (1);
-
-unlock:
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-	return err;
 }
 
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
@@ -846,7 +819,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	int err = 0;
 
 	INIT_LIST_HEAD(&eb->relocs);
-	INIT_LIST_HEAD(&eb->unbound);
 
 	for (i = 0; i < eb->buffer_count; i++) {
 		struct i915_vma *vma;
@@ -889,6 +861,48 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	return err;
 }
 
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+	unsigned int i;
+	int err;
+
+	INIT_LIST_HEAD(&eb->unbound);
+
+	for (i = 0; i < eb->buffer_count; i++) {
+		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		struct eb_vma *ev = &eb->vma[i];
+		struct i915_vma *vma = ev->vma;
+
+		err = i915_gem_object_lock(vma->obj, &eb->ww);
+		if (err)
+			return err;
+
+		if (eb_pin_vma(eb, entry, ev)) {
+			if (entry->offset != vma->node.start) {
+				entry->offset = vma->node.start | UPDATE;
+				eb->args->flags |= __EXEC_HAS_RELOC;
+			}
+		} else {
+			eb_unreserve_vma(ev);
+
+			list_add_tail(&ev->bind_link, &eb->unbound);
+			if (drm_mm_node_allocated(&vma->node)) {
+				err = i915_vma_unbind(vma);
+				if (err)
+					return err;
+			}
+		}
+
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+			   eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
+	}
+
+	if (!list_empty(&eb->unbound))
+		return eb_reserve(eb);
+
+	return 0;
+}
+
 static struct eb_vma *
 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 {
@@ -909,7 +923,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	}
 }
 
-static void eb_release_vmas(const struct i915_execbuffer *eb)
+static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i;
@@ -921,12 +935,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		if (!vma)
 			break;
 
-		eb->vma[i].vma = NULL;
-
-		if (ev->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, ev->flags);
+		eb_unreserve_vma(ev);
 
-		i915_vma_put(vma);
+		if (final)
+			i915_vma_put(vma);
 	}
 }
 
@@ -945,6 +957,14 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 	return gen8_canonical_addr((int)reloc->delta + target->node.start);
 }
 
+static void reloc_cache_clear(struct reloc_cache *cache)
+{
+	cache->rq = NULL;
+	cache->rq_cmd = NULL;
+	cache->pool = NULL;
+	cache->rq_size = 0;
+}
+
 static void reloc_cache_init(struct reloc_cache *cache,
 			     struct drm_i915_private *i915)
 {
@@ -957,8 +977,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.flags = 0;
-	cache->rq = NULL;
-	cache->rq_size = 0;
+	reloc_cache_clear(cache);
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -980,7 +999,23 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
-static void reloc_gpu_flush(struct reloc_cache *cache)
+static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
+{
+	if (!cache->pool)
+		return;
+
+	/*
+	 * This is a bit nasty, normally we keep objects locked until the end
+	 * of execbuffer, but we already submit this, and have to unlock before
+	 * dropping the reference. Fortunately we can only hold 1 pool node at
+	 * a time, so this should be harmless.
+	 */
+	i915_gem_ww_unlock_single(cache->pool->obj);
+	intel_gt_buffer_pool_put(cache->pool);
+	cache->pool = NULL;
+}
+
+static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
 {
 	struct drm_i915_gem_object *obj = cache->rq->batch->obj;
 
@@ -993,15 +1028,18 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
 	intel_gt_chipset_flush(cache->rq->engine->gt);
 
 	i915_request_add(cache->rq);
-	cache->rq = NULL;
+	reloc_cache_put_pool(eb, cache);
+	reloc_cache_clear(cache);
+
+	eb->reloc_pool = NULL;
 }
 
-static void reloc_cache_reset(struct reloc_cache *cache)
+static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
 {
 	void *vaddr;
 
 	if (cache->rq)
-		reloc_gpu_flush(cache);
+		reloc_gpu_flush(eb, cache);
 
 	if (!cache->vaddr)
 		return;
@@ -1015,7 +1053,6 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 
 		kunmap_atomic(vaddr);
 		i915_gem_object_finish_access(obj);
-		i915_gem_object_unlock(obj);
 	} else {
 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 
@@ -1050,15 +1087,9 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int err;
 
-		err = i915_gem_object_lock_interruptible(obj, NULL);
-		if (err)
-			return ERR_PTR(err);
-
 		err = i915_gem_object_prepare_write(obj, &flushes);
-		if (err) {
-			i915_gem_object_unlock(obj);
+		if (err)
 			return ERR_PTR(err);
-		}
 
 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
@@ -1097,9 +1128,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
-		i915_gem_object_lock(obj, NULL);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
-		i915_gem_object_unlock(obj);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1188,7 +1217,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	struct drm_i915_gem_object *obj = vma->obj;
 	int err;
 
-	i915_vma_lock(vma);
+	assert_vma_held(vma);
 
 	if (obj->cache_dirty & ~obj->cache_coherent)
 		i915_gem_clflush_object(obj, 0);
@@ -1198,8 +1227,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
-	i915_vma_unlock(vma);
-
 	return err;
 }
 
@@ -1209,15 +1236,22 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
-	struct intel_gt_buffer_pool_node *pool;
+	struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	u32 *cmd;
 	int err;
 
-	pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
-	if (IS_ERR(pool))
-		return PTR_ERR(pool);
+	if (!pool) {
+		pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
+		if (IS_ERR(pool))
+			return PTR_ERR(pool);
+	}
+	eb->reloc_pool = NULL;
+
+	err = i915_gem_object_lock(pool->obj, &eb->ww);
+	if (err)
+		goto err_pool;
 
 	cmd = i915_gem_object_pin_map(pool->obj,
 				      cache->has_llc ?
@@ -1225,7 +1259,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 				      I915_MAP_FORCE_WC);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
-		goto out_pool;
+		goto err_pool;
 	}
 
 	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
@@ -1274,11 +1308,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto skip_request;
 
-	i915_vma_lock(batch);
+	assert_vma_held(batch);
 	err = i915_request_await_object(rq, batch->obj, false);
 	if (err == 0)
 		err = i915_vma_move_to_active(batch, rq, 0);
-	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
@@ -1288,9 +1321,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
 	cache->rq_size = 0;
+	cache->pool = pool;
 
 	/* Return with batch mapping (cmd) still pinned */
-	goto out_pool;
+	return 0;
 
 skip_request:
 	i915_request_set_error_once(rq, err);
@@ -1300,8 +1334,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	i915_vma_unpin(batch);
 err_unmap:
 	i915_gem_object_unpin_map(pool->obj);
-out_pool:
-	intel_gt_buffer_pool_put(pool);
+err_pool:
+	eb->reloc_pool = pool;
 	return err;
 }
 
@@ -1318,7 +1352,7 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 	u32 *cmd;
 
 	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
-		reloc_gpu_flush(cache);
+		reloc_gpu_flush(eb, cache);
 
 	if (unlikely(!cache->rq)) {
 		int err;
@@ -1366,7 +1400,7 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
 	return addr + offset_in_page(offset);
 }
 
-static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 			      struct i915_vma *vma,
 			      u64 offset,
 			      u64 target_addr)
@@ -1384,7 +1418,9 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
 		len = 3;
 
 	batch = reloc_gpu(eb, vma, len);
-	if (IS_ERR(batch))
+	if (batch == ERR_PTR(-EDEADLK))
+		return (s64)-EDEADLK;
+	else if (IS_ERR(batch))
 		return false;
 
 	addr = gen8_canonical_addr(vma->node.start + offset);
@@ -1437,7 +1473,7 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
 	return true;
 }
 
-static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+static int reloc_entry_gpu(struct i915_execbuffer *eb,
 			    struct i915_vma *vma,
 			    u64 offset,
 			    u64 target_addr)
@@ -1459,8 +1495,12 @@ relocate_entry(struct i915_vma *vma,
 {
 	u64 target_addr = relocation_target(reloc, target);
 	u64 offset = reloc->offset;
+	int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
 
-	if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+	if (reloc_gpu < 0)
+		return reloc_gpu;
+	
+	if (!reloc_gpu) {
 		bool wide = eb->reloc_cache.use_64bit_reloc;
 		void *vaddr;
 
@@ -1663,7 +1703,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 		urelocs += ARRAY_SIZE(stack);
 	} while (remain);
 out:
-	reloc_cache_reset(&eb->reloc_cache);
+	reloc_cache_reset(&eb->reloc_cache, eb);
 	return remain;
 }
 
@@ -1686,7 +1726,7 @@ eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
 	}
 	err = 0;
 err:
-	reloc_cache_reset(&eb->reloc_cache);
+	reloc_cache_reset(&eb->reloc_cache, eb);
 	return err;
 }
 
@@ -1826,6 +1866,10 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 		goto out;
 	}
 
+	/* We may process another execbuffer during the unlock... */
+	eb_release_vmas(eb, false);
+	i915_gem_ww_ctx_fini(&eb->ww);
+
 	/*
 	 * We take 3 passes through the slowpatch.
 	 *
@@ -1848,12 +1892,20 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 		cond_resched();
 		err = 0;
 	}
+
+	flush_workqueue(eb->i915->mm.userptr_wq);
+
+	i915_gem_ww_ctx_init(&eb->ww, true);
 	if (err)
 		goto out;
 
-	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	/* reacquire the objects */
+repeat_validate:
+	err = eb_validate_vmas(eb);
 	if (err)
-		goto out;
+		goto err;
+
+	GEM_BUG_ON(!eb->batch);
 
 	list_for_each_entry(ev, &eb->relocs, reloc_link) {
 		if (!have_copy) {
@@ -1869,8 +1921,9 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 		}
 	}
 
-	reloc_gpu_flush(&eb->reloc_cache);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
+	if (err == -EDEADLK)
+		goto err;
+
 	if (err && !have_copy)
 		goto repeat;
 
@@ -1890,6 +1943,13 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 	 */
 
 err:
+	if (err == -EDEADLK) {
+		eb_release_vmas(eb, false);
+		err = i915_gem_ww_ctx_backoff(&eb->ww);
+		if (!err)
+			goto repeat_validate;
+	}
+
 	if (err == -EAGAIN)
 		goto repeat;
 
@@ -1918,15 +1978,12 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
 	int err;
 
-	err = eb_lookup_vmas(eb);
-	if (err)
-		return err;
-
-	if (!list_empty(&eb->unbound)) {
-		err = eb_reserve(eb);
-		if (err)
-			return err;
-	}
+retry:
+	err = eb_validate_vmas(eb);
+	if (err == -EAGAIN)
+		goto slow;
+	else if (err)
+		goto err;
 
 	/* The objects are in their final locations, apply the relocations. */
 	if (eb->args->flags & __EXEC_HAS_RELOC) {
@@ -1935,48 +1992,49 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
 		list_for_each_entry(ev, &eb->relocs, reloc_link) {
 			err = eb_relocate_vma(eb, ev);
 			if (err)
-				return err;
+				break;
 		}
 
-		if (err)
-			return eb_relocate_slow(eb);
+		if (err == -EDEADLK)
+			goto err;
+		else if (err)
+			goto slow;
 	}
 
-	return eb_parse(eb);
+	if (!err)
+		err = eb_parse(eb);
+
+err:
+	if (err == -EDEADLK) {
+		eb_release_vmas(eb, false);
+		err = i915_gem_ww_ctx_backoff(&eb->ww);
+		if (!err)
+			goto retry;
+	}
+
+	return err;
+
+slow:
+	err = eb_relocate_parse_slow(eb);
+	if (err)
+		/*
+		 * If the user expects the execobject.offset and
+		 * reloc.presumed_offset to be an exact match,
+		 * as for using NO_RELOC, then we cannot update
+		 * the execobject.offset until we have completed
+		 * relocation.
+		 */
+		eb->args->flags &= ~__EXEC_HAS_RELOC;
+
+	return err;
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
-	struct ww_acquire_ctx acquire;
-	unsigned int i;
+	unsigned int i = count;
 	int err = 0;
 
-	ww_acquire_init(&acquire, &reservation_ww_class);
-
-	for (i = 0; i < count; i++) {
-		struct eb_vma *ev = &eb->vma[i];
-		struct i915_vma *vma = ev->vma;
-
-		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
-		if (err == -EDEADLK) {
-			GEM_BUG_ON(i == 0);
-			do {
-				int j = i - 1;
-
-				ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
-
-				swap(eb->vma[i],  eb->vma[j]);
-			} while (--i);
-
-			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
-							       &acquire);
-		}
-		if (err)
-			break;
-	}
-	ww_acquire_done(&acquire);
-
 	while (i--) {
 		struct eb_vma *ev = &eb->vma[i];
 		struct i915_vma *vma = ev->vma;
@@ -2020,10 +2078,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 
 		if (err == 0)
 			err = i915_vma_move_to_active(vma, eb->request, flags);
-
-		i915_vma_unlock(vma);
 	}
-	ww_acquire_fini(&acquire);
 
 	if (unlikely(err))
 		goto err_skip;
@@ -2214,36 +2269,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	if (err)
 		goto err_commit;
 
-	err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
-	if (err)
-		goto err_commit;
-
 	err = dma_resv_reserve_shared(pw->batch->resv, 1);
 	if (err)
-		goto err_commit_unlock;
+		goto err_commit;
 
 	/* Wait for all writes (and relocs) into the batch to complete */
 	err = i915_sw_fence_await_reservation(&pw->base.chain,
 					      pw->batch->resv, NULL, false,
 					      0, I915_FENCE_GFP);
 	if (err < 0)
-		goto err_commit_unlock;
+		goto err_commit;
 
 	/* Keep the batch alive and unwritten as we parse */
 	dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
 
-	dma_resv_unlock(pw->batch->resv);
-
 	/* Force execution to wait for completion of the parser */
-	dma_resv_lock(shadow->resv, NULL);
 	dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-	dma_resv_unlock(shadow->resv);
 
 	dma_fence_work_commit_imm(&pw->base);
 	return 0;
 
-err_commit_unlock:
-	dma_resv_unlock(pw->batch->resv);
 err_commit:
 	i915_sw_fence_set_error_once(&pw->base.chain, err);
 	dma_fence_work_commit_imm(&pw->base);
@@ -2261,7 +2306,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 static int eb_parse(struct i915_execbuffer *eb)
 {
 	struct drm_i915_private *i915 = eb->i915;
-	struct intel_gt_buffer_pool_node *pool;
+	struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
 	struct i915_vma *shadow, *trampoline;
 	unsigned int len;
 	int err;
@@ -2284,9 +2329,16 @@ static int eb_parse(struct i915_execbuffer *eb)
 		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
 	}
 
-	pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
-	if (IS_ERR(pool))
-		return PTR_ERR(pool);
+	if (!pool) {
+		pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
+		if (IS_ERR(pool))
+			return PTR_ERR(pool);
+		eb->batch_pool = pool;
+	}
+
+	err = i915_gem_object_lock(pool->obj, &eb->ww);
+	if (err)
+		goto err;
 
 	shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
 	if (IS_ERR(shadow)) {
@@ -2332,7 +2384,6 @@ static int eb_parse(struct i915_execbuffer *eb)
 err_shadow:
 	i915_vma_unpin(shadow);
 err:
-	intel_gt_buffer_pool_put(pool);
 	return err;
 }
 
@@ -2819,6 +2870,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.exec = exec;
 	eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
 	eb.vma[0].vma = NULL;
+	eb.reloc_pool = eb.batch_pool = NULL;
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2878,6 +2930,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_context;
 
+	err = eb_lookup_vmas(&eb);
+	if (err) {
+		eb_release_vmas(&eb, true);
+		goto err_engine;
+	}
+
+	i915_gem_ww_ctx_init(&eb.ww, true);
+
 	err = eb_relocate_parse(&eb);
 	if (err) {
 		/*
@@ -2891,6 +2951,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
+	ww_acquire_done(&eb.ww.ctx);
+
 	/*
 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
@@ -2911,7 +2973,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
-			goto err_parse;
+			goto err_vma;
 		}
 
 		batch = vma;
@@ -2963,8 +3025,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * to explicitly hold another reference here.
 	 */
 	eb.request->batch = batch;
-	if (batch->private)
-		intel_gt_buffer_pool_mark_active(batch->private, eb.request);
+	if (eb.batch_pool)
+		intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
 
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb, batch);
@@ -2991,14 +3053,18 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_batch_unpin:
 	if (eb.batch_flags & I915_DISPATCH_SECURE)
 		i915_vma_unpin(batch);
-err_parse:
-	if (batch->private)
-		intel_gt_buffer_pool_put(batch->private);
 err_vma:
-	if (eb.exec)
-		eb_release_vmas(&eb);
+	eb_release_vmas(&eb, true);
 	if (eb.trampoline)
 		i915_vma_unpin(eb.trampoline);
+	WARN_ON(err == -EDEADLK);
+	i915_gem_ww_ctx_fini(&eb.ww);
+
+	if (eb.batch_pool)
+		intel_gt_buffer_pool_put(eb.batch_pool);
+	if (eb.reloc_pool)
+		intel_gt_buffer_pool_put(eb.reloc_pool);
+err_engine:
 	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 580884cffec3..2a421c64fafd 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -32,25 +32,19 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
 	if (err)
 		return err;
 
 	/* 8-Byte aligned */
-	if (!__reloc_entry_gpu(eb, vma,
-			       offsets[0] * sizeof(u32),
-			       0)) {
-		err = -EIO;
-		goto unpin_vma;
-	}
+	err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
+	if (err <= 0)
+		goto reloc_err;
 
 	/* !8-Byte aligned */
-	if (!__reloc_entry_gpu(eb, vma,
-			       offsets[1] * sizeof(u32),
-			       1)) {
-		err = -EIO;
-		goto unpin_vma;
-	}
+	err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
+	if (err <= 0)
+		goto reloc_err;
 
 	/* Skip to the end of the cmd page */
 	i = PAGE_SIZE / sizeof(u32) - 1;
@@ -60,16 +54,13 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	eb->reloc_cache.rq_size += i;
 
 	/* Force next batch */
-	if (!__reloc_entry_gpu(eb, vma,
-			       offsets[2] * sizeof(u32),
-			       2)) {
-		err = -EIO;
-		goto unpin_vma;
-	}
+	err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
+	if (err <= 0)
+		goto reloc_err;
 
 	GEM_BUG_ON(!eb->reloc_cache.rq);
 	rq = i915_request_get(eb->reloc_cache.rq);
-	reloc_gpu_flush(&eb->reloc_cache);
+	reloc_gpu_flush(eb, &eb->reloc_cache);
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
 	err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
@@ -101,6 +92,11 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 unpin_vma:
 	i915_vma_unpin(vma);
 	return err;
+
+reloc_err:
+	if (!err)
+		err = -EIO;
+	goto unpin_vma;
 }
 
 static int igt_gpu_reloc(void *arg)
@@ -122,6 +118,8 @@ static int igt_gpu_reloc(void *arg)
 		goto err_scratch;
 	}
 
+	intel_gt_pm_get(&eb.i915->gt);
+
 	for_each_uabi_engine(eb.engine, eb.i915) {
 		reloc_cache_init(&eb.reloc_cache, eb.i915);
 		memset(map, POISON_INUSE, 4096);
@@ -132,15 +130,26 @@ static int igt_gpu_reloc(void *arg)
 			err = PTR_ERR(eb.context);
 			goto err_pm;
 		}
+		eb.reloc_pool = NULL;
 
-		err = intel_context_pin(eb.context);
-		if (err)
-			goto err_put;
+		i915_gem_ww_ctx_init(&eb.ww, false);
+retry:
+		err = intel_context_pin_ww(eb.context, &eb.ww);
+		if (!err) {
+			err = __igt_gpu_reloc(&eb, scratch);
+
+			intel_context_unpin(eb.context);
+		}
+		if (err == -EDEADLK) {
+			err = i915_gem_ww_ctx_backoff(&eb.ww);
+			if (!err)
+				goto retry;
+		}
+		i915_gem_ww_ctx_fini(&eb.ww);
 
-		err = __igt_gpu_reloc(&eb, scratch);
+		if (eb.reloc_pool)
+			intel_gt_buffer_pool_put(eb.reloc_pool);
 
-		intel_context_unpin(eb.context);
-err_put:
 		intel_context_put(eb.context);
 err_pm:
 		intel_engine_pm_put(eb.engine);
@@ -151,6 +160,7 @@ static int igt_gpu_reloc(void *arg)
 	if (igt_flush_test(eb.i915))
 		err = -EIO;
 
+	intel_gt_pm_put(&eb.i915->gt);
 err_scratch:
 	i915_gem_object_put(scratch);
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 33f6f88c8b08..20653b660b61 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1378,6 +1378,12 @@ static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
 	}
 }
 
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
+{
+	list_del(&obj->obj_link);
+	i915_gem_object_unlock(obj);
+}
+
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
 {
 	i915_gem_ww_ctx_unlock_all(ww);
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 988755dbf4be..f6bef9894111 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -126,5 +126,6 @@ struct i915_gem_ww_ctx {
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
 
 #endif /* __I915_GEM_H__ */
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 10/26] drm/i915: Use ww locking in intel_renderstate.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (7 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 09/26] drm/i915: Use per object locking in execbuf, v12 Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 11/26] drm/i915: Add ww context handling to context_barrier_task Maarten Lankhorst
                   ` (21 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

We want to start using ww locking in intel_context_pin. For this
we need to lock multiple objects, and the single i915_gem_object_lock
is not enough.

Convert to using ww-waiting, and make sure we always pin intel_context_state,
even if we don't have a renderstate object.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt.c          | 21 +++---
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 73 +++++++++++++++------
 drivers/gpu/drm/i915/gt/intel_renderstate.h |  9 ++-
 3 files changed, 71 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ebc29b6ee86c..24a0e47a2477 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -398,21 +398,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
 		/* We must be able to switch to something! */
 		GEM_BUG_ON(!engine->kernel_context);
 
-		err = intel_renderstate_init(&so, engine);
-		if (err)
-			goto out;
-
 		ce = intel_context_create(engine);
 		if (IS_ERR(ce)) {
 			err = PTR_ERR(ce);
 			goto out;
 		}
 
-		rq = intel_context_create_request(ce);
+		err = intel_renderstate_init(&so, ce);
+		if (err)
+			goto err;
+
+		rq = i915_request_create(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			intel_context_put(ce);
-			goto out;
+			goto err_fini;
 		}
 
 		err = intel_engine_emit_ctx_wa(rq);
@@ -426,9 +425,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
 err_rq:
 		requests[id] = i915_request_get(rq);
 		i915_request_add(rq);
-		intel_renderstate_fini(&so);
-		if (err)
+err_fini:
+		intel_renderstate_fini(&so, ce);
+err:
+		if (err) {
+			intel_context_put(ce);
 			goto out;
+		}
 	}
 
 	/* Flush the default context image to memory, and enable powersaving. */
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 6db23389e427..3f7881de6a3c 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
+#include "gt/intel_context.h"
 #include "intel_ring.h"
 
 static const struct intel_renderstate_rodata *
@@ -157,33 +158,47 @@ static int render_state_setup(struct intel_renderstate *so,
 #undef OUT_BATCH
 
 int intel_renderstate_init(struct intel_renderstate *so,
-			   struct intel_engine_cs *engine)
+			   struct intel_context *ce)
 {
-	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *engine = ce->engine;
+	struct drm_i915_gem_object *obj = NULL;
 	int err;
 
 	memset(so, 0, sizeof(*so));
 
 	so->rodata = render_state_get_rodata(engine);
-	if (!so->rodata)
-		return 0;
+	if (so->rodata) {
+		if (so->rodata->batch_items * 4 > PAGE_SIZE)
+			return -EINVAL;
+
+		obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+		if (IS_ERR(so->vma)) {
+			err = PTR_ERR(so->vma);
+			goto err_obj;
+		}
+	}
 
-	if (so->rodata->batch_items * 4 > PAGE_SIZE)
-		return -EINVAL;
+	i915_gem_ww_ctx_init(&so->ww, true);
+retry:
+	err = intel_context_pin(ce);
+	if (err)
+		goto err_fini;
 
-	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+	/* return early if there's nothing to setup */
+	if (!err && !so->rodata)
+		return 0;
 
-	so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
-	if (IS_ERR(so->vma)) {
-		err = PTR_ERR(so->vma);
-		goto err_obj;
-	}
+	err = i915_gem_object_lock(so->vma->obj, &so->ww);
+	if (err)
+		goto err_context;
 
 	err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto err_obj;
+		goto err_context;
 
 	err = render_state_setup(so, engine->i915);
 	if (err)
@@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so,
 
 err_unpin:
 	i915_vma_unpin(so->vma);
+err_context:
+	intel_context_unpin(ce);
+err_fini:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&so->ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&so->ww);
 err_obj:
-	i915_gem_object_put(obj);
+	if (obj)
+		i915_gem_object_put(obj);
 	so->vma = NULL;
 	return err;
 }
@@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so,
 	if (!so->vma)
 		return 0;
 
-	i915_vma_lock(so->vma);
 	err = i915_request_await_object(rq, so->vma->obj, false);
 	if (err == 0)
 		err = i915_vma_move_to_active(so->vma, rq, 0);
-	i915_vma_unlock(so->vma);
 	if (err)
 		return err;
 
@@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so,
 	return 0;
 }
 
-void intel_renderstate_fini(struct intel_renderstate *so)
+void intel_renderstate_fini(struct intel_renderstate *so,
+			    struct intel_context *ce)
 {
-	i915_vma_unpin_and_release(&so->vma, 0);
+	if (so->vma) {
+		i915_vma_unpin(so->vma);
+		i915_vma_close(so->vma);
+	}
+
+	intel_context_unpin(ce);
+	i915_gem_ww_ctx_fini(&so->ww);
+
+	if (so->vma)
+		i915_gem_object_put(so->vma->obj);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
index 5700be69a05a..713aa1e86c80 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -25,9 +25,10 @@
 #define _INTEL_RENDERSTATE_H_
 
 #include <linux/types.h>
+#include "i915_gem.h"
 
 struct i915_request;
-struct intel_engine_cs;
+struct intel_context;
 struct i915_vma;
 
 struct intel_renderstate_rodata {
@@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state;
 extern const struct intel_renderstate_rodata gen9_null_state;
 
 struct intel_renderstate {
+	struct i915_gem_ww_ctx ww;
 	const struct intel_renderstate_rodata *rodata;
 	struct i915_vma *vma;
 	u32 batch_offset;
@@ -58,9 +60,10 @@ struct intel_renderstate {
 };
 
 int intel_renderstate_init(struct intel_renderstate *so,
-			   struct intel_engine_cs *engine);
+			   struct intel_context *ce);
 int intel_renderstate_emit(struct intel_renderstate *so,
 			   struct i915_request *rq);
-void intel_renderstate_fini(struct intel_renderstate *so);
+void intel_renderstate_fini(struct intel_renderstate *so,
+			    struct intel_context *ce);
 
 #endif /* _INTEL_RENDERSTATE_H_ */
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 11/26] drm/i915: Add ww context handling to context_barrier_task
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (8 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 10/26] drm/i915: Use ww locking in intel_renderstate Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 12/26] drm/i915: Nuke arguments to eb_pin_engine Maarten Lankhorst
                   ` (20 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

This is required if we want to pass a ww context in intel_context_pin
and gen6_ppgtt_pin().

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 55 ++++++++++++++-----
 .../drm/i915/gem/selftests/i915_gem_context.c | 22 +++-----
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a996583640ee..a1e709557704 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1094,6 +1094,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
 				intel_engine_mask_t engines,
 				bool (*skip)(struct intel_context *ce, void *data),
+				int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
 				int (*emit)(struct i915_request *rq, void *data),
 				void (*task)(void *data),
 				void *data)
@@ -1101,6 +1102,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct i915_gem_engines *e;
+	struct i915_gem_ww_ctx ww;
 	struct intel_context *ce;
 	int err = 0;
 
@@ -1138,10 +1140,21 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (skip && skip(ce, data))
 			continue;
 
-		rq = intel_context_create_request(ce);
+		i915_gem_ww_ctx_init(&ww, true);
+retry:
+		err = intel_context_pin(ce);
+		if (err)
+			goto err;
+
+		if (pin)
+			err = pin(ce, &ww, data);
+		if (err)
+			goto err_unpin;
+
+		rq = i915_request_create(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			break;
+			goto err_unpin;
 		}
 
 		err = 0;
@@ -1151,6 +1164,16 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 			err = i915_active_add_request(&cb->base, rq);
 
 		i915_request_add(rq);
+err_unpin:
+		intel_context_unpin(ce);
+err:
+		if (err == -EDEADLK) {
+			err = i915_gem_ww_ctx_backoff(&ww);
+			if (!err)
+				goto retry;
+		}
+		i915_gem_ww_ctx_fini(&ww);
+
 		if (err)
 			break;
 	}
@@ -1206,6 +1229,17 @@ static void set_ppgtt_barrier(void *data)
 	i915_vm_close(old);
 }
 
+static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
+{
+	struct i915_address_space *vm = ce->vm;
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
+		/* ppGTT is not part of the legacy context image */
+		return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+
+	return 0;
+}
+
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
 	struct i915_address_space *vm = rq->context->vm;
@@ -1262,20 +1296,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 
 static bool skip_ppgtt_update(struct intel_context *ce, void *data)
 {
-	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
-		return true;
-
 	if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
-		return false;
-
-	if (!atomic_read(&ce->pin_count))
-		return true;
-
-	/* ppGTT is not part of the legacy context image */
-	if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
-		return true;
-
-	return false;
+		return !ce->state;
+	else
+		return !atomic_read(&ce->pin_count);
 }
 
 static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1326,6 +1350,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	 */
 	err = context_barrier_task(ctx, ALL_ENGINES,
 				   skip_ppgtt_update,
+				   pin_ppgtt_update,
 				   emit_ppgtt_update,
 				   set_ppgtt_barrier,
 				   old);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 76671f587b9d..1217f7a43069 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1917,8 +1917,8 @@ static int mock_context_barrier(void *arg)
 		return -ENOMEM;
 
 	counter = 0;
-	err = context_barrier_task(ctx, 0,
-				   NULL, NULL, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
+				   mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1930,11 +1930,8 @@ static int mock_context_barrier(void *arg)
 	}
 
 	counter = 0;
-	err = context_barrier_task(ctx, ALL_ENGINES,
-				   skip_unused_engines,
-				   NULL,
-				   mock_barrier_task,
-				   &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
+				   NULL, NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1954,8 +1951,8 @@ static int mock_context_barrier(void *arg)
 
 	counter = 0;
 	context_barrier_inject_fault = BIT(RCS0);
-	err = context_barrier_task(ctx, ALL_ENGINES,
-				   NULL, NULL, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
+				   mock_barrier_task, &counter);
 	context_barrier_inject_fault = 0;
 	if (err == -ENXIO)
 		err = 0;
@@ -1969,11 +1966,8 @@ static int mock_context_barrier(void *arg)
 		goto out;
 
 	counter = 0;
-	err = context_barrier_task(ctx, ALL_ENGINES,
-				   skip_unused_engines,
-				   NULL,
-				   mock_barrier_task,
-				   &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
+				   NULL, NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 12/26] drm/i915: Nuke arguments to eb_pin_engine
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (9 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 11/26] drm/i915: Add ww context handling to context_barrier_task Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 13/26] drm/i915: Pin engine before pinning all objects, v4 Maarten Lankhorst
                   ` (19 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Those arguments are already set as eb.file and eb.args, so kill off
the extra arguments. This will allow us to move eb_pin_engine() to
after we have reserved all BOs.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index e00ae140e4d0..83d83884b22d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2605,11 +2605,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 }
 
 static unsigned int
-eb_select_legacy_ring(struct i915_execbuffer *eb,
-		      struct drm_file *file,
-		      struct drm_i915_gem_execbuffer2 *args)
+eb_select_legacy_ring(struct i915_execbuffer *eb)
 {
 	struct drm_i915_private *i915 = eb->i915;
+	struct drm_i915_gem_execbuffer2 *args = eb->args;
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 
 	if (user_ring_id != I915_EXEC_BSD &&
@@ -2624,7 +2623,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+			bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2649,18 +2648,16 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_pin_engine(struct i915_execbuffer *eb,
-	      struct drm_file *file,
-	      struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce;
 	unsigned int idx;
 	int err;
 
 	if (i915_gem_context_user_engines(eb->gem_context))
-		idx = args->flags & I915_EXEC_RING_MASK;
+		idx = eb->args->flags & I915_EXEC_RING_MASK;
 	else
-		idx = eb_select_legacy_ring(eb, file, args);
+		idx = eb_select_legacy_ring(eb);
 
 	ce = i915_gem_context_get_engine(eb->gem_context, idx);
 	if (IS_ERR(ce))
@@ -2926,7 +2923,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = eb_pin_engine(&eb, file, args);
+	err = eb_pin_engine(&eb);
 	if (unlikely(err))
 		goto err_context;
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 13/26] drm/i915: Pin engine before pinning all objects, v4.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (10 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 12/26] drm/i915: Nuke arguments to eb_pin_engine Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 14/26] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex Maarten Lankhorst
                   ` (18 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

We want to lock all gem objects, including the engine context objects, so
rework the throttling to ensure that we can do this. Now we only throttle
once, but can take eb_pin_engine while acquiring objects. This means we
will have to drop the lock to wait. If we don't have to throttle, we can
still take the fastpath; if not, we will take the slowpath and wait for
the throttle request while unlocked.

The engine has to be pinned as the first step, otherwise gpu relocations
won't work.

Changes since v1:
- Only need to get a throttled request in the fastpath, no need for
  a global flag any more.
- Always free the waited request correctly.
Changes since v2:
- Use intel_engine_pm_get()/put() to keep engine pool alive during
  EDEADLK handling.
Changes since v3:
- Fix small rq leak.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 185 ++++++++++++------
 1 file changed, 129 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 83d83884b22d..9990e4677b9a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -55,7 +55,8 @@ enum {
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
-#define __EXEC_INTERNAL_FLAGS	(~0u << 31)
+#define __EXEC_ENGINE_PINNED	BIT(30)
+#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
 #define UPDATE			PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -292,6 +293,9 @@ struct i915_execbuffer {
 };
 
 static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+					  bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
@@ -923,7 +927,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	}
 }
 
-static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i;
@@ -940,6 +944,8 @@ static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
 		if (final)
 			i915_vma_put(vma);
 	}
+
+	eb_unpin_engine(eb);
 }
 
 static void eb_destroy(const struct i915_execbuffer *eb)
@@ -1854,7 +1860,8 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
 	return 0;
 }
 
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+					   struct i915_request *rq)
 {
 	bool have_copy = false;
 	struct eb_vma *ev;
@@ -1870,6 +1877,21 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 	eb_release_vmas(eb, false);
 	i915_gem_ww_ctx_fini(&eb->ww);
 
+	if (rq) {
+		/* nonblocking is always false */
+		if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			rq = NULL;
+
+			err = -EINTR;
+			goto err_relock;
+		}
+
+		i915_request_put(rq);
+		rq = NULL;
+	}
+
 	/*
 	 * We take 3 passes through the slowpatch.
 	 *
@@ -1893,14 +1915,25 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 		err = 0;
 	}
 
-	flush_workqueue(eb->i915->mm.userptr_wq);
+	if (!err)
+		flush_workqueue(eb->i915->mm.userptr_wq);
 
+err_relock:
 	i915_gem_ww_ctx_init(&eb->ww, true);
 	if (err)
 		goto out;
 
 	/* reacquire the objects */
 repeat_validate:
+	rq = eb_pin_engine(eb, false);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err;
+	}
+
+	/* We didn't throttle, should be NULL */
+	GEM_WARN_ON(rq);
+
 	err = eb_validate_vmas(eb);
 	if (err)
 		goto err;
@@ -1971,14 +2004,49 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 		}
 	}
 
+	if (rq)
+		i915_request_put(rq);
+
 	return err;
 }
 
 static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
 	int err;
+	struct i915_request *rq = NULL;
+	bool throttle = true;
 
 retry:
+	rq = eb_pin_engine(eb, throttle);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		rq = NULL;
+		if (err != -EDEADLK)
+			return err;
+
+		goto err;
+	}
+
+	if (rq) {
+		bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+
+		/* Need to drop all locks now for throttling, take slowpath */
+		err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+		if (err == -ETIME) {
+			if (nonblock) {
+				err = -EWOULDBLOCK;
+				i915_request_put(rq);
+				goto err;
+			}
+			goto slow;
+		}
+		i915_request_put(rq);
+		rq = NULL;
+	}
+
+	/* only throttle once, even if we didn't need to throttle */
+	throttle = false;
+
 	err = eb_validate_vmas(eb);
 	if (err == -EAGAIN)
 		goto slow;
@@ -2015,7 +2083,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
 	return err;
 
 slow:
-	err = eb_relocate_parse_slow(eb);
+	err = eb_relocate_parse_slow(eb, rq);
 	if (err)
 		/*
 		 * If the user expects the execobject.offset and
@@ -2481,7 +2549,7 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static struct i915_request *eb_throttle(struct intel_context *ce)
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	struct intel_ring *ring = ce->ring;
 	struct intel_timeline *tl = ce->timeline;
@@ -2515,22 +2583,17 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
 	return i915_request_get(rq);
 }
 
-static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
 {
+	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl;
-	struct i915_request *rq;
+	struct i915_request *rq = NULL;
 	int err;
 
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	err = intel_gt_terminally_wedged(ce->engine->gt);
-	if (err)
-		return err;
+	GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
 
 	if (unlikely(intel_context_is_banned(ce)))
-		return -EIO;
+		return ERR_PTR(-EIO);
 
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
@@ -2539,7 +2602,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	 */
 	err = intel_context_pin(ce);
 	if (err)
-		return err;
+		return ERR_PTR(err);
 
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
@@ -2551,45 +2614,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	 */
 	tl = intel_context_timeline_lock(ce);
 	if (IS_ERR(tl)) {
-		err = PTR_ERR(tl);
-		goto err_unpin;
+		intel_context_unpin(ce);
+		return ERR_CAST(tl);
 	}
 
 	intel_context_enter(ce);
-	rq = eb_throttle(ce);
-
+	if (throttle)
+		rq = eb_throttle(eb, ce);
 	intel_context_timeline_unlock(tl);
 
-	if (rq) {
-		bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-		long timeout;
-
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (nonblock)
-			timeout = 0;
-
-		timeout = i915_request_wait(rq,
-					    I915_WAIT_INTERRUPTIBLE,
-					    timeout);
-		i915_request_put(rq);
-
-		if (timeout < 0) {
-			err = nonblock ? -EWOULDBLOCK : timeout;
-			goto err_exit;
-		}
-	}
-
-	eb->engine = ce->engine;
-	eb->context = ce;
-	return 0;
-
-err_exit:
-	mutex_lock(&tl->mutex);
-	intel_context_exit(ce);
-	intel_context_timeline_unlock(tl);
-err_unpin:
-	intel_context_unpin(ce);
-	return err;
+	eb->args->flags |= __EXEC_ENGINE_PINNED;
+	return rq;
 }
 
 static void eb_unpin_engine(struct i915_execbuffer *eb)
@@ -2597,6 +2632,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl = ce->timeline;
 
+	if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+		return;
+
+	eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
 	mutex_lock(&tl->mutex);
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
@@ -2648,7 +2688,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
 }
 
 static int
-eb_pin_engine(struct i915_execbuffer *eb)
+eb_select_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2663,10 +2703,43 @@ eb_pin_engine(struct i915_execbuffer *eb)
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = __eb_pin_engine(eb, ce);
-	intel_context_put(ce);
+	intel_gt_pm_get(ce->engine->gt);
 
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		err = intel_context_alloc_state(ce);
+		if (err)
+			goto err;
+	}
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	err = intel_gt_terminally_wedged(ce->engine->gt);
+	if (err)
+		goto err;
+
+	eb->context = ce;
+	eb->engine = ce->engine;
+
+	/*
+	 * Make sure engine pool stays alive even if we call intel_context_put
+	 * during ww handling. The pool is destroyed when last pm reference
+	 * is dropped, which breaks our -EDEADLK handling.
+	 */
 	return err;
+
+err:
+	intel_gt_pm_put(ce->engine->gt);
+	intel_context_put(ce);
+	return err;
+}
+
+static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+	intel_gt_pm_put(eb->engine->gt);
+	intel_context_put(eb->context);
 }
 
 static void
@@ -2923,7 +2996,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = eb_pin_engine(&eb);
+	err = eb_select_engine(&eb);
 	if (unlikely(err))
 		goto err_context;
 
@@ -3062,7 +3135,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (eb.reloc_pool)
 		intel_gt_buffer_pool_put(eb.reloc_pool);
 err_engine:
-	eb_unpin_engine(&eb);
+	eb_put_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 14/26] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (11 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 13/26] drm/i915: Pin engine before pinning all objects, v4 Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 15/26] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin Maarten Lankhorst
                   ` (17 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Instead of doing everything inside of pin_mutex, we move all pinning
outside. Because i915_active has its own reference counting and
pinning also has the same issues vs mutexes, we make sure
everything is pinned first, so the pinning in i915_active only needs
to bump refcounts. This allows us to take pin refcounts correctly
all the time.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 232 +++++++++++-------
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  34 ++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  13 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |  13 +-
 5 files changed, 190 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index e4aece20bc80..c039e87a46c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,79 +93,6 @@ static void intel_context_active_release(struct intel_context *ce)
 	i915_active_release(&ce->active);
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
-{
-	int err;
-
-	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-		err = intel_context_alloc_state(ce);
-		if (err)
-			return err;
-	}
-
-	err = i915_active_acquire(&ce->active);
-	if (err)
-		return err;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex)) {
-		err = -EINTR;
-		goto out_release;
-	}
-
-	if (unlikely(intel_context_is_closed(ce))) {
-		err = -ENOENT;
-		goto out_unlock;
-	}
-
-	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
-		err = intel_context_active_acquire(ce);
-		if (unlikely(err))
-			goto out_unlock;
-
-		err = ce->ops->pin(ce);
-		if (unlikely(err))
-			goto err_active;
-
-		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
-			 i915_ggtt_offset(ce->ring->vma),
-			 ce->ring->head, ce->ring->tail);
-
-		smp_mb__before_atomic(); /* flush pin before it is visible */
-		atomic_inc(&ce->pin_count);
-	}
-
-	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	goto out_unlock;
-
-err_active:
-	intel_context_active_release(ce);
-out_unlock:
-	mutex_unlock(&ce->pin_mutex);
-out_release:
-	i915_active_release(&ce->active);
-	return err;
-}
-
-void intel_context_unpin(struct intel_context *ce)
-{
-	if (!atomic_dec_and_test(&ce->pin_count))
-		return;
-
-	CE_TRACE(ce, "unpin\n");
-	ce->ops->unpin(ce);
-
-	/*
-	 * Once released, we may asynchronously drop the active reference.
-	 * As that may be the only reference keeping the context alive,
-	 * take an extra now so that it is not freed before we finish
-	 * dereferencing it.
-	 */
-	intel_context_get(ce);
-	intel_context_active_release(ce);
-	intel_context_put(ce);
-}
-
 static int __context_pin_state(struct i915_vma *vma)
 {
 	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
@@ -225,6 +152,138 @@ static void __ring_retire(struct intel_ring *ring)
 	i915_active_release(&ring->vma->active);
 }
 
+static int intel_context_pre_pin(struct intel_context *ce)
+{
+	int err;
+
+	CE_TRACE(ce, "active\n");
+
+	err = __ring_active(ce->ring);
+	if (err)
+		return err;
+
+	err = intel_timeline_pin(ce->timeline);
+	if (err)
+		goto err_ring;
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state);
+	if (err)
+		goto err_timeline;
+
+
+	return 0;
+
+err_timeline:
+	intel_timeline_unpin(ce->timeline);
+err_ring:
+	__ring_retire(ce->ring);
+	return err;
+}
+
+static void intel_context_post_unpin(struct intel_context *ce)
+{
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_timeline_unpin(ce->timeline);
+	__ring_retire(ce->ring);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	bool handoff = false;
+	void *vaddr;
+	int err = 0;
+
+	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+		err = intel_context_alloc_state(ce);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * We always pin the context/ring/timeline here, to ensure a pin
+	 * refcount for __intel_context_active(), which prevent a lock
+	 * inversion of ce->pin_mutex vs dma_resv_lock().
+	 */
+	err = intel_context_pre_pin(ce);
+	if (err)
+		return err;
+
+	err = i915_active_acquire(&ce->active);
+	if (err)
+		goto err_ctx_unpin;
+
+	err = ce->ops->pre_pin(ce, &vaddr);
+	if (err)
+		goto err_release;
+
+	err = mutex_lock_interruptible(&ce->pin_mutex);
+	if (err)
+		goto err_post_unpin;
+
+	if (unlikely(intel_context_is_closed(ce))) {
+		err = -ENOENT;
+		goto err_unlock;
+	}
+
+	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
+		err = intel_context_active_acquire(ce);
+		if (unlikely(err))
+			goto err_unlock;
+
+		err = ce->ops->pin(ce, vaddr);
+		if (err) {
+			intel_context_active_release(ce);
+			goto err_unlock;
+		}
+
+		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
+			 i915_ggtt_offset(ce->ring->vma),
+			 ce->ring->head, ce->ring->tail);
+
+		handoff = true;
+		smp_mb__before_atomic(); /* flush pin before it is visible */
+		atomic_inc(&ce->pin_count);
+	}
+
+	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+
+err_unlock:
+	mutex_unlock(&ce->pin_mutex);
+err_post_unpin:
+	if (!handoff)
+		ce->ops->post_unpin(ce);
+err_release:
+	i915_active_release(&ce->active);
+err_ctx_unpin:
+	intel_context_post_unpin(ce);
+	return err;
+}
+
+void intel_context_unpin(struct intel_context *ce)
+{
+	if (!atomic_dec_and_test(&ce->pin_count))
+		return;
+
+	CE_TRACE(ce, "unpin\n");
+	ce->ops->unpin(ce);
+	ce->ops->post_unpin(ce);
+
+	/*
+	 * Once released, we may asynchronously drop the active reference.
+	 * As that may be the only reference keeping the context alive,
+	 * take an extra now so that it is not freed before we finish
+	 * dereferencing it.
+	 */
+	intel_context_get(ce);
+	intel_context_active_release(ce);
+	intel_context_put(ce);
+}
+
 __i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
@@ -235,12 +294,7 @@ static void __intel_context_retire(struct i915_active *active)
 		 intel_context_get_avg_runtime_ns(ce));
 
 	set_bit(CONTEXT_VALID_BIT, &ce->flags);
-	if (ce->state)
-		__context_unpin_state(ce->state);
-
-	intel_timeline_unpin(ce->timeline);
-	__ring_retire(ce->ring);
-
+	intel_context_post_unpin(ce);
 	intel_context_put(ce);
 }
 
@@ -249,29 +303,25 @@ static int __intel_context_active(struct i915_active *active)
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 	int err;
 
-	CE_TRACE(ce, "active\n");
-
 	intel_context_get(ce);
 
+	/* everything should already be activated by intel_context_pre_pin() */
 	err = __ring_active(ce->ring);
-	if (err)
+	if (GEM_WARN_ON(err))
 		goto err_put;
 
 	err = intel_timeline_pin(ce->timeline);
-	if (err)
+	if (GEM_WARN_ON(err))
 		goto err_ring;
 
-	if (!ce->state)
-		return 0;
-
-	err = __context_pin_state(ce->state);
-	if (err)
-		goto err_timeline;
+	if (ce->state) {
+		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
+		__i915_vma_pin(ce->state);
+		i915_vma_make_unshrinkable(ce->state);
+	}
 
 	return 0;
 
-err_timeline:
-	intel_timeline_unpin(ce->timeline);
 err_ring:
 	__ring_retire(ce->ring);
 err_put:
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4954b0df4864..ca8e05b4d3ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -30,8 +30,10 @@ struct intel_ring;
 struct intel_context_ops {
 	int (*alloc)(struct intel_context *ce);
 
-	int (*pin)(struct intel_context *ce);
+	int (*pre_pin)(struct intel_context *ce, void **vaddr);
+	int (*pin)(struct intel_context *ce, void *vaddr);
 	void (*unpin)(struct intel_context *ce);
+	void (*post_unpin)(struct intel_context *ce);
 
 	void (*enter)(struct intel_context *ce);
 	void (*exit)(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e866b8d721ed..3dca5e5d8451 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3279,7 +3279,10 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
 		      ce->engine);
+}
 
+static void execlists_context_post_unpin(struct intel_context *ce)
+{
 	i915_gem_object_unpin_map(ce->state->obj);
 }
 
@@ -3441,20 +3444,23 @@ __execlists_update_reg_state(const struct intel_context *ce,
 }
 
 static int
-__execlists_context_pin(struct intel_context *ce,
-			struct intel_engine_cs *engine)
+execlists_context_pre_pin(struct intel_context *ce, void **vaddr)
 {
-	void *vaddr;
-
 	GEM_BUG_ON(!ce->state);
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
-	vaddr = i915_gem_object_pin_map(ce->state->obj,
-					i915_coherent_map_type(engine->i915) |
+	*vaddr = i915_gem_object_pin_map(ce->state->obj,
+					i915_coherent_map_type(ce->engine->i915) |
 					I915_MAP_OVERRIDE);
-	if (IS_ERR(vaddr))
-		return PTR_ERR(vaddr);
 
+	return PTR_ERR_OR_ZERO(*vaddr);
+}
+
+static int
+__execlists_context_pin(struct intel_context *ce,
+			struct intel_engine_cs *engine,
+			void *vaddr)
+{
 	ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3462,9 +3468,9 @@ __execlists_context_pin(struct intel_context *ce,
 	return 0;
 }
 
-static int execlists_context_pin(struct intel_context *ce)
+static int execlists_context_pin(struct intel_context *ce, void *vaddr)
 {
-	return __execlists_context_pin(ce, ce->engine);
+	return __execlists_context_pin(ce, ce->engine, vaddr);
 }
 
 static int execlists_context_alloc(struct intel_context *ce)
@@ -3490,8 +3496,10 @@ static void execlists_context_reset(struct intel_context *ce)
 static const struct intel_context_ops execlists_context_ops = {
 	.alloc = execlists_context_alloc,
 
+	.pre_pin = execlists_context_pre_pin,
 	.pin = execlists_context_pin,
 	.unpin = execlists_context_unpin,
+	.post_unpin = execlists_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
@@ -5419,13 +5427,13 @@ static int virtual_context_alloc(struct intel_context *ce)
 	return __execlists_context_alloc(ce, ve->siblings[0]);
 }
 
-static int virtual_context_pin(struct intel_context *ce)
+static int virtual_context_pin(struct intel_context *ce, void *vaddr)
 {
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 	int err;
 
 	/* Note: we must use a real engine class for setting up reg state */
-	err = __execlists_context_pin(ce, ve->siblings[0]);
+	err = __execlists_context_pin(ce, ve->siblings[0], vaddr);
 	if (err)
 		return err;
 
@@ -5458,8 +5466,10 @@ static void virtual_context_exit(struct intel_context *ce)
 static const struct intel_context_ops virtual_context_ops = {
 	.alloc = virtual_context_alloc,
 
+	.pre_pin = execlists_context_pre_pin,
 	.pin = virtual_context_pin,
 	.unpin = execlists_context_unpin,
+	.post_unpin = execlists_context_post_unpin,
 
 	.enter = virtual_context_enter,
 	.exit = virtual_context_exit,
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 68a08486fc87..6914abf3a88d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -496,6 +496,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
 }
 
 static void ring_context_unpin(struct intel_context *ce)
+{
+}
+
+static void ring_context_post_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce);
 }
@@ -584,11 +588,16 @@ static int ring_context_alloc(struct intel_context *ce)
 	return 0;
 }
 
-static int ring_context_pin(struct intel_context *ce)
+static int ring_context_pre_pin(struct intel_context *ce, void **unused)
 {
 	return __context_pin_ppgtt(ce);
 }
 
+static int ring_context_pin(struct intel_context *ce, void *unused)
+{
+	return 0;
+}
+
 static void ring_context_reset(struct intel_context *ce)
 {
 	intel_ring_reset(ce->ring, ce->ring->emit);
@@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce)
 static const struct intel_context_ops ring_context_ops = {
 	.alloc = ring_context_alloc,
 
+	.pre_pin = ring_context_pre_pin,
 	.pin = ring_context_pin,
 	.unpin = ring_context_unpin,
+	.post_unpin = ring_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index b8dd3cbc8696..62664601e683 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce)
 {
 }
 
+static void mock_context_post_unpin(struct intel_context *ce)
+{
+}
+
 static void mock_context_destroy(struct kref *ref)
 {
 	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
@@ -164,7 +168,12 @@ static int mock_context_alloc(struct intel_context *ce)
 	return 0;
 }
 
-static int mock_context_pin(struct intel_context *ce)
+static int mock_context_pre_pin(struct intel_context *ce, void **unused)
+{
+	return 0;
+}
+
+static int mock_context_pin(struct intel_context *ce, void *unused)
 {
 	return 0;
 }
@@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce)
 static const struct intel_context_ops mock_context_ops = {
 	.alloc = mock_context_alloc,
 
+	.pre_pin = mock_context_pre_pin,
 	.pin = mock_context_pin,
 	.unpin = mock_context_unpin,
+	.post_unpin = mock_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 15/26] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (12 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 14/26] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-25 14:32   ` Thomas Hellström (Intel)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 16/26] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2 Maarten Lankhorst
                   ` (16 subsequent siblings)
  30 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

As a preparation step for full object locking and wait/wound handling
during pin and object mapping, ensure that we always pass the ww context
in i915_gem_execbuffer.c to i915_vma_pin, and use lockdep to ensure this
happens.

This also requires changing the order of eb_parse slightly, to ensure
we pass ww at a point where we could still handle -EDEADLK safely.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 138 ++++++++++--------
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c          |   4 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.h          |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |  65 ++++++---
 drivers/gpu/drm/i915/gt/intel_context.h       |  13 ++
 drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   5 +-
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_ring.c          |  10 +-
 drivers/gpu/drm/i915/gt/intel_ring.h          |   3 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  15 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  12 +-
 drivers/gpu/drm/i915/gt/intel_timeline.h      |   3 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |   2 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c        |   2 +-
 drivers/gpu/drm/i915/i915_drv.h               |  13 +-
 drivers/gpu/drm/i915/i915_gem.c               |  11 +-
 drivers/gpu/drm/i915/i915_vma.c               |  13 +-
 drivers/gpu/drm/i915/i915_vma.h               |  13 +-
 25 files changed, 214 insertions(+), 134 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index e909ccc37a54..759a94010d3c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3449,7 +3449,7 @@ initial_plane_vma(struct drm_i915_private *i915,
 	if (IS_ERR(vma))
 		goto err_obj;
 
-	if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+	if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
 		goto err_obj;
 
 	if (i915_gem_object_is_tiled(obj) &&
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a1e709557704..b9d38e8edb5b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1142,7 +1142,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 
 		i915_gem_ww_ctx_init(&ww, true);
 retry:
-		err = intel_context_pin(ce);
+		err = intel_context_pin_ww(ce, &ww);
 		if (err)
 			goto err;
 
@@ -1235,7 +1235,7 @@ static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
 		/* ppGTT is not part of the legacy context image */
-		return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+		return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9990e4677b9a..680d9f0d55f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -424,16 +424,17 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		pin_flags |= PIN_GLOBAL;
 
 	/* Attempt to reuse the current location if available */
-	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+	/* TODO: Add -EDEADLK handling here */
+	if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
 		if (entry->flags & EXEC_OBJECT_PINNED)
 			return false;
 
 		/* Failing that pick any _free_ space if suitable */
-		if (unlikely(i915_vma_pin(vma,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  eb_pin_flags(entry, ev->flags) |
-					  PIN_USER | PIN_NOEVICT)))
+		if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
+					     entry->pad_to_size,
+					     entry->alignment,
+					     eb_pin_flags(entry, ev->flags) |
+					     PIN_USER | PIN_NOEVICT)))
 			return false;
 	}
 
@@ -574,7 +575,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 		obj->cache_level != I915_CACHE_NONE);
 }
 
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
+static int eb_reserve_vma(struct i915_execbuffer *eb,
 			  struct eb_vma *ev,
 			  u64 pin_flags)
 {
@@ -589,7 +590,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			return err;
 	}
 
-	err = i915_vma_pin(vma,
+	err = i915_vma_pin_ww(vma, &eb->ww,
 			   entry->pad_to_size, entry->alignment,
 			   eb_pin_flags(entry, ev->flags) | pin_flags);
 	if (err)
@@ -1114,9 +1115,10 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 }
 
 static void *reloc_iomap(struct drm_i915_gem_object *obj,
-			 struct reloc_cache *cache,
+			 struct i915_execbuffer *eb,
 			 unsigned long page)
 {
+	struct reloc_cache *cache = &eb->reloc_cache;
 	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 	unsigned long offset;
 	void *vaddr;
@@ -1138,10 +1140,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (err)
 			return ERR_PTR(err);
 
-		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-					       PIN_MAPPABLE |
-					       PIN_NONBLOCK /* NOWARN */ |
-					       PIN_NOEVICT);
+		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+						  PIN_MAPPABLE |
+						  PIN_NONBLOCK /* NOWARN */ |
+						  PIN_NOEVICT);
+		if (vma == ERR_PTR(-EDEADLK))
+			return vma;
+
 		if (IS_ERR(vma)) {
 			memset(&cache->node, 0, sizeof(cache->node));
 			mutex_lock(&ggtt->vm.mutex);
@@ -1177,9 +1182,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 }
 
 static void *reloc_vaddr(struct drm_i915_gem_object *obj,
-			 struct reloc_cache *cache,
+			 struct i915_execbuffer *eb,
 			 unsigned long page)
 {
+	struct reloc_cache *cache = &eb->reloc_cache;
 	void *vaddr;
 
 	if (cache->page == page) {
@@ -1187,7 +1193,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
 	} else {
 		vaddr = NULL;
 		if ((cache->vaddr & KMAP) == 0)
-			vaddr = reloc_iomap(obj, cache, page);
+			vaddr = reloc_iomap(obj, eb, page);
 		if (!vaddr)
 			vaddr = reloc_kmap(obj, cache, page);
 	}
@@ -1274,7 +1280,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unmap;
 	}
 
-	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
 	if (err)
 		goto err_unmap;
 
@@ -1511,8 +1517,7 @@ relocate_entry(struct i915_vma *vma,
 		void *vaddr;
 
 repeat:
-		vaddr = reloc_vaddr(vma->obj,
-				    &eb->reloc_cache,
+		vaddr = reloc_vaddr(vma->obj, eb,
 				    offset >> PAGE_SHIFT);
 		if (IS_ERR(vaddr))
 			return PTR_ERR(vaddr);
@@ -1928,6 +1933,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 	rq = eb_pin_engine(eb, false);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
+		rq = NULL;
 		goto err;
 	}
 
@@ -2210,7 +2216,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 }
 
 static struct i915_vma *
-shadow_batch_pin(struct drm_i915_gem_object *obj,
+shadow_batch_pin(struct i915_execbuffer *eb,
+		 struct drm_i915_gem_object *obj,
 		 struct i915_address_space *vm,
 		 unsigned int flags)
 {
@@ -2221,7 +2228,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return vma;
 
-	err = i915_vma_pin(vma, 0, 0, flags);
+	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2371,16 +2378,33 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	return err;
 }
 
+static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+	/*
+	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+	 * batch" bit. Hence we need to pin secure batches into the global gtt.
+	 * hsw should have this fixed, but bdw mucks it up again. */
+	if (eb->batch_flags & I915_DISPATCH_SECURE)
+		return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+
+	return NULL;
+}
+
 static int eb_parse(struct i915_execbuffer *eb)
 {
 	struct drm_i915_private *i915 = eb->i915;
 	struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
-	struct i915_vma *shadow, *trampoline;
+	struct i915_vma *shadow, *trampoline, *batch;
 	unsigned int len;
 	int err;
 
-	if (!eb_use_cmdparser(eb))
-		return 0;
+	if (!eb_use_cmdparser(eb)) {
+		batch = eb_dispatch_secure(eb, eb->batch->vma);
+		if (IS_ERR(batch))
+			return PTR_ERR(batch);
+
+		goto secure_batch;
+	}
 
 	len = eb->batch_len;
 	if (!CMDPARSER_USES_GGTT(eb->i915)) {
@@ -2408,7 +2432,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 	if (err)
 		goto err;
 
-	shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+	shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
 	if (IS_ERR(shadow)) {
 		err = PTR_ERR(shadow);
 		goto err;
@@ -2420,7 +2444,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 	if (CMDPARSER_USES_GGTT(eb->i915)) {
 		trampoline = shadow;
 
-		shadow = shadow_batch_pin(pool->obj,
+		shadow = shadow_batch_pin(eb, pool->obj,
 					  &eb->engine->gt->ggtt->vm,
 					  PIN_GLOBAL);
 		if (IS_ERR(shadow)) {
@@ -2433,19 +2457,34 @@ static int eb_parse(struct i915_execbuffer *eb)
 		eb->batch_flags |= I915_DISPATCH_SECURE;
 	}
 
+	batch = eb_dispatch_secure(eb, shadow);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_trampoline;
+	}
+
 	err = eb_parse_pipeline(eb, shadow, trampoline);
 	if (err)
-		goto err_trampoline;
+		goto err_unpin_batch;
 
-	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
-	eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
 	eb->batch = &eb->vma[eb->buffer_count++];
+	eb->batch->vma = i915_vma_get(shadow);
+	eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
 
 	eb->trampoline = trampoline;
 	eb->batch_start_offset = 0;
 
+secure_batch:
+	if (batch) {
+		eb->batch = &eb->vma[eb->buffer_count++];
+		eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+		eb->batch->vma = i915_vma_get(batch);
+	}
 	return 0;
 
+err_unpin_batch:
+	if (batch)
+		i915_vma_unpin(batch);
 err_trampoline:
 	if (trampoline)
 		i915_vma_unpin(trampoline);
@@ -2600,7 +2639,7 @@ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throt
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = intel_context_pin(ce);
+	err = intel_context_pin_ww(ce, &eb->ww);
 	if (err)
 		return ERR_PTR(err);
 
@@ -3023,33 +3062,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 	ww_acquire_done(&eb.ww.ctx);
 
-	/*
-	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
-	 * batch" bit. Hence we need to pin secure batches into the global gtt.
-	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (eb.batch_flags & I915_DISPATCH_SECURE) {
-		struct i915_vma *vma;
-
-		/*
-		 * So on first glance it looks freaky that we pin the batch here
-		 * outside of the reservation loop. But:
-		 * - The batch is already pinned into the relevant ppgtt, so we
-		 *   already have the backing storage fully allocated.
-		 * - No other BO uses the global gtt (well contexts, but meh),
-		 *   so we don't really have issues with multiple objects not
-		 *   fitting due to fragmentation.
-		 * So this is actually safe.
-		 */
-		vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
-		if (IS_ERR(vma)) {
-			err = PTR_ERR(vma);
-			goto err_vma;
-		}
-
-		batch = vma;
-	} else {
-		batch = eb.batch->vma;
-	}
+	batch = eb.batch->vma;
 
 	/* All GPU relocation batches must be submitted prior to the user rq */
 	GEM_BUG_ON(eb.reloc_cache.rq);
@@ -3058,7 +3071,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.request = i915_request_create(eb.context);
 	if (IS_ERR(eb.request)) {
 		err = PTR_ERR(eb.request);
-		goto err_batch_unpin;
+		goto err_vma;
 	}
 
 	if (in_fence) {
@@ -3120,9 +3133,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	}
 	i915_request_put(eb.request);
 
-err_batch_unpin:
-	if (eb.batch_flags & I915_DISPATCH_SECURE)
-		i915_vma_unpin(batch);
 err_vma:
 	eb_release_vmas(&eb, true);
 	if (eb.trampoline)
@@ -3206,7 +3216,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 	/* Copy in the exec list from userland */
 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
 				   __GFP_NOWARN | GFP_KERNEL);
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+
+	/* Allocate extra slots for use by the command parser */
+	exec2_list = kvmalloc_array(count + 2, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec_list == NULL || exec2_list == NULL) {
 		drm_dbg(&i915->drm,
@@ -3284,8 +3296,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		return err;
 
-	/* Allocate an extra slot for use by the command parser */
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	/* Allocate extra slots for use by the command parser */
+	exec2_list = kvmalloc_array(count + 2, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec2_list == NULL) {
 		drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index f4fec7eb4064..8248efa9229f 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -376,7 +376,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	return vma;
 }
 
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
 {
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err;
@@ -402,7 +402,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
 	 */
 	err = 0;
 	if (!atomic_read(&ppgtt->pin_count))
-		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+		err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
 	if (!err)
 		atomic_inc(&ppgtt->pin_count);
 	mutex_unlock(&ppgtt->pin_mutex);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
index 72e481806c96..00032a931bae 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -8,6 +8,8 @@
 
 #include "intel_gtt.h"
 
+struct i915_gem_ww_ctx;
+
 struct gen6_ppgtt {
 	struct i915_ppgtt base;
 
@@ -66,7 +68,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
 		     (pt = i915_pt_entry(pd, iter), true);		\
 	     ++iter)
 
-int gen6_ppgtt_pin(struct i915_ppgtt *base);
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
 void gen6_ppgtt_unpin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
 void gen6_ppgtt_enable(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index c039e87a46c4..64948386630f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,12 +93,12 @@ static void intel_context_active_release(struct intel_context *ce)
 	i915_active_release(&ce->active);
 }
 
-static int __context_pin_state(struct i915_vma *vma)
+static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
 {
 	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
 	int err;
 
-	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
+	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
 	if (err)
 		return err;
 
@@ -127,7 +127,8 @@ static void __context_unpin_state(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
-static int __ring_active(struct intel_ring *ring)
+static int __ring_active(struct intel_ring *ring,
+			 struct i915_gem_ww_ctx *ww)
 {
 	int err;
 
@@ -135,7 +136,7 @@ static int __ring_active(struct intel_ring *ring)
 	if (err)
 		return err;
 
-	err = intel_ring_pin(ring);
+	err = intel_ring_pin(ring, ww);
 	if (err)
 		goto err_active;
 
@@ -152,24 +153,25 @@ static void __ring_retire(struct intel_ring *ring)
 	i915_active_release(&ring->vma->active);
 }
 
-static int intel_context_pre_pin(struct intel_context *ce)
+static int intel_context_pre_pin(struct intel_context *ce,
+				 struct i915_gem_ww_ctx *ww)
 {
 	int err;
 
 	CE_TRACE(ce, "active\n");
 
-	err = __ring_active(ce->ring);
+	err = __ring_active(ce->ring, ww);
 	if (err)
 		return err;
 
-	err = intel_timeline_pin(ce->timeline);
+	err = intel_timeline_pin(ce->timeline, ww);
 	if (err)
 		goto err_ring;
 
 	if (!ce->state)
 		return 0;
 
-	err = __context_pin_state(ce->state);
+	err = __context_pin_state(ce->state, ww);
 	if (err)
 		goto err_timeline;
 
@@ -192,7 +194,8 @@ static void intel_context_post_unpin(struct intel_context *ce)
 	__ring_retire(ce->ring);
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
+int __intel_context_do_pin_ww(struct intel_context *ce,
+			      struct i915_gem_ww_ctx *ww)
 {
 	bool handoff = false;
 	void *vaddr;
@@ -209,7 +212,14 @@ int __intel_context_do_pin(struct intel_context *ce)
 	 * refcount for __intel_context_active(), which prevent a lock
 	 * inversion of ce->pin_mutex vs dma_resv_lock().
 	 */
-	err = intel_context_pre_pin(ce);
+
+	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
+	if (!err && ce->ring->vma->obj)
+		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
+	if (!err && ce->state)
+		err = i915_gem_object_lock(ce->state->obj, ww);
+	if (!err)
+		err = intel_context_pre_pin(ce, ww);
 	if (err)
 		return err;
 
@@ -217,7 +227,7 @@ int __intel_context_do_pin(struct intel_context *ce)
 	if (err)
 		goto err_ctx_unpin;
 
-	err = ce->ops->pre_pin(ce, &vaddr);
+	err = ce->ops->pre_pin(ce, ww, &vaddr);
 	if (err)
 		goto err_release;
 
@@ -264,6 +274,23 @@ int __intel_context_do_pin(struct intel_context *ce)
 	return err;
 }
 
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	struct i915_gem_ww_ctx ww;
+	int err;
+
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	err = __intel_context_do_pin_ww(ce, &ww);
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+	return err;
+}
+
 void intel_context_unpin(struct intel_context *ce)
 {
 	if (!atomic_dec_and_test(&ce->pin_count))
@@ -301,18 +328,14 @@ static void __intel_context_retire(struct i915_active *active)
 static int __intel_context_active(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
-	int err;
 
 	intel_context_get(ce);
 
 	/* everything should already be activated by intel_context_pre_pin() */
-	err = __ring_active(ce->ring);
-	if (GEM_WARN_ON(err))
-		goto err_put;
+	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
+	__intel_ring_pin(ce->ring);
 
-	err = intel_timeline_pin(ce->timeline);
-	if (GEM_WARN_ON(err))
-		goto err_ring;
+	__intel_timeline_pin(ce->timeline);
 
 	if (ce->state) {
 		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
@@ -321,12 +344,6 @@ static int __intel_context_active(struct i915_active *active)
 	}
 
 	return 0;
-
-err_ring:
-	__ring_retire(ce->ring);
-err_put:
-	intel_context_put(ce);
-	return err;
 }
 
 void
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 07be021882cc..fda2eba81e22 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -25,6 +25,8 @@
 		     ##__VA_ARGS__);					\
 } while (0)
 
+struct i915_gem_ww_ctx;
+
 void intel_context_init(struct intel_context *ce,
 			struct intel_engine_cs *engine);
 void intel_context_fini(struct intel_context *ce);
@@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
 }
 
 int __intel_context_do_pin(struct intel_context *ce);
+int __intel_context_do_pin_ww(struct intel_context *ce,
+			      struct i915_gem_ww_ctx *ww);
 
 static inline bool intel_context_pin_if_active(struct intel_context *ce)
 {
@@ -95,6 +99,15 @@ static inline int intel_context_pin(struct intel_context *ce)
 	return __intel_context_do_pin(ce);
 }
 
+static inline int intel_context_pin_ww(struct intel_context *ce,
+				       struct i915_gem_ww_ctx *ww)
+{
+	if (likely(intel_context_pin_if_active(ce)))
+		return 0;
+
+	return __intel_context_do_pin_ww(ce, ww);
+}
+
 static inline void __intel_context_pin(struct intel_context *ce)
 {
 	GEM_BUG_ON(!intel_context_is_pinned(ce));
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca8e05b4d3ef..552cb57a2e8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -23,6 +23,7 @@
 DECLARE_EWMA(runtime, 3, 8);
 
 struct i915_gem_context;
+struct i915_gem_ww_ctx;
 struct i915_vma;
 struct intel_context;
 struct intel_ring;
@@ -30,7 +31,7 @@ struct intel_ring;
 struct intel_context_ops {
 	int (*alloc)(struct intel_context *ce);
 
-	int (*pre_pin)(struct intel_context *ce, void **vaddr);
+	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
 	int (*pin)(struct intel_context *ce, void *vaddr);
 	void (*unpin)(struct intel_context *ce);
 	void (*post_unpin)(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 7bf2f76212f0..8167c4b2f795 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -559,7 +559,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 	else
 		flags = PIN_HIGH;
 
-	return i915_ggtt_pin(vma, 0, flags);
+	return i915_ggtt_pin(vma, NULL, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 24a0e47a2477..1942f53a60c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -348,7 +348,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
 		goto err_unref;
 	}
 
-	ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
+	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
 	if (ret)
 		goto err_unref;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3dca5e5d8451..7c016f5e244f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3444,7 +3444,8 @@ __execlists_update_reg_state(const struct intel_context *ce,
 }
 
 static int
-execlists_context_pre_pin(struct intel_context *ce, void **vaddr)
+execlists_context_pre_pin(struct intel_context *ce,
+			  struct i915_gem_ww_ctx *ww, void **vaddr)
 {
 	GEM_BUG_ON(!ce->state);
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
@@ -3863,7 +3864,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 		goto err;
 	}
 
-	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
+	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
 	if (err)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 3f7881de6a3c..76b39f4c29b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -184,7 +184,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
 
 	i915_gem_ww_ctx_init(&so->ww, true);
 retry:
-	err = intel_context_pin(ce);
+	err = intel_context_pin_ww(ce, &so->ww);
 	if (err)
 		goto err_fini;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index bdb324167ef3..4034a4bac7f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
 	return space;
 }
 
-int intel_ring_pin(struct intel_ring *ring)
+void __intel_ring_pin(struct intel_ring *ring)
+{
+	GEM_BUG_ON(!atomic_read(&ring->pin_count));
+	atomic_inc(&ring->pin_count);
+}
+
+int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
 {
 	struct i915_vma *vma = ring->vma;
 	unsigned int flags;
@@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring)
 	else
 		flags |= PIN_HIGH;
 
-	ret = i915_ggtt_pin(vma, 0, flags);
+	ret = i915_ggtt_pin(vma, ww, 0, flags);
 	if (unlikely(ret))
 		goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index cc0ebca65167..1700579bdc93 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq);
 
 unsigned int intel_ring_update_space(struct intel_ring *ring);
 
-int intel_ring_pin(struct intel_ring *ring);
+void __intel_ring_pin(struct intel_ring *ring);
+int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_reset(struct intel_ring *ring, u32 tail);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 6914abf3a88d..4201b9841fd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -474,14 +474,16 @@ static void ring_context_destroy(struct kref *ref)
 	intel_context_free(ce);
 }
 
-static int __context_pin_ppgtt(struct intel_context *ce)
+static int ring_context_pre_pin(struct intel_context *ce,
+				struct i915_gem_ww_ctx *ww,
+				void **unused)
 {
 	struct i915_address_space *vm;
 	int err = 0;
 
 	vm = vm_alias(ce->vm);
 	if (vm)
-		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
+		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
 
 	return err;
 }
@@ -588,11 +590,6 @@ static int ring_context_alloc(struct intel_context *ce)
 	return 0;
 }
 
-static int ring_context_pre_pin(struct intel_context *ce, void **unused)
-{
-	return __context_pin_ppgtt(ce);
-}
-
 static int ring_context_pin(struct intel_context *ce, void *unused)
 {
 	return 0;
@@ -1268,7 +1265,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 	}
 	GEM_BUG_ON(timeline->has_initial_breadcrumb);
 
-	err = intel_timeline_pin(timeline);
+	err = intel_timeline_pin(timeline, NULL);
 	if (err)
 		goto err_timeline;
 
@@ -1278,7 +1275,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 		goto err_timeline_unpin;
 	}
 
-	err = intel_ring_pin(ring);
+	err = intel_ring_pin(ring, NULL);
 	if (err)
 		goto err_ring;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 4546284fede1..e53f958bb819 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -313,14 +313,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
 	return timeline;
 }
 
-int intel_timeline_pin(struct intel_timeline *tl)
+void __intel_timeline_pin(struct intel_timeline *tl)
+{
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
+	atomic_inc(&tl->pin_count);
+}
+
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 {
 	int err;
 
 	if (atomic_add_unless(&tl->pin_count, 1, 0))
 		return 0;
 
-	err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
+	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
 	if (err)
 		return err;
 
@@ -460,7 +466,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 		goto err_rollback;
 	}
 
-	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
+	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
 	if (err) {
 		__idle_hwsp_free(vma->private, cacheline);
 		goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 4298b9ac7327..ff293dfdbc3b 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -71,7 +71,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
 	return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
 }
 
-int intel_timeline_pin(struct intel_timeline *tl);
+void __intel_timeline_pin(struct intel_timeline *tl);
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww);
 void intel_timeline_enter(struct intel_timeline *tl);
 int intel_timeline_get_seqno(struct intel_timeline *tl,
 			     struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 62664601e683..f349cb9115ce 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -168,7 +168,8 @@ static int mock_context_alloc(struct intel_context *ce)
 	return 0;
 }
 
-static int mock_context_pre_pin(struct intel_context *ce, void **unused)
+static int mock_context_pre_pin(struct intel_context *ce,
+				struct i915_gem_ww_ctx *ww, void **unused)
 {
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index daa4aabab9a7..19ae1e6ba976 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -3090,7 +3090,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
 		return vma;
 	}
 
-	err = i915_ggtt_pin(vma, 0, 0);
+	err = i915_ggtt_pin(vma, NULL, 0, 0);
 	if (err) {
 		i915_vma_put(vma);
 		return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index fcdee951579b..efeb354c81ad 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	struct i915_request *rq;
 	int err;
 
-	err = intel_timeline_pin(tl);
+	err = intel_timeline_pin(tl, NULL);
 	if (err) {
 		rq = ERR_PTR(err);
 		goto out;
@@ -665,7 +665,7 @@ static int live_hwsp_wrap(void *arg)
 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 		goto out_free;
 
-	err = intel_timeline_pin(tl);
+	err = intel_timeline_pin(tl, NULL);
 	if (err)
 		goto out_free;
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 861657897c0f..942c7c187adb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 		goto err;
 
 	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-	ret = i915_ggtt_pin(vma, 0, flags);
+	ret = i915_ggtt_pin(vma, NULL, 0, flags);
 	if (ret) {
 		vma = ERR_PTR(ret);
 		goto err;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5649f8e502fe..e99037506a14 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1773,11 +1773,18 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915)
 }
 
 struct i915_vma * __must_check
+i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
+			    struct i915_gem_ww_ctx *ww,
+			    const struct i915_ggtt_view *view,
+			    u64 size, u64 alignment, u64 flags);
+
+static inline struct i915_vma * __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
-			 u64 size,
-			 u64 alignment,
-			 u64 flags);
+			 u64 size, u64 alignment, u64 flags)
+{
+	return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags);
+}
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 			   unsigned long flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 20653b660b61..625b4fc8842f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -962,11 +962,10 @@ static void discard_ggtt_vma(struct i915_vma *vma)
 }
 
 struct i915_vma *
-i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
-			 const struct i915_ggtt_view *view,
-			 u64 size,
-			 u64 alignment,
-			 u64 flags)
+i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
+			    struct i915_gem_ww_ctx *ww,
+			    const struct i915_ggtt_view *view,
+			    u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &i915->ggtt;
@@ -1032,7 +1031,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			return ERR_PTR(ret);
 	}
 
-	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+	ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1f63c4a1f055..d540bf45bdb0 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -856,13 +856,19 @@ static void vma_unbind_pages(struct i915_vma *vma)
 	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
 }
 
-int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+		    u64 size, u64 alignment, u64 flags)
 {
 	struct i915_vma_work *work = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
 
+#ifdef CONFIG_PROVE_LOCKING
+	if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
+		WARN_ON(!ww);
+#endif
+
 	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
 	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
 
@@ -992,7 +998,8 @@ static void flush_idle_contexts(struct intel_gt *gt)
 	intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 }
 
-int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
+int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+		  u32 align, unsigned int flags)
 {
 	struct i915_address_space *vm = vma->vm;
 	int err;
@@ -1000,7 +1007,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
 	do {
-		err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
+		err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
 		if (err != -ENOSPC) {
 			if (!err) {
 				err = i915_vma_wait_for_bind(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index d0d01f909548..5b3a3c653454 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -237,8 +237,17 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
 }
 
 int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
-int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
+i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+		u64 size, u64 alignment, u64 flags);
+
+static inline int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+	return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
+}
+
+int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+		  u32 align, unsigned int flags);
 
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 16/26] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (13 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 15/26] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 17/26] drm/i915: Kill last user of intel_context_create_request outside of selftests Maarten Lankhorst
                   ` (15 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

This is the last part outside of selftests that still doesn't use the
correct lock ordering of timeline->mutex vs resv_lock.

With gem fixed, there are a few places that still get locking wrong:
- gvt/scheduler.c
- i915_perf.c
- Most if not all selftests.

Changes since v1:
- Add intel_engine_pm_get/put() calls to fix use-after-free when using
  intel_engine_get_pool().

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  78 +++++++--
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 152 ++++++++++++------
 .../gpu/drm/i915/gem/i915_gem_object_blt.h    |   3 +
 3 files changed, 163 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index c182091c00ff..c141d7ce8a75 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -156,6 +156,7 @@ static void clear_pages_worker(struct work_struct *work)
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_gem_ww_ctx ww;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	int err = w->dma.error;
@@ -171,17 +172,20 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (unlikely(err))
+	i915_gem_ww_ctx_init(&ww, false);
+	intel_engine_pm_get(w->ce->engine);
+retry:
+	err = intel_context_pin_ww(w->ce, &ww);
+	if (err)
 		goto out_signal;
 
-	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+	batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
-		goto out_unpin;
+		goto out_ctx;
 	}
 
-	rq = intel_context_create_request(w->ce);
+	rq = i915_request_create(w->ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto out_batch;
@@ -223,9 +227,19 @@ static void clear_pages_worker(struct work_struct *work)
 	i915_request_add(rq);
 out_batch:
 	intel_emit_vma_release(w->ce, batch);
-out_unpin:
-	i915_vma_unpin(vma);
+out_ctx:
+	intel_context_unpin(w->ce);
 out_signal:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+
+	i915_vma_unpin(w->sleeve->vma);
+	intel_engine_pm_put(w->ce->engine);
+
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
 		dma_fence_signal(&w->dma);
@@ -233,6 +247,44 @@ static void clear_pages_worker(struct work_struct *work)
 	}
 }
 
+static int pin_wait_clear_pages_work(struct clear_pages_work *w,
+				     struct intel_context *ce)
+{
+	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_gem_ww_ctx ww;
+	int err;
+
+	i915_gem_ww_ctx_init(&ww, false);
+retry:
+	err = i915_gem_object_lock(vma->obj, &ww);
+	if (err)
+		goto out;
+
+	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out;
+
+	err = i915_sw_fence_await_reservation(&w->wait,
+					      vma->obj->base.resv, NULL,
+					      true, 0, I915_FENCE_GFP);
+	if (err)
+		goto err_unpin_vma;
+
+	dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
+
+err_unpin_vma:
+	if (err)
+		i915_vma_unpin(vma);
+out:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+	return err;
+}
+
 static int __i915_sw_fence_call
 clear_pages_work_notify(struct i915_sw_fence *fence,
 			enum i915_sw_fence_notify state)
@@ -286,17 +338,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
 	dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
 	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-	i915_gem_object_lock(obj, NULL);
-	err = i915_sw_fence_await_reservation(&work->wait,
-					      obj->base.resv, NULL, true, 0,
-					      I915_FENCE_GFP);
-	if (err < 0) {
+	err = pin_wait_clear_pages_work(work, ce);
+	if (err < 0)
 		dma_fence_set_error(&work->dma, err);
-	} else {
-		dma_resv_add_excl_fence(obj->base.resv, &work->dma);
-		err = 0;
-	}
-	i915_gem_object_unlock(obj);
 
 	dma_fence_get(&work->dma);
 	i915_sw_fence_commit(&work->wait);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index bfdb32d46877..d93eb36160c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -14,6 +14,7 @@
 
 struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 					 struct i915_vma *vma,
+					 struct i915_gem_ww_ctx *ww,
 					 u32 value)
 {
 	struct drm_i915_private *i915 = ce->vm->i915;
@@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 		goto out_pm;
 	}
 
+	err = i915_gem_object_lock(pool->obj, ww);
+	if (err)
+		goto out_put;
+
+	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto out_put;
+	}
+
+	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_put;
+
 	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
-		goto out_put;
+		goto out_unpin;
 	}
 
 	rem = vma->size;
@@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 
 	intel_gt_chipset_flush(ce->vm->gt);
 
-	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_put;
-	}
-
-	err = i915_vma_pin(batch, 0, 0, PIN_USER);
-	if (unlikely(err))
-		goto out_put;
-
 	batch->private = pool;
 	return batch;
 
+out_unpin:
+	i915_vma_unpin(batch);
 out_put:
 	intel_gt_buffer_pool_put(pool);
 out_pm:
@@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
 {
 	int err;
 
-	i915_vma_lock(vma);
 	err = i915_request_await_object(rq, vma->obj, false);
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, 0);
-	i915_vma_unlock(vma);
 	if (unlikely(err))
 		return err;
 
@@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 			     struct intel_context *ce,
 			     u32 value)
 {
+	struct i915_gem_ww_ctx ww;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	struct i915_vma *vma;
@@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (unlikely(err))
-		return err;
+	i915_gem_ww_ctx_init(&ww, true);
+	intel_engine_pm_get(ce->engine);
+retry:
+	err = i915_gem_object_lock(obj, &ww);
+	if (err)
+		goto out;
 
-	batch = intel_emit_vma_fill_blt(ce, vma, value);
+	err = intel_context_pin_ww(ce, &ww);
+	if (err)
+		goto out;
+
+	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+	if (err)
+		goto out_ctx;
+
+	batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
-		goto out_unpin;
+		goto out_vma;
 	}
 
-	rq = intel_context_create_request(ce);
+	rq = i915_request_create(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto out_batch;
@@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	if (unlikely(err))
 		goto out_request;
 
-	i915_vma_lock(vma);
 	err = move_obj_to_gpu(vma->obj, rq, true);
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-	i915_vma_unlock(vma);
 	if (unlikely(err))
 		goto out_request;
 
@@ -193,8 +208,18 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	i915_request_add(rq);
 out_batch:
 	intel_emit_vma_release(ce, batch);
-out_unpin:
+out_vma:
 	i915_vma_unpin(vma);
+out_ctx:
+	intel_context_unpin(ce);
+out:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+	intel_engine_pm_put(ce->engine);
 	return err;
 }
 
@@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
 }
 
 struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+					 struct i915_gem_ww_ctx *ww,
 					 struct i915_vma *src,
 					 struct i915_vma *dst)
 {
@@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 		goto out_pm;
 	}
 
+	err = i915_gem_object_lock(pool->obj, ww);
+	if (err)
+		goto out_put;
+
+	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto out_put;
+	}
+
+	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_put;
+
 	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
-		goto out_put;
+		goto out_unpin;
 	}
 
 	rem = src->size;
@@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 	i915_gem_object_unpin_map(pool->obj);
 
 	intel_gt_chipset_flush(ce->vm->gt);
-
-	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_put;
-	}
-
-	err = i915_vma_pin(batch, 0, 0, PIN_USER);
-	if (unlikely(err))
-		goto out_put;
-
 	batch->private = pool;
 	return batch;
 
+out_unpin:
+	i915_vma_unpin(batch);
 out_put:
 	intel_gt_buffer_pool_put(pool);
 out_pm:
@@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 			     struct drm_i915_gem_object *dst,
 			     struct intel_context *ce)
 {
-	struct drm_gem_object *objs[] = { &src->base, &dst->base };
 	struct i915_address_space *vm = ce->vm;
 	struct i915_vma *vma[2], *batch;
-	struct ww_acquire_ctx acquire;
+	struct i915_gem_ww_ctx ww;
 	struct i915_request *rq;
 	int err, i;
 
@@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 	if (IS_ERR(vma[0]))
 		return PTR_ERR(vma[0]);
 
-	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
-	if (unlikely(err))
-		return err;
-
 	vma[1] = i915_vma_instance(dst, vm, NULL);
 	if (IS_ERR(vma[1]))
-		goto out_unpin_src;
+		return PTR_ERR(vma);
 
-	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
+	i915_gem_ww_ctx_init(&ww, true);
+	intel_engine_pm_get(ce->engine);
+retry:
+	err = i915_gem_object_lock(src, &ww);
+	if (!err)
+		err = i915_gem_object_lock(dst, &ww);
+	if (!err)
+		err = intel_context_pin_ww(ce, &ww);
+	if (err)
+		goto out;
+
+	err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
+	if (err)
+		goto out_ctx;
+
+	err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
 	if (unlikely(err))
 		goto out_unpin_src;
 
-	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
+	batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto out_unpin_dst;
 	}
 
-	rq = intel_context_create_request(ce);
+	rq = i915_request_create(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto out_batch;
@@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 	if (unlikely(err))
 		goto out_request;
 
-	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
-	if (unlikely(err))
-		goto out_request;
-
 	for (i = 0; i < ARRAY_SIZE(vma); i++) {
 		err = move_obj_to_gpu(vma[i]->obj, rq, i);
 		if (unlikely(err))
-			goto out_unlock;
+			goto out_request;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(vma); i++) {
@@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 
 		err = i915_vma_move_to_active(vma[i], rq, flags);
 		if (unlikely(err))
-			goto out_unlock;
+			goto out_request;
 	}
 
 	if (rq->engine->emit_init_breadcrumb) {
 		err = rq->engine->emit_init_breadcrumb(rq);
 		if (unlikely(err))
-			goto out_unlock;
+			goto out_request;
 	}
 
 	err = rq->engine->emit_bb_start(rq,
 					batch->node.start, batch->node.size,
 					0);
-out_unlock:
-	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+
 out_request:
 	if (unlikely(err))
 		i915_request_set_error_once(rq, err);
@@ -400,6 +436,16 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 	i915_vma_unpin(vma[1]);
 out_unpin_src:
 	i915_vma_unpin(vma[0]);
+out_ctx:
+	intel_context_unpin(ce);
+out:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+	intel_engine_pm_put(ce->engine);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 8bcd336a90dc..2409fdcccf0e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -13,12 +13,15 @@
 #include "i915_vma.h"
 
 struct drm_i915_gem_object;
+struct i915_gem_ww_ctx;
 
 struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 					 struct i915_vma *vma,
+					 struct i915_gem_ww_ctx *ww,
 					 u32 value);
 
 struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+					 struct i915_gem_ww_ctx *ww,
 					 struct i915_vma *src,
 					 struct i915_vma *dst);
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 17/26] drm/i915: Kill last user of intel_context_create_request outside of selftests
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (14 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 16/26] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2 Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 18/26] drm/i915: Convert i915_perf to ww locking as well Maarten Lankhorst
                   ` (14 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Instead of using intel_context_create_request(), use intel_context_pin_ww()
and i915_request_create() directly.

Now all those calls are gone outside of selftests. :)

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 ++++++++++++++-------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 2da366821dda..705f627f7f47 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2042,6 +2042,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
 	const struct i915_wa *wa;
 	struct i915_request *rq;
 	struct i915_vma *vma;
+	struct i915_gem_ww_ctx ww;
 	unsigned int i;
 	u32 *results;
 	int err;
@@ -2054,29 +2055,34 @@ static int engine_wa_list_verify(struct intel_context *ce,
 		return PTR_ERR(vma);
 
 	intel_engine_pm_get(ce->engine);
-	rq = intel_context_create_request(ce);
-	intel_engine_pm_put(ce->engine);
+	i915_gem_ww_ctx_init(&ww, false);
+retry:
+	err = i915_gem_object_lock(vma->obj, &ww);
+	if (err == 0)
+		err = intel_context_pin_ww(ce, &ww);
+	if (err)
+		goto err_pm;
+
+	rq = i915_request_create(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
-		goto err_vma;
+		goto err_unpin;
 	}
 
-	i915_vma_lock(vma);
 	err = i915_request_await_object(rq, vma->obj, true);
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-	i915_vma_unlock(vma);
-	if (err) {
-		i915_request_add(rq);
-		goto err_vma;
-	}
-
-	err = wa_list_srm(rq, wal, vma);
-	if (err)
-		goto err_vma;
+	if (err == 0)
+		err = wa_list_srm(rq, wal, vma);
 
 	i915_request_get(rq);
+	if (err)
+		i915_request_set_error_once(rq, err);
 	i915_request_add(rq);
+
+	if (err)
+		goto err_rq;
+
 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 		err = -ETIME;
 		goto err_rq;
@@ -2101,7 +2107,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
 
 err_rq:
 	i915_request_put(rq);
-err_vma:
+err_unpin:
+	intel_context_unpin(ce);
+err_pm:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+	intel_engine_pm_put(ce->engine);
 	i915_vma_unpin(vma);
 	i915_vma_put(vma);
 	return err;
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 18/26] drm/i915: Convert i915_perf to ww locking as well
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (15 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 17/26] drm/i915: Kill last user of intel_context_create_request outside of selftests Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 19/26] drm/i915: Dirty hack to fix selftests locking inversion Maarten Lankhorst
                   ` (13 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

We have the ordering of timeline->mutex vs resv_lock wrong;
convert the i915_vma_pin and intel_context_pin calls as well to
future-proof this.

We may need to make future changes to do this in a more transaction-like
way, and get down to only a single i915_gem_ww_ctx, but for now this
should work.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 57 +++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 25329b7600c9..71a77951467a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1195,24 +1195,39 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 	struct i915_gem_engines_iter it;
 	struct i915_gem_context *ctx = stream->ctx;
 	struct intel_context *ce;
-	int err;
+	struct i915_gem_ww_ctx ww;
+	int err = -ENODEV;
 
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		if (ce->engine != stream->engine) /* first match! */
 			continue;
 
-		/*
-		 * As the ID is the gtt offset of the context's vma we
-		 * pin the vma to ensure the ID remains fixed.
-		 */
-		err = intel_context_pin(ce);
-		if (err == 0) {
-			stream->pinned_ctx = ce;
-			break;
-		}
+		err = 0;
+		break;
 	}
 	i915_gem_context_unlock_engines(ctx);
 
+	if (err)
+		return ERR_PTR(err);
+
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	/*
+	 * As the ID is the gtt offset of the context's vma we
+	 * pin the vma to ensure the ID remains fixed.
+	 */
+	err = intel_context_pin_ww(ce, &ww);
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+
+	if (err)
+		return ERR_PTR(err);
+
+	stream->pinned_ctx = ce;
 	return stream->pinned_ctx;
 }
 
@@ -1922,15 +1937,22 @@ emit_oa_config(struct i915_perf_stream *stream,
 {
 	struct i915_request *rq;
 	struct i915_vma *vma;
+	struct i915_gem_ww_ctx ww;
 	int err;
 
 	vma = get_oa_vma(stream, oa_config);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	err = i915_gem_object_lock(vma->obj, &ww);
+	if (err)
+		goto err;
+
+	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto err_vma_put;
+		goto err;
 
 	intel_engine_pm_get(ce->engine);
 	rq = i915_request_create(ce);
@@ -1952,11 +1974,9 @@ emit_oa_config(struct i915_perf_stream *stream,
 			goto err_add_request;
 	}
 
-	i915_vma_lock(vma);
 	err = i915_request_await_object(rq, vma->obj, 0);
 	if (!err)
 		err = i915_vma_move_to_active(vma, rq, 0);
-	i915_vma_unlock(vma);
 	if (err)
 		goto err_add_request;
 
@@ -1970,7 +1990,14 @@ emit_oa_config(struct i915_perf_stream *stream,
 	i915_request_add(rq);
 err_vma_unpin:
 	i915_vma_unpin(vma);
-err_vma_put:
+err:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+
+	i915_gem_ww_ctx_fini(&ww);
 	i915_vma_put(vma);
 	return err;
 }
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 19/26] drm/i915: Dirty hack to fix selftests locking inversion
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (16 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 18/26] drm/i915: Convert i915_perf to ww locking as well Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 20/26] drm/i915/selftests: Fix locking inversion in lrc selftest Maarten Lankhorst
                   ` (12 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Some i915 selftests still use i915_vma_lock() as inner lock, with the
intel_timeline->mutex held by intel_context_create_request() as outer lock.
Fortunately for selftests this is not an issue; they should be fixed,
but we can move ahead and clean up lockdep now.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 64948386630f..fe9fff5a63b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -459,6 +459,18 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
 	rq = i915_request_create(ce);
 	intel_context_unpin(ce);
 
+	if (IS_ERR(rq))
+		return rq;
+
+	/*
+	 * timeline->mutex should be the inner lock, but is used as outer lock.
+	 * Hack around this to shut up lockdep in selftests..
+	 */
+	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
+	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
+	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+
 	return rq;
 }
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 20/26] drm/i915/selftests: Fix locking inversion in lrc selftest.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (17 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 19/26] drm/i915: Dirty hack to fix selftests locking inversion Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 21/26] drm/i915: Use ww pinning for intel_context_create_request() Maarten Lankhorst
                   ` (11 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

This function does not use intel_context_create_request, so it has
to use the same locking order as normal code. This is required to
shut up lockdep in selftests.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 19ae1e6ba976..65abb3a14c2d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -4999,6 +4999,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
 {
 	struct intel_context *ce;
 	struct i915_request *rq;
+	struct i915_gem_ww_ctx ww;
 	enum {
 		RING_START_IDX = 0,
 		RING_TAIL_IDX,
@@ -5013,7 +5014,11 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = intel_context_pin(ce);
+	i915_gem_ww_ctx_init(&ww, false);
+retry:
+	err = i915_gem_object_lock(scratch->obj, &ww);
+	if (!err)
+		err = intel_context_pin_ww(ce, &ww);
 	if (err)
 		goto err_put;
 
@@ -5042,11 +5047,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
 	*cs++ = 0;
 
-	i915_vma_lock(scratch);
 	err = i915_request_await_object(rq, scratch->obj, true);
 	if (!err)
 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
-	i915_vma_unlock(scratch);
 
 	i915_request_get(rq);
 	i915_request_add(rq);
@@ -5083,6 +5086,12 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
 err_unpin:
 	intel_context_unpin(ce);
 err_put:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
 	intel_context_put(ce);
 	return err;
 }
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 21/26] drm/i915: Use ww pinning for intel_context_create_request()
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (18 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 20/26] drm/i915/selftests: Fix locking inversion in lrc selftest Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 22/26] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2 Maarten Lankhorst
                   ` (10 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

We want to get rid of intel_context_pin(), convert
intel_context_create_request() first. :)

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index fe9fff5a63b1..e148e2d69ae1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -449,15 +449,25 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
 {
+	struct i915_gem_ww_ctx ww;
 	struct i915_request *rq;
 	int err;
 
-	err = intel_context_pin(ce);
-	if (unlikely(err))
-		return ERR_PTR(err);
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	err = intel_context_pin_ww(ce, &ww);
+	if (!err) {
+		rq = i915_request_create(ce);
+		intel_context_unpin(ce);
+	} else if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	} else {
+		rq = ERR_PTR(err);
+	}
 
-	rq = i915_request_create(ce);
-	intel_context_unpin(ce);
+	i915_gem_ww_ctx_fini(&ww);
 
 	if (IS_ERR(rq))
 		return rq;
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 22/26] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2.
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (19 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 21/26] drm/i915: Use ww pinning for intel_context_create_request() Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 23/26] drm/i915: Add ww locking to vm_fault_gtt Maarten Lankhorst
                   ` (9 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Make sure vma_lock is not used as inner lock when kernel context is used,
and add ww handling where appropriate.

Ensure that execbuf selftests keep passing by using ww handling.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../i915/gem/selftests/i915_gem_coherency.c   | 26 ++++++------
 .../drm/i915/gem/selftests/i915_gem_mman.c    | 41 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/selftest_rps.c        | 30 ++++++++------
 drivers/gpu/drm/i915/selftests/i915_request.c | 18 +++++---
 4 files changed, 75 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index dcdfc396f2f8..7049a6bbc03d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -201,25 +201,25 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
 
 	i915_gem_object_lock(ctx->obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
-	i915_gem_object_unlock(ctx->obj);
 	if (err)
-		return err;
+		goto out_unlock;
 
 	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_unlock;
+	}
 
 	rq = intel_engine_create_kernel_request(ctx->engine);
 	if (IS_ERR(rq)) {
-		i915_vma_unpin(vma);
-		return PTR_ERR(rq);
+		err = PTR_ERR(rq);
+		goto out_unpin;
 	}
 
 	cs = intel_ring_begin(rq, 4);
 	if (IS_ERR(cs)) {
-		i915_request_add(rq);
-		i915_vma_unpin(vma);
-		return PTR_ERR(cs);
+		err = PTR_ERR(cs);
+		goto out_rq;
 	}
 
 	if (INTEL_GEN(ctx->engine->i915) >= 8) {
@@ -240,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_vma_lock(vma);
 	err = i915_request_await_object(rq, vma->obj, true);
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-	i915_vma_unlock(vma);
-	i915_vma_unpin(vma);
 
+out_rq:
 	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+out_unlock:
+	i915_gem_object_unlock(ctx->obj);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 9fb95a45bcad..d27d87a678c8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 	for_each_uabi_engine(engine, i915) {
 		struct i915_request *rq;
 		struct i915_vma *vma;
+		struct i915_gem_ww_ctx ww;
 		int err;
 
 		vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 
-		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+		i915_gem_ww_ctx_init(&ww, false);
+retry:
+		err = i915_gem_object_lock(obj, &ww);
+		if (!err)
+			err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
 		if (err)
-			return err;
+			goto err;
 
 		rq = intel_engine_create_kernel_request(engine);
 		if (IS_ERR(rq)) {
-			i915_vma_unpin(vma);
-			return PTR_ERR(rq);
+			err = PTR_ERR(rq);
+			goto err_unpin;
 		}
 
-		i915_vma_lock(vma);
 		err = i915_request_await_object(rq, vma->obj, true);
 		if (err == 0)
 			err = i915_vma_move_to_active(vma, rq,
 						      EXEC_OBJECT_WRITE);
-		i915_vma_unlock(vma);
 
 		i915_request_add(rq);
+err_unpin:
 		i915_vma_unpin(vma);
+err:
+		if (err == -EDEADLK) {
+			err = i915_gem_ww_ctx_backoff(&ww);
+			if (!err)
+				goto retry;
+		}
+		i915_gem_ww_ctx_fini(&ww);
 		if (err)
 			return err;
 	}
@@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
 	for_each_uabi_engine(engine, i915) {
 		struct i915_request *rq;
 		struct i915_vma *vma;
+		struct i915_gem_ww_ctx ww;
 
 		vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
 		if (IS_ERR(vma)) {
@@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
 			goto out_unmap;
 		}
 
-		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+		i915_gem_ww_ctx_init(&ww, false);
+retry:
+		err = i915_gem_object_lock(obj, &ww);
+		if (!err)
+			err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
 		if (err)
-			goto out_unmap;
+			goto out_ww;
 
 		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
@@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
 			goto out_unpin;
 		}
 
-		i915_vma_lock(vma);
 		err = i915_request_await_object(rq, vma->obj, false);
 		if (err == 0)
 			err = i915_vma_move_to_active(vma, rq, 0);
-		i915_vma_unlock(vma);
 
 		err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
 		i915_request_get(rq);
@@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
 
 out_unpin:
 		i915_vma_unpin(vma);
+out_ww:
+		if (err == -EDEADLK) {
+			err = i915_gem_ww_ctx_backoff(&ww);
+			if (!err)
+				goto retry;
+		}
+		i915_gem_ww_ctx_fini(&ww);
 		if (err)
 			goto out_unmap;
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index bb753f0c12eb..ec1cfcfa0706 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine,
 
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
-		i915_gem_object_put(obj);
-		return vma;
+		err = PTR_ERR(vma);
+		goto err_put;
 	}
 
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err) {
-		i915_vma_put(vma);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto err_unlock;
+
+	i915_vma_lock(vma);
 
 	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
 	if (IS_ERR(base)) {
-		i915_gem_object_put(obj);
-		return ERR_CAST(base);
+		err = PTR_ERR(base);
+		goto err_unpin;
 	}
 	cs = base;
 
@@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine,
 	*cancel = base + loop;
 	*counter = srm ? memset32(base + end, 0, 1) : NULL;
 	return vma;
+
+err_unpin:
+	i915_vma_unpin(vma);
+err_unlock:
+	i915_vma_unlock(vma);
+err_put:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
 }
 
 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
@@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg)
 			goto err_vma;
 		}
 
-		i915_vma_lock(vma);
 		err = i915_request_await_object(rq, vma->obj, false);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
@@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg)
 			err = rq->engine->emit_bb_start(rq,
 							vma->node.start,
 							PAGE_SIZE, 0);
-		i915_vma_unlock(vma);
 		i915_request_add(rq);
 		if (err)
 			goto err_vma;
@@ -708,6 +714,7 @@ int live_rps_frequency_cs(void *arg)
 		i915_gem_object_flush_map(vma->obj);
 		i915_gem_object_unpin_map(vma->obj);
 		i915_vma_unpin(vma);
+		i915_vma_unlock(vma);
 		i915_vma_put(vma);
 
 		st_engine_heartbeat_enable(engine);
@@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg)
 			goto err_vma;
 		}
 
-		i915_vma_lock(vma);
 		err = i915_request_await_object(rq, vma->obj, false);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
@@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg)
 			err = rq->engine->emit_bb_start(rq,
 							vma->node.start,
 							PAGE_SIZE, 0);
-		i915_vma_unlock(vma);
 		i915_request_add(rq);
 		if (err)
 			goto err_vma;
@@ -849,6 +854,7 @@ int live_rps_frequency_srm(void *arg)
 		i915_gem_object_flush_map(vma->obj);
 		i915_gem_object_unpin_map(vma->obj);
 		i915_vma_unpin(vma);
+		i915_vma_unlock(vma);
 		i915_vma_put(vma);
 
 		st_engine_heartbeat_enable(engine);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 9271aad7f779..07311437330f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -862,6 +862,8 @@ static int live_all_engines(void *arg)
 		goto out_free;
 	}
 
+	i915_vma_lock(batch);
+
 	idx = 0;
 	for_each_uabi_engine(engine, i915) {
 		request[idx] = intel_engine_create_kernel_request(engine);
@@ -872,11 +874,9 @@ static int live_all_engines(void *arg)
 			goto out_request;
 		}
 
-		i915_vma_lock(batch);
 		err = i915_request_await_object(request[idx], batch->obj, 0);
 		if (err == 0)
 			err = i915_vma_move_to_active(batch, request[idx], 0);
-		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		err = engine->emit_bb_start(request[idx],
@@ -891,6 +891,8 @@ static int live_all_engines(void *arg)
 		idx++;
 	}
 
+	i915_vma_unlock(batch);
+
 	idx = 0;
 	for_each_uabi_engine(engine, i915) {
 		if (i915_request_completed(request[idx])) {
@@ -981,12 +983,13 @@ static int live_sequential_engines(void *arg)
 			goto out_free;
 		}
 
+		i915_vma_lock(batch);
 		request[idx] = intel_engine_create_kernel_request(engine);
 		if (IS_ERR(request[idx])) {
 			err = PTR_ERR(request[idx]);
 			pr_err("%s: Request allocation failed for %s with err=%d\n",
 			       __func__, engine->name, err);
-			goto out_request;
+			goto out_unlock;
 		}
 
 		if (prev) {
@@ -996,16 +999,14 @@ static int live_sequential_engines(void *arg)
 				i915_request_add(request[idx]);
 				pr_err("%s: Request await failed for %s with err=%d\n",
 				       __func__, engine->name, err);
-				goto out_request;
+				goto out_unlock;
 			}
 		}
 
-		i915_vma_lock(batch);
 		err = i915_request_await_object(request[idx],
 						batch->obj, false);
 		if (err == 0)
 			err = i915_vma_move_to_active(batch, request[idx], 0);
-		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		err = engine->emit_bb_start(request[idx],
@@ -1020,6 +1021,11 @@ static int live_sequential_engines(void *arg)
 
 		prev = request[idx];
 		idx++;
+
+out_unlock:
+		i915_vma_unlock(batch);
+		if (err)
+			goto out_request;
 	}
 
 	idx = 0;
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 23/26] drm/i915: Add ww locking to vm_fault_gtt
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (20 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 22/26] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2 Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 24/26] drm/i915: Add ww locking to pin_to_display_plane Maarten Lankhorst
                   ` (8 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 51 +++++++++++++++---------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index fe27c5b344e3..874fa0489f6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	bool write = area->vm_flags & VM_WRITE;
+	struct i915_gem_ww_ctx ww;
 	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	pgoff_t page_offset;
 	int srcu;
 	int ret;
 
-	/* Sanity check that we allow writing into this object */
-	if (i915_gem_object_is_readonly(obj) && write)
-		return VM_FAULT_SIGBUS;
-
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
-	ret = i915_gem_object_pin_pages(obj);
+	wakeref = intel_runtime_pm_get(rpm);
+
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	ret = i915_gem_object_lock(obj, &ww);
 	if (ret)
-		goto err;
+		goto err_rpm;
 
-	wakeref = intel_runtime_pm_get(rpm);
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write) {
+		ret = -EFAULT;
+		goto err_rpm;
+	}
 
-	ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		goto err_rpm;
 
+	ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+	if (ret)
+		goto err_pages;
+
 	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK /* NOWARN */ |
-				       PIN_NOEVICT);
-	if (IS_ERR(vma)) {
+	vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+					  PIN_MAPPABLE |
+					  PIN_NONBLOCK /* NOWARN */ |
+					  PIN_NOEVICT);
+	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
 		/* Use a partial view if it is bigger than available space */
 		struct i915_ggtt_view view =
 			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
@@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 		 * all hope that the hardware is able to track future writes.
 		 */
 
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		if (IS_ERR(vma)) {
+		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+		if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
 			flags = PIN_MAPPABLE;
 			view.type = I915_GGTT_VIEW_PARTIAL;
-			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+			vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
 		}
 
 		/* The entire mappable GGTT is pinned? Unexpected! */
@@ -389,10 +398,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 	__i915_vma_unpin(vma);
 err_reset:
 	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+err_pages:
+	i915_gem_object_unpin_pages(obj);
 err_rpm:
+	if (ret == -EDEADLK) {
+		ret = i915_gem_ww_ctx_backoff(&ww);
+		if (!ret)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
 	intel_runtime_pm_put(rpm, wakeref);
-	i915_gem_object_unpin_pages(obj);
-err:
 	return i915_error_to_vmf_fault(ret);
 }
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 24/26] drm/i915: Add ww locking to pin_to_display_plane
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (21 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 23/26] drm/i915: Add ww locking to vm_fault_gtt Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 25/26] drm/i915: Ensure we hold the pin mutex Maarten Lankhorst
                   ` (7 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 65 ++++++++++++++++------
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  1 +
 2 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 8ebceebd11b0..c0d153284984 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -37,6 +37,12 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	i915_gem_object_unlock(obj);
 }
 
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
+{
+	if (i915_gem_object_is_framebuffer(obj))
+		__i915_gem_object_flush_for_display(obj);
+}
+
 /**
  * Moves a single object to the WC read, and possibly write domain.
  * @obj: object to act on
@@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_object_lock_interruptible(obj, NULL);
-	if (ret)
-		return ret;
-
 	/* Always invalidate stale cachelines */
 	if (obj->cache_level != cache_level) {
 		i915_gem_object_set_cache_coherency(obj, cache_level);
 		obj->cache_dirty = true;
 	}
 
-	i915_gem_object_unlock(obj);
-
 	/* The cache-level will be applied when each vma is rebound. */
 	return i915_gem_object_unbind(obj,
 				      I915_GEM_OBJECT_UNBIND_ACTIVE |
@@ -255,6 +255,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_caching *args = data;
 	struct drm_i915_gem_object *obj;
 	enum i915_cache_level level;
+	struct i915_gem_ww_ctx ww;
 	int ret = 0;
 
 	switch (args->caching) {
@@ -293,7 +294,18 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	ret = i915_gem_object_set_cache_level(obj, level);
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	ret = i915_gem_object_lock(obj, &ww);
+	if (!ret)
+		ret = i915_gem_object_set_cache_level(obj, level);
+
+	if (ret == -EDEADLK) {
+		ret = i915_gem_ww_ctx_backoff(&ww);
+		if (!ret)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
 
 out:
 	i915_gem_object_put(obj);
@@ -313,6 +325,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     unsigned int flags)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_gem_ww_ctx ww;
 	struct i915_vma *vma;
 	int ret;
 
@@ -320,6 +333,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
 		return ERR_PTR(-EINVAL);
 
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	ret = i915_gem_object_lock(obj, &ww);
+	if (ret)
+		goto err;
 	/*
 	 * The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
@@ -334,7 +352,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 					      HAS_WT(i915) ?
 					      I915_CACHE_WT : I915_CACHE_NONE);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err;
 
 	/*
 	 * As the user may map the buffer once pinned in the display plane
@@ -347,18 +365,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma = ERR_PTR(-ENOSPC);
 	if ((flags & PIN_MAPPABLE) == 0 &&
 	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-					       flags |
-					       PIN_MAPPABLE |
-					       PIN_NONBLOCK);
-	if (IS_ERR(vma))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-	if (IS_ERR(vma))
-		return vma;
+		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
+						  flags | PIN_MAPPABLE |
+						  PIN_NONBLOCK);
+	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
+		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
+						  alignment, flags);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err;
+	}
 
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-	i915_gem_object_flush_if_display(obj);
+	i915_gem_object_flush_if_display_locked(obj);
+
+err:
+	if (ret == -EDEADLK) {
+		ret = i915_gem_ww_ctx_backoff(&ww);
+		if (!ret)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+
+	if (ret)
+		return ERR_PTR(ret);
 
 	return vma;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 11b8e2735071..409fd00c8709 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -456,6 +456,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 25/26] drm/i915: Ensure we hold the pin mutex
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (22 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 24/26] drm/i915: Add ww locking to pin_to_display_plane Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-24  1:52   ` kernel test robot
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 26/26] drm/i915: Kill context before taking ctx->mutex Maarten Lankhorst
                   ` (6 subsequent siblings)
  30 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 2 +-
 drivers/gpu/drm/i915/i915_vma.c             | 9 ++++++++-
 drivers/gpu/drm/i915/i915_vma.h             | 3 +++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 76b39f4c29b5..22c54db3d3ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -196,7 +196,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
 	if (err)
 		goto err_context;
 
-	err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_vma_pin_ww(so->vma, &so->ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
 		goto err_context;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d540bf45bdb0..4d797add5323 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -867,6 +867,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 #ifdef CONFIG_PROVE_LOCKING
 	if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
 		WARN_ON(!ww);
+	if (debug_locks && ww && vma->resv)
+		assert_vma_held(vma);
 #endif
 
 	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
@@ -1006,8 +1008,13 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
+	WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
+
 	do {
-		err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
+		if (ww)
+			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
+		else
+			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
 		if (err != -ENOSPC) {
 			if (!err) {
 				err = i915_vma_wait_for_bind(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5b3a3c653454..838bbbeb11cc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -243,6 +243,9 @@ i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 static inline int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(vma->resv && dma_resv_held(vma->resv));
+#endif
 	return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
 }
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH 26/26] drm/i915: Kill context before taking ctx->mutex
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (23 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 25/26] drm/i915: Ensure we hold the pin mutex Maarten Lankhorst
@ 2020-06-23 14:28 ` Maarten Lankhorst
  2020-06-24 11:05   ` [Intel-gfx] [PATCH] " Maarten Lankhorst
  2020-06-23 15:23 ` [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Chris Wilson
                   ` (5 subsequent siblings)
  30 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-23 14:28 UTC (permalink / raw)
  To: intel-gfx

Killing the context before taking ctx->mutex fixes a hang in
gem_ctx_persistence.close-replace-race, where lut_close
takes obj->resv.lock which is already held by execbuf,
causing it to stall indefinitely.

[ 1904.342847] 2 locks held by gem_ctx_persist/11520:
[ 1904.342849]  #0: ffff8882188e4968 (&ctx->mutex){+.+.}-{3:3}, at: context_close+0xe6/0x850 [i915]
[ 1904.342941]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: lut_close+0x2c2/0xba0 [i915]
[ 1904.343033] 3 locks held by gem_ctx_persist/11521:
[ 1904.343035]  #0: ffffc900008ff938 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_gem_do_execbuffer+0x103d/0x54c0 [i915]
[ 1904.343157]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: eb_validate_vmas+0x602/0x2010 [i915]
[ 1904.343267]  #2: ffff88820afd9200 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x335/0x2300 [i915]

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 ++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index b9d38e8edb5b..3c89150f7262 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -623,6 +623,18 @@ static void context_close(struct i915_gem_context *ctx)
 	i915_gem_context_set_closed(ctx);
 	mutex_unlock(&ctx->engines_mutex);
 
+	/*
+	 * If the user has disabled hangchecking, we can not be sure that
+	 * the batches will ever complete after the context is closed,
+	 * keeping the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 */
+	if (!i915_gem_context_is_persistent(ctx) ||
+	    !i915_modparams.enable_hangcheck)
+		kill_context(ctx);
+
+
 	mutex_lock(&ctx->mutex);
 
 	set_closed_name(ctx);
@@ -641,18 +653,6 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
-
-	/*
-	 * If the user has disabled hangchecking, we can not be sure that
-	 * the batches will ever complete after the context is closed,
-	 * keeping the context and all resources pinned forever. So in this
-	 * case we opt to forcibly kill off all remaining requests on
-	 * context close.
-	 */
-	if (!i915_gem_context_is_persistent(ctx) ||
-	    !i915_modparams.enable_hangcheck)
-		kill_context(ctx);
-
 	i915_gem_context_put(ctx);
 }
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only"
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (24 preceding siblings ...)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 26/26] drm/i915: Kill context before taking ctx->mutex Maarten Lankhorst
@ 2020-06-23 15:23 ` Chris Wilson
  2020-06-24 11:19   ` Chris Wilson
  2020-06-23 15:39 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/26] " Patchwork
                   ` (4 subsequent siblings)
  30 siblings, 1 reply; 61+ messages in thread
From: Chris Wilson @ 2020-06-23 15:23 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx

Quoting Maarten Lankhorst (2020-06-23 15:28:18)
> This reverts commit 9e0f9464e2ab36b864359a59b0e9058fdef0ce47,
> and related commit 7ac2d2536dfa7 ("drm/i915/gem: Delete unused code").

Regardless that you haven't adapted the series...

This still prevents concurrent submission between clients, and does not
remove the allocation mutexes. The latter we can do in a couple of
patches that preserve the status quo with just a name switch and the
forced removal of nestable shrinkers.

But on the former we have been at an impasse for over 6 months.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only"
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (25 preceding siblings ...)
  2020-06-23 15:23 ` [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Chris Wilson
@ 2020-06-23 15:39 ` Patchwork
  2020-06-24 11:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2) Patchwork
                   ` (3 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Patchwork @ 2020-06-23 15:39 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only"
URL   : https://patchwork.freedesktop.org/series/78744/
State : failure

== Summary ==

Applying: Revert "drm/i915/gem: Async GPU relocations only"
Applying: drm/i915: Revert relocation chaining commits.
Applying: Revert "drm/i915/gem: Drop relocation slowpath".
Applying: drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
Applying: drm/i915: Remove locking from i915_gem_object_prepare_read/write
Applying: drm/i915: Parse command buffer earlier in eb_relocate(slow)
Applying: Revert "drm/i915/gem: Split eb_vma into its own allocation"
Applying: drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
Applying: drm/i915: Use per object locking in execbuf, v12.
Applying: drm/i915: Use ww locking in intel_renderstate.
Applying: drm/i915: Add ww context handling to context_barrier_task
Applying: drm/i915: Nuke arguments to eb_pin_engine
Applying: drm/i915: Pin engine before pinning all objects, v4.
Applying: drm/i915: Rework intel_context pinning to do everything outside of pin_mutex
Applying: drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.
Applying: drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2.
Applying: drm/i915: Kill last user of intel_context_create_request outside of selftests
Applying: drm/i915: Convert i915_perf to ww locking as well
Applying: drm/i915: Dirty hack to fix selftests locking inversion
Applying: drm/i915/selftests: Fix locking inversion in lrc selftest.
Applying: drm/i915: Use ww pinning for intel_context_create_request()
Applying: drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2.
Applying: drm/i915: Add ww locking to vm_fault_gtt
Applying: drm/i915: Add ww locking to pin_to_display_plane
Applying: drm/i915: Ensure we hold the pin mutex
Applying: drm/i915: Kill context before taking ctx->mutex
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/gem/i915_gem_context.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0026 drm/i915: Kill context before taking ctx->mutex
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 25/26] drm/i915: Ensure we hold the pin mutex
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 25/26] drm/i915: Ensure we hold the pin mutex Maarten Lankhorst
@ 2020-06-24  1:52   ` kernel test robot
  0 siblings, 0 replies; 61+ messages in thread
From: kernel test robot @ 2020-06-24  1:52 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2874 bytes --]

Hi Maarten,

I love your patch! Yet something to improve:

[auto build test ERROR on 24b806b0a1dd38c734e771ece9dd1ab6492bbb96]

url:    https://github.com/0day-ci/linux/commits/Maarten-Lankhorst/Revert-drm-i915-gem-Async-GPU-relocations-only/20200623-223216
base:    24b806b0a1dd38c734e771ece9dd1ab6492bbb96
config: i386-debian-10.3 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-13) 9.3.0
reproduce (this is a W=1 build):
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/x86/include/asm/bug.h:86,
                    from include/linux/bug.h:5,
                    from arch/x86/include/asm/paravirt.h:15,
                    from arch/x86/include/asm/irqflags.h:72,
                    from include/linux/irqflags.h:16,
                    from include/linux/rcupdate.h:26,
                    from include/linux/rculist.h:11,
                    from include/linux/pid.h:5,
                    from include/linux/sched.h:14,
                    from include/linux/sched/mm.h:7,
                    from drivers/gpu/drm/i915/i915_vma.c:25:
   drivers/gpu/drm/i915/i915_vma.c: In function 'i915_ggtt_pin':
>> include/linux/dma-resv.h:80:28: error: implicit declaration of function 'lockdep_is_held'; did you mean 'lockdep_assert_held'? [-Werror=implicit-function-declaration]
      80 | #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
         |                            ^~~~~~~~~~~~~~~
   include/asm-generic/bug.h:127:25: note: in definition of macro 'WARN'
     127 |  int __ret_warn_on = !!(condition);    \
         |                         ^~~~~~~~~
   drivers/gpu/drm/i915/i915_vma.c:1011:2: note: in expansion of macro 'WARN_ON'
    1011 |  WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
         |  ^~~~~~~
   drivers/gpu/drm/i915/i915_vma.c:1011:30: note: in expansion of macro 'dma_resv_held'
    1011 |  WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
         |                              ^~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +80 include/linux/dma-resv.h

786d7257e537da0 include/linux/reservation.h Maarten Lankhorst 2013-06-27  79  
52791eeec1d9f4a include/linux/dma-resv.h    Christian König   2019-08-11 @80  #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
52791eeec1d9f4a include/linux/dma-resv.h    Christian König   2019-08-11  81  #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
04a5faa8cbe5a8e include/linux/reservation.h Maarten Lankhorst 2014-07-01  82  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 34819 bytes --]

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
@ 2020-06-24  7:10   ` Thomas Hellström (Intel)
  2020-06-24  7:43     ` Chris Wilson
  2020-06-29 12:07   ` Tvrtko Ursulin
  2020-06-29 12:32   ` Tvrtko Ursulin
  2 siblings, 1 reply; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-24  7:10 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx

Hi, Maarten,


On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
> eviction. We don't use it yet, but lets start adding the definition
> first.
>
> To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
> unlock directly. It is done in i915_gem_ww_ctx_fini.
>
> Changes since v1:
> - Change ww_ctx and obj order in locking functions (Jonas Lahtinen)
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>   .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
>   .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
>   .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
>   .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
>   .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
>   .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
>   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
>   drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
>   drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
>   drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
>   .../drm/i915/selftests/intel_memory_region.c  |  2 +-
>   24 files changed, 173 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index b1f82a11aef2..3740c0080e38 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
>   	 */
>   	struct list_head lut_list;
>   
> +	/**
> +	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
> +	 *
> +	 * When we lock this object through i915_gem_object_lock() with a
> +	 * context, we add it to the list to ensure we can unlock everything
> +	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
> +	 */
> +	struct list_head obj_link;
> +

Since we don't refcount objects on the list (and we shouldn't need to),
perhaps add a debug warning if, during object destruction, this isn't an
empty list head?

Other than that, this patch looks good to me.

Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-24  7:10   ` Thomas Hellström (Intel)
@ 2020-06-24  7:43     ` Chris Wilson
  2020-06-24  7:49       ` Thomas Hellström (Intel)
  0 siblings, 1 reply; 61+ messages in thread
From: Chris Wilson @ 2020-06-24  7:43 UTC (permalink / raw)
  To: Thomas Hellström, Maarten Lankhorst, intel-gfx

Quoting Thomas Hellström (Intel) (2020-06-24 08:10:43)
> Hi, Maarten,
> 
> 
> On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> > i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
> > eviction. We don't use it yet, but lets start adding the definition
> > first.
> >
> > To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
> > unlock directly. It is done in i915_gem_ww_ctx_fini.
> >
> > Changes since v1:
> > - Change ww_ctx and obj order in locking functions (Jonas Lahtinen)
> >
> > Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
> >   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
> >   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
> >   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
> >   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
> >   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
> >   .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
> >   .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
> >   .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
> >   .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
> >   .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
> >   .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
> >   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
> >   drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
> >   drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
> >   drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
> >   drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
> >   .../drm/i915/selftests/intel_memory_region.c  |  2 +-
> >   24 files changed, 173 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index b1f82a11aef2..3740c0080e38 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
> >        */
> >       struct list_head lut_list;
> >   
> > +     /**
> > +      * @obj_link: Link into @i915_gem_ww_ctx.obj_list
> > +      *
> > +      * When we lock this object through i915_gem_object_lock() with a
> > +      * context, we add it to the list to ensure we can unlock everything
> > +      * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
> > +      */
> > +     struct list_head obj_link;
> > +
> 
> Since we don't refcount objects on the list, (and we shouldn't need to), 
> perhaps a debug warning if during object destruction, this isn't an 
> empty list head?
> 
> Other than that, this patch looks good to me.

Aside from it being in the wrong layer, as was also mentioned several
months ago.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-24  7:43     ` Chris Wilson
@ 2020-06-24  7:49       ` Thomas Hellström (Intel)
  2020-06-24  8:27         ` Chris Wilson
  0 siblings, 1 reply; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-24  7:49 UTC (permalink / raw)
  To: Chris Wilson, Maarten Lankhorst, intel-gfx

Hi, Chris,

On 6/24/20 9:43 AM, Chris Wilson wrote:
> Quoting Thomas Hellström (Intel) (2020-06-24 08:10:43)
>> Hi, Maarten,
>>
>>
>> On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
>>> i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
>>> eviction. We don't use it yet, but lets start adding the definition
>>> first.
>>>
>>> To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
>>> unlock directly. It is done in i915_gem_ww_ctx_fini.
>>>
>>> Changes since v1:
>>> - Change ww_ctx and obj order in locking functions (Jonas Lahtinen)
>>>
>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
>>>    .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>>>    drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
>>>    drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
>>>    .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
>>>    drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
>>>    drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
>>>    .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
>>>    drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
>>>    drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
>>>    .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>>>    .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
>>>    .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
>>>    .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
>>>    .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
>>>    .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
>>>    .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
>>>    drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
>>>    drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
>>>    drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
>>>    drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
>>>    drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
>>>    .../drm/i915/selftests/intel_memory_region.c  |  2 +-
>>>    24 files changed, 173 insertions(+), 42 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> index b1f82a11aef2..3740c0080e38 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
>>>         */
>>>        struct list_head lut_list;
>>>    
>>> +     /**
>>> +      * @obj_link: Link into @i915_gem_ww_ctx.obj_list
>>> +      *
>>> +      * When we lock this object through i915_gem_object_lock() with a
>>> +      * context, we add it to the list to ensure we can unlock everything
>>> +      * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
>>> +      */
>>> +     struct list_head obj_link;
>>> +
>> Since we don't refcount objects on the list, (and we shouldn't need to),
>> perhaps a debug warning if during object destruction, this isn't an
>> empty list head?
>>
>> Other than that, this patch looks good to me.
> Aside it from being in the wrong layer, as was also mentioned several
> months ago.
> -Chris

Could you send a pointer, or perhaps elaborate a bit?

/Thomas


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-24  7:49       ` Thomas Hellström (Intel)
@ 2020-06-24  8:27         ` Chris Wilson
  0 siblings, 0 replies; 61+ messages in thread
From: Chris Wilson @ 2020-06-24  8:27 UTC (permalink / raw)
  To: Thomas Hellström, Maarten Lankhorst, intel-gfx

Quoting Thomas Hellström (Intel) (2020-06-24 08:49:21)
> Hi, Chris,
> 
> On 6/24/20 9:43 AM, Chris Wilson wrote:
> > Quoting Thomas Hellström (Intel) (2020-06-24 08:10:43)
> >> Hi, Maarten,
> >>
> >>
> >> On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> >>> i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
> >>> eviction. We don't use it yet, but lets start adding the definition
> >>> first.
> >>>
> >>> To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
> >>> unlock directly. It is done in i915_gem_ww_ctx_fini.
> >>>
> >>> Changes since v1:
> >>> - Change ww_ctx and obj order in locking functions (Jonas Lahtinen)
> >>>
> >>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
> >>>    .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
> >>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
> >>>    drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
> >>>    drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
> >>>    .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
> >>>    drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
> >>>    drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
> >>>    .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
> >>>    drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
> >>>    drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
> >>>    .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
> >>>    .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
> >>>    .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
> >>>    .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
> >>>    .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
> >>>    .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
> >>>    .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
> >>>    drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
> >>>    drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
> >>>    drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
> >>>    drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
> >>>    drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
> >>>    .../drm/i915/selftests/intel_memory_region.c  |  2 +-
> >>>    24 files changed, 173 insertions(+), 42 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >>> index b1f82a11aef2..3740c0080e38 100644
> >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >>> @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
> >>>         */
> >>>        struct list_head lut_list;
> >>>    
> >>> +     /**
> >>> +      * @obj_link: Link into @i915_gem_ww_ctx.obj_list
> >>> +      *
> >>> +      * When we lock this object through i915_gem_object_lock() with a
> >>> +      * context, we add it to the list to ensure we can unlock everything
> >>> +      * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
> >>> +      */
> >>> +     struct list_head obj_link;
> >>> +
> >> Since we don't refcount objects on the list, (and we shouldn't need to),
> >> perhaps a debug warning if during object destruction, this isn't an
> >> empty list head?
> >>
> >> Other than that, this patch looks good to me.
> > Aside it from being in the wrong layer, as was also mentioned several
> > months ago.
> > -Chris
> 
> Could you send a pointer, or perhaps elaborate a bit?

We have been trying to extricate the GEM uAPI layer and objects from the
memory management of the backing store with a view to bypassing the
implicit rules imposed by GEM, and to remove the layering violation of
the HW layer calling back into the upper API layer.

For the moment the distinction is "should this be obj or obj->mm". Some
might have been arguing for obj->mm to become its backing store object,
[and had hoped ttm would have been usable for managing it] for we will
have i915_vma that do not [need to] refer to GEM objects. Aside from
first class user i915_vma, we also need to allocate PD as some sort of
highly restricted object (and often tiny, so suballocations).

And everything needs a severe diet.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [Intel-gfx] [PATCH] drm/i915: Kill context before taking ctx->mutex
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 26/26] drm/i915: Kill context before taking ctx->mutex Maarten Lankhorst
@ 2020-06-24 11:05   ` Maarten Lankhorst
  2020-06-30 14:16     ` Tvrtko Ursulin
  0 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-24 11:05 UTC (permalink / raw)
  To: intel-gfx

Killing context before taking ctx->mutex fixes a hang in
gem_ctx_persistence.close-replace-race, where lut_close
takes obj->resv.lock which is already held by execbuf,
causing an indefinite stall.

[ 1904.342847] 2 locks held by gem_ctx_persist/11520:
[ 1904.342849]  #0: ffff8882188e4968 (&ctx->mutex){+.+.}-{3:3}, at: context_close+0xe6/0x850 [i915]
[ 1904.342941]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: lut_close+0x2c2/0xba0 [i915]
[ 1904.343033] 3 locks held by gem_ctx_persist/11521:
[ 1904.343035]  #0: ffffc900008ff938 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_gem_do_execbuffer+0x103d/0x54c0 [i915]
[ 1904.343157]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: eb_validate_vmas+0x602/0x2010 [i915]
[ 1904.343267]  #2: ffff88820afd9200 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x335/0x2300 [i915]

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 ++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a3519d5ee5a3..6d25c9c2be1a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -623,6 +623,17 @@ static void context_close(struct i915_gem_context *ctx)
 	i915_gem_context_set_closed(ctx);
 	mutex_unlock(&ctx->engines_mutex);
 
+	/*
+	 * If the user has disabled hangchecking, we can not be sure that
+	 * the batches will ever complete after the context is closed,
+	 * keeping the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 */
+	if (!i915_gem_context_is_persistent(ctx) ||
+	    !ctx->i915->params.enable_hangcheck)
+		kill_context(ctx);
+
 	mutex_lock(&ctx->mutex);
 
 	set_closed_name(ctx);
@@ -642,17 +653,6 @@ static void context_close(struct i915_gem_context *ctx)
 
 	mutex_unlock(&ctx->mutex);
 
-	/*
-	 * If the user has disabled hangchecking, we can not be sure that
-	 * the batches will ever complete after the context is closed,
-	 * keeping the context and all resources pinned forever. So in this
-	 * case we opt to forcibly kill off all remaining requests on
-	 * context close.
-	 */
-	if (!i915_gem_context_is_persistent(ctx) ||
-	    !ctx->i915->params.enable_hangcheck)
-		kill_context(ctx);
-
 	i915_gem_context_put(ctx);
 }
 

base-commit: 64cab0b9f9bfeb14d3ec2452d76b56915cdeb09f
prerequisite-patch-id: e6315738715ac4ffccaeb4c4bf5a94651fb8da1d
prerequisite-patch-id: 7944bb01d1ec7530513eabddb9198275653cc451
prerequisite-patch-id: 052eda3b40906f0fbc16b4cc33dbcdce35e05441
prerequisite-patch-id: 35ff18a74e8bf9bfb0a517f69a98d0ec88bd3b51
prerequisite-patch-id: 7a34e785e951b1d3f4c0e20430c8111a15ddbe92
prerequisite-patch-id: 9b7faf3172e9f218a2589fcc96930af9ab05e70b
prerequisite-patch-id: 3ce7c5b4508018631673e62d8725f866988bd08d
prerequisite-patch-id: 5fd46caff26e53f9cb6df5f8490838b6ac15e015
prerequisite-patch-id: 41782208b1bc32e448ce29313112030c74bd8421
prerequisite-patch-id: b6c4d99cb554c0c2268cde5c43e878a48e005e45
prerequisite-patch-id: 418fdb031a232bba4056171917ee42e997991902
prerequisite-patch-id: ff5bf0dcdb9191761392b0707481aaf99396dbec
prerequisite-patch-id: c3dbcef2f1a68f88ae99acbd01ee56847fb3e2da
prerequisite-patch-id: 18c373676c9bbeb1c11fb2ba5bf4ad728cfea75d
prerequisite-patch-id: 5b9d8e4535096365d365fdd1ec00f844a4135208
prerequisite-patch-id: 63bac64548acd514c4a0cb5acb896c8217fb8201
prerequisite-patch-id: e93b855dd97b24799c59f059cc548f46807ab207
prerequisite-patch-id: 3d7dc6ecbc2279fb48f0972a911fbffd8d899faa
prerequisite-patch-id: f1d9e0b7165f80efe984dd0231d1dbd2a9a79950
prerequisite-patch-id: ed1a168ac98b81b8066f68a0738cfc44a79e8ef1
prerequisite-patch-id: f813cb8d4c2fe2c1d94b66c3f3fbb787ac241628
prerequisite-patch-id: 0f0f90eaa4a2e299adddfe1c7134af3810a8e9e2
prerequisite-patch-id: cb7ffeccd6429fc79aebffb84f62af5e78252461
prerequisite-patch-id: 78905449b46ad574757a7fb91f58847ea20e09cd
prerequisite-patch-id: 6d937a49f3c8cd380121f72610072aaaf8c274b1
prerequisite-patch-id: 0c8d2dee1592395780258488be0350755e7ffd7d
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only"
  2020-06-23 15:23 ` [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Chris Wilson
@ 2020-06-24 11:19   ` Chris Wilson
  0 siblings, 0 replies; 61+ messages in thread
From: Chris Wilson @ 2020-06-24 11:19 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx

Quoting Chris Wilson (2020-06-23 16:23:31)
> Quoting Maarten Lankhorst (2020-06-23 15:28:18)
> > This reverts commit 9e0f9464e2ab36b864359a59b0e9058fdef0ce47,
> > and related commit 7ac2d2536dfa7 ("drm/i915/gem: Delete unused code").
> 
> Regardless that you haven't adapted the series...
> 
> This still prevents concurrent submission between clients, and does not
> remove the allocation mutexes. That latter we can do in a couple of
> patches that preserve the status quo with just a name switch and the
> forced removal of nestable shrinkers.

To be clear, I think the first patch has to be
https://patchwork.freedesktop.org/series/78772/ (provided the rebasing
went ok) to fully commit ourselves to removing obj->mm.mutex. Empirically,
we cannot use trylock inside the mmu_notifier as that often fails during
normal unmap.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (26 preceding siblings ...)
  2020-06-23 15:39 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/26] " Patchwork
@ 2020-06-24 11:58 ` Patchwork
  2020-06-24 11:59 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  30 siblings, 0 replies; 61+ messages in thread
From: Patchwork @ 2020-06-24 11:58 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
URL   : https://patchwork.freedesktop.org/series/78744/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
33892249d7d2 Revert "drm/i915/gem: Async GPU relocations only"
-:113: WARNING:MEMORY_BARRIER: memory barrier without comment
#113: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1102:
+			mb();

-:161: WARNING:MEMORY_BARRIER: memory barrier without comment
#161: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1150:
+			mb();

-:181: CHECK:SPACING: No space is necessary after a cast
#181: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1170:
+		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));

-:260: WARNING:MEMORY_BARRIER: memory barrier without comment
#260: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1249:
+			mb();

-:274: CHECK:BRACES: Unbalanced braces around else statement
#274: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1263:
+	} else

total: 0 errors, 3 warnings, 2 checks, 455 lines checked
cfe75ea198f9 drm/i915: Revert relocation chaining commits.
-:6: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#6: 
This reverts commit 964a9b0f611ee ("drm/i915/gem: Use chained reloc batches")

-:221: CHECK:SPACING: spaces preferred around that '/' (ctx:VxV)
#221: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1306:
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
 	                              ^

total: 0 errors, 1 warnings, 1 checks, 281 lines checked
5fed26bab5d0 Revert "drm/i915/gem: Drop relocation slowpath".
-:80: WARNING:LINE_SPACING: Missing a blank line after declarations
#80: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1700:
+		int err = __get_user(c, addr);
+		if (err)

total: 0 errors, 1 warnings, 0 checks, 267 lines checked
1de69bb5eb4b drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
-:484: WARNING:LONG_LINE: line length of 103 exceeds 100 columns
#484: FILE: drivers/gpu/drm/i915/i915_gem.c:1359:
+	while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {

total: 0 errors, 1 warnings, 0 checks, 465 lines checked
28e0198d22f8 drm/i915: Remove locking from i915_gem_object_prepare_read/write
d782b8ba0104 drm/i915: Parse command buffer earlier in eb_relocate(slow)
c4c86dc1e12b Revert "drm/i915/gem: Split eb_vma into its own allocation"
8394062c0fec drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
61248b8339e2 drm/i915: Use per object locking in execbuf, v12.
-:473: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#473: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1404:
+static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 			      struct i915_vma *vma,

-:493: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#493: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1477:
+static int reloc_entry_gpu(struct i915_execbuffer *eb,
 			    struct i915_vma *vma,

-:505: ERROR:TRAILING_WHITESPACE: trailing whitespace
#505: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1502:
+^I$

-:782: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#782: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2873:
+	eb.reloc_pool = eb.batch_pool = NULL;

total: 1 errors, 0 warnings, 3 checks, 885 lines checked
213e5283007d drm/i915: Use ww locking in intel_renderstate.
-:10: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#10: 
Convert to using ww-waiting, and make sure we always pin intel_context_state,

total: 0 errors, 1 warnings, 0 checks, 190 lines checked
209fd4f2593e drm/i915: Add ww context handling to context_barrier_task
-:19: WARNING:LONG_LINE: line length of 109 exceeds 100 columns
#19: FILE: drivers/gpu/drm/i915/gem/i915_gem_context.c:1097:
+				int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),

total: 0 errors, 1 warnings, 0 checks, 146 lines checked
5e4739f5ae98 drm/i915: Nuke arguments to eb_pin_engine
47202c503661 drm/i915: Pin engine before pinning all objects, v4.
bf7884fb26e5 drm/i915: Rework intel_context pinning to do everything outside of pin_mutex
-:125: CHECK:LINE_SPACING: Please don't use multiple blank lines
#125: FILE: drivers/gpu/drm/i915/gt/intel_context.c:176:
+
+

-:338: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#338: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:3453:
+	*vaddr = i915_gem_object_pin_map(ce->state->obj,
+					i915_coherent_map_type(ce->engine->i915) |

total: 0 errors, 0 warnings, 2 checks, 435 lines checked
b4e39defb35f drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.
-:95: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#95: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:594:
+	err = i915_vma_pin_ww(vma, &eb->ww,
 			   entry->pad_to_size, entry->alignment,

-:204: WARNING:BLOCK_COMMENT_STYLE: Block comments use a trailing */ on a separate line
#204: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2386:
+	 * hsw should have this fixed, but bdw mucks it up again. */

total: 0 errors, 1 warnings, 1 checks, 842 lines checked
f5d541f77361 drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2.
fb75fcfbd9d4 drm/i915: Kill last user of intel_context_create_request outside of selftests
fe5408b9aa3f drm/i915: Convert i915_perf to ww locking as well
e69c9075e596 drm/i915: Dirty hack to fix selftests locking inversion
c5b10d3c7174 drm/i915/selftests: Fix locking inversion in lrc selftest.
49fb1cb8bfae drm/i915: Use ww pinning for intel_context_create_request()
756762ec32c7 drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2.
dde2896028bb drm/i915: Add ww locking to vm_fault_gtt
-:7: WARNING:COMMIT_MESSAGE: Missing commit description - Add an appropriate one

total: 0 errors, 1 warnings, 0 checks, 91 lines checked
501388d7472e drm/i915: Add ww locking to pin_to_display_plane
-:7: WARNING:COMMIT_MESSAGE: Missing commit description - Add an appropriate one

total: 0 errors, 1 warnings, 0 checks, 129 lines checked
1d2a412696af drm/i915: Ensure we hold the pin mutex
-:7: WARNING:COMMIT_MESSAGE: Missing commit description - Add an appropriate one

total: 0 errors, 1 warnings, 0 checks, 39 lines checked
076ef058d638 drm/i915: Kill context before taking ctx->mutex

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (27 preceding siblings ...)
  2020-06-24 11:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2) Patchwork
@ 2020-06-24 11:59 ` Patchwork
  2020-06-24 12:48 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  2020-07-01 13:10 ` [Intel-gfx] ✗ Fi.CI.IGT: " Patchwork
  30 siblings, 0 replies; 61+ messages in thread
From: Patchwork @ 2020-06-24 11:59 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
URL   : https://patchwork.freedesktop.org/series/78744/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.0
Fast mode used, each commit won't be checked separately.
+drivers/gpu/drm/i915/display/intel_display.c:1222:22: error: Expected constant expression in case statement
+drivers/gpu/drm/i915/display/intel_display.c:1225:22: error: Expected constant expression in case statement
+drivers/gpu/drm/i915/display/intel_display.c:1228:22: error: Expected constant expression in case statement
+drivers/gpu/drm/i915/display/intel_display.c:1231:22: error: Expected constant expression in case statement
+drivers/gpu/drm/i915/gt/intel_lrc.c:2785:17: error: too long token expansion
+drivers/gpu/drm/i915/gt/intel_lrc.c:2785:17: error: too long token expansion
+drivers/gpu/drm/i915/gt/intel_reset.c:1310:5: warning: context imbalance in 'intel_gt_reset_trylock' - different lock contexts for basic block
+drivers/gpu/drm/i915/gt/sysfs_engines.c:61:10: error: bad integer constant expression
+drivers/gpu/drm/i915/gt/sysfs_engines.c:62:10: error: bad integer constant expression
+drivers/gpu/drm/i915/gt/sysfs_engines.c:66:10: error: bad integer constant expression
+drivers/gpu/drm/i915/gvt/mmio.c:287:23: warning: memcpy with byte count of 279040
+drivers/gpu/drm/i915/i915_perf.c:1440:15: warning: memset with byte count of 16777216
+drivers/gpu/drm/i915/i915_perf.c:1494:15: warning: memset with byte count of 16777216
+drivers/gpu/drm/i915/selftests/i915_syncmap.c:80:54: warning: dubious: x | !y
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen8_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen8_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen8_write8' - different lock contexts for basic block

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (28 preceding siblings ...)
  2020-06-24 11:59 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
@ 2020-06-24 12:48 ` Patchwork
  2020-07-01 13:10 ` [Intel-gfx] ✗ Fi.CI.IGT: " Patchwork
  30 siblings, 0 replies; 61+ messages in thread
From: Patchwork @ 2020-06-24 12:48 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
URL   : https://patchwork.freedesktop.org/series/78744/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8661 -> Patchwork_18018
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18018 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18018, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18018:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@gem_contexts:
    - fi-cfl-8109u:       [PASS][1] -> [DMESG-WARN][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-cfl-8109u/igt@i915_selftest@live@gem_contexts.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-cfl-8109u/igt@i915_selftest@live@gem_contexts.html

  * igt@i915_selftest@live@gem_execbuf:
    - fi-skl-6600u:       [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-skl-6600u/igt@i915_selftest@live@gem_execbuf.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-skl-6600u/igt@i915_selftest@live@gem_execbuf.html
    - fi-kbl-8809g:       [PASS][5] -> [INCOMPLETE][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-kbl-8809g/igt@i915_selftest@live@gem_execbuf.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-kbl-8809g/igt@i915_selftest@live@gem_execbuf.html
    - fi-kbl-x1275:       [PASS][7] -> [INCOMPLETE][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-kbl-x1275/igt@i915_selftest@live@gem_execbuf.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-kbl-x1275/igt@i915_selftest@live@gem_execbuf.html
    - fi-cfl-guc:         [PASS][9] -> [INCOMPLETE][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-cfl-guc/igt@i915_selftest@live@gem_execbuf.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-cfl-guc/igt@i915_selftest@live@gem_execbuf.html
    - fi-kbl-soraka:      [PASS][11] -> [INCOMPLETE][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-kbl-soraka/igt@i915_selftest@live@gem_execbuf.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-kbl-soraka/igt@i915_selftest@live@gem_execbuf.html
    - fi-snb-2600:        [PASS][13] -> [DMESG-WARN][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-snb-2600/igt@i915_selftest@live@gem_execbuf.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-snb-2600/igt@i915_selftest@live@gem_execbuf.html
    - fi-bsw-n3050:       [PASS][15] -> [INCOMPLETE][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-bsw-n3050/igt@i915_selftest@live@gem_execbuf.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-bsw-n3050/igt@i915_selftest@live@gem_execbuf.html
    - fi-tgl-u2:          [PASS][17] -> [INCOMPLETE][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-tgl-u2/igt@i915_selftest@live@gem_execbuf.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-tgl-u2/igt@i915_selftest@live@gem_execbuf.html
    - fi-cml-u2:          [PASS][19] -> [INCOMPLETE][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-cml-u2/igt@i915_selftest@live@gem_execbuf.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-cml-u2/igt@i915_selftest@live@gem_execbuf.html
    - fi-cfl-8700k:       [PASS][21] -> [INCOMPLETE][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-cfl-8700k/igt@i915_selftest@live@gem_execbuf.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-cfl-8700k/igt@i915_selftest@live@gem_execbuf.html
    - fi-hsw-4770:        [PASS][23] -> [INCOMPLETE][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-hsw-4770/igt@i915_selftest@live@gem_execbuf.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-hsw-4770/igt@i915_selftest@live@gem_execbuf.html
    - fi-icl-u2:          [PASS][25] -> [INCOMPLETE][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-icl-u2/igt@i915_selftest@live@gem_execbuf.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-icl-u2/igt@i915_selftest@live@gem_execbuf.html
    - fi-snb-2520m:       [PASS][27] -> [INCOMPLETE][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-snb-2520m/igt@i915_selftest@live@gem_execbuf.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-snb-2520m/igt@i915_selftest@live@gem_execbuf.html

  * igt@runner@aborted:
    - fi-snb-2520m:       NOTRUN -> [FAIL][29]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-snb-2520m/igt@runner@aborted.html
    - fi-snb-2600:        NOTRUN -> [FAIL][30]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-snb-2600/igt@runner@aborted.html
    - fi-elk-e7500:       NOTRUN -> [FAIL][31]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-elk-e7500/igt@runner@aborted.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live@gem_contexts:
    - {fi-kbl-7560u}:     [PASS][32] -> [DMESG-WARN][33]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-kbl-7560u/igt@i915_selftest@live@gem_contexts.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-kbl-7560u/igt@i915_selftest@live@gem_contexts.html

  * igt@i915_selftest@live@gem_execbuf:
    - {fi-tgl-dsi}:       [PASS][34] -> [INCOMPLETE][35]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-tgl-dsi/igt@i915_selftest@live@gem_execbuf.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-tgl-dsi/igt@i915_selftest@live@gem_execbuf.html

  
Known issues
------------

  Here are the changes found in Patchwork_18018 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-bsw-kefka:       [PASS][36] -> [DMESG-WARN][37] ([i915#1982])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_selftest@live@blt:
    - fi-snb-2600:        [PASS][38] -> [INCOMPLETE][39] ([i915#82])
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-snb-2600/igt@i915_selftest@live@blt.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-snb-2600/igt@i915_selftest@live@blt.html

  * igt@i915_selftest@live@gem_execbuf:
    - fi-elk-e7500:       [PASS][40] -> [INCOMPLETE][41] ([i915#66])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-elk-e7500/igt@i915_selftest@live@gem_execbuf.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-elk-e7500/igt@i915_selftest@live@gem_execbuf.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-tgl-u2:          [PASS][42] -> [DMESG-WARN][43] ([i915#402])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-tgl-u2/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-tgl-u2/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-tgl-u2:          [FAIL][44] ([i915#1888]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-tgl-u2/igt@gem_exec_suspend@basic-s3.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-tgl-u2/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_module_load@reload:
    - {fi-tgl-dsi}:       [DMESG-WARN][46] ([i915#1982]) -> [PASS][47] +2 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-tgl-dsi/igt@i915_module_load@reload.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-tgl-dsi/igt@i915_module_load@reload.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-legacy:
    - fi-icl-u2:          [DMESG-WARN][48] ([i915#1982]) -> [PASS][49]
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-icl-u2/igt@kms_cursor_legacy@basic-flip-after-cursor-legacy.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-icl-u2/igt@kms_cursor_legacy@basic-flip-after-cursor-legacy.html

  
#### Warnings ####

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-kbl-x1275:       [DMESG-WARN][50] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][51] ([i915#62] / [i915#92])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-kbl-x1275/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-kbl-x1275/igt@kms_pipe_crc_basic@read-crc-pipe-b.html

  * igt@prime_vgem@basic-fence-flip:
    - fi-kbl-x1275:       [DMESG-WARN][52] ([i915#62] / [i915#92]) -> [DMESG-WARN][53] ([i915#62] / [i915#92] / [i915#95]) +7 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/fi-kbl-x1275/igt@prime_vgem@basic-fence-flip.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/fi-kbl-x1275/igt@prime_vgem@basic-fence-flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#66]: https://gitlab.freedesktop.org/drm/intel/issues/66
  [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (44 -> 38)
------------------------------

  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_8661 -> Patchwork_18018

  CI-20190529: 20190529
  CI_DRM_8661: 64cab0b9f9bfeb14d3ec2452d76b56915cdeb09f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5717: 725bf2dae51f0087eaa64f1931a2ef9d22f070dd @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18018: 076ef058d63845db621e8d9149589dcb390e37e5 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

076ef058d638 drm/i915: Kill context before taking ctx->mutex
1d2a412696af drm/i915: Ensure we hold the pin mutex
501388d7472e drm/i915: Add ww locking to pin_to_display_plane
dde2896028bb drm/i915: Add ww locking to vm_fault_gtt
756762ec32c7 drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2.
49fb1cb8bfae drm/i915: Use ww pinning for intel_context_create_request()
c5b10d3c7174 drm/i915/selftests: Fix locking inversion in lrc selftest.
e69c9075e596 drm/i915: Dirty hack to fix selftests locking inversion
fe5408b9aa3f drm/i915: Convert i915_perf to ww locking as well
fb75fcfbd9d4 drm/i915: Kill last user of intel_context_create_request outside of selftests
f5d541f77361 drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2.
b4e39defb35f drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.
bf7884fb26e5 drm/i915: Rework intel_context pinning to do everything outside of pin_mutex
47202c503661 drm/i915: Pin engine before pinning all objects, v4.
5e4739f5ae98 drm/i915: Nuke arguments to eb_pin_engine
209fd4f2593e drm/i915: Add ww context handling to context_barrier_task
213e5283007d drm/i915: Use ww locking in intel_renderstate.
61248b8339e2 drm/i915: Use per object locking in execbuf, v12.
8394062c0fec drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
c4c86dc1e12b Revert "drm/i915/gem: Split eb_vma into its own allocation"
d782b8ba0104 drm/i915: Parse command buffer earlier in eb_relocate(slow)
28e0198d22f8 drm/i915: Remove locking from i915_gem_object_prepare_read/write
1de69bb5eb4b drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
5fed26bab5d0 Revert "drm/i915/gem: Drop relocation slowpath".
cfe75ea198f9 drm/i915: Revert relocation chaining commits.
33892249d7d2 Revert "drm/i915/gem: Async GPU relocations only"

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 15/26] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 15/26] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin Maarten Lankhorst
@ 2020-06-25 14:32   ` Thomas Hellström (Intel)
  0 siblings, 0 replies; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-25 14:32 UTC (permalink / raw)
  To: intel-gfx

Hi, Maarten,

On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> As a preparation step for full object locking and wait/wound handling
> during pin and object mapping, ensure that we always pass the ww context
> in i915_gem_execbuffer.c to i915_vma_pin, use lockdep to ensure this
> happens.
>
> This also requires changing the order of eb_parse slightly, to ensure
> we pass ww at a point where we could still handle -EDEADLK safely.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 138 ++++++++++--------
>   drivers/gpu/drm/i915/gt/gen6_ppgtt.c          |   4 +-
>   drivers/gpu/drm/i915/gt/gen6_ppgtt.h          |   4 +-
>   drivers/gpu/drm/i915/gt/intel_context.c       |  65 ++++++---
>   drivers/gpu/drm/i915/gt/intel_context.h       |  13 ++
>   drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +-
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
>   drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |   5 +-
>   drivers/gpu/drm/i915/gt/intel_renderstate.c   |   2 +-
>   drivers/gpu/drm/i915/gt/intel_ring.c          |  10 +-
>   drivers/gpu/drm/i915/gt/intel_ring.h          |   3 +-
>   .../gpu/drm/i915/gt/intel_ring_submission.c   |  15 +-
>   drivers/gpu/drm/i915/gt/intel_timeline.c      |  12 +-
>   drivers/gpu/drm/i915/gt/intel_timeline.h      |   3 +-
>   drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |   2 +-
>   drivers/gpu/drm/i915/gt/selftest_timeline.c   |   4 +-
>   drivers/gpu/drm/i915/gt/uc/intel_guc.c        |   2 +-
>   drivers/gpu/drm/i915/i915_drv.h               |  13 +-
>   drivers/gpu/drm/i915/i915_gem.c               |  11 +-
>   drivers/gpu/drm/i915/i915_vma.c               |  13 +-
>   drivers/gpu/drm/i915/i915_vma.h               |  13 +-
>   25 files changed, 214 insertions(+), 134 deletions(-)
>
From a quick bisect, it appears this patch is what's causing the
contention in execbuf, as seen with:
./gem_exec_alignment --run-subtest pi-isolated

/Thomas


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write Maarten Lankhorst
@ 2020-06-26 13:32   ` Thomas Hellström (Intel)
  2020-06-29 12:56   ` Tvrtko Ursulin
  1 sibling, 0 replies; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-26 13:32 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> Execbuffer submission will perform its own WW locking, and we
> cannot rely on the implicit lock there.
>
> This also makes it clear that the GVT code will get a lockdep splat when
> multiple batchbuffer shadows need to be performed in the same instance,
> fix that up.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 20 ++++++-------------
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 13 ++++++++++--
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |  1 -
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  5 ++++-
>   .../i915/gem/selftests/i915_gem_coherency.c   | 14 +++++++++----
>   .../drm/i915/gem/selftests/i915_gem_context.c | 12 ++++++++---
>   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  1 +
>   drivers/gpu/drm/i915/i915_gem.c               | 20 +++++++++++++++++--
>   8 files changed, 59 insertions(+), 27 deletions(-)
>
lgtm. Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock Maarten Lankhorst
@ 2020-06-26 13:52   ` Thomas Hellström (Intel)
  2020-06-29 15:14   ` Tvrtko Ursulin
  1 sibling, 0 replies; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-26 13:52 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> The lock here should be interruptible, so we can backoff if needed.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 2636a130fb57..aa441af81431 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -774,7 +774,12 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>   		if (err == 0) { /* And nor has this handle */
>   			struct drm_i915_gem_object *obj = vma->obj;
>   
> -			i915_gem_object_lock(obj, NULL);
> +			err = i915_gem_object_lock_interruptible(obj, NULL);
> +			if (err) {
> +				radix_tree_delete(&ctx->handles_vma, handle);
> +				goto unlock;
> +			}
> +
>   			if (idr_find(&eb->file->object_idr, handle) == obj) {
>   				list_add(&lut->obj_link, &obj->lut_list);
>   			} else {
> @@ -783,6 +788,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>   			}
>   			i915_gem_object_unlock(obj);
>   		}
> +unlock:
>   		mutex_unlock(&ctx->mutex);
>   	}
>   	if (unlikely(err))
Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow) Maarten Lankhorst
@ 2020-06-26 14:41   ` Thomas Hellström (Intel)
  2020-06-29 10:40     ` Maarten Lankhorst
  2020-06-29 14:42   ` Tvrtko Ursulin
  1 sibling, 1 reply; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-26 14:41 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> We want to introduce backoff logic, but we need to lock the
> pool object as well for command parsing. Because of this, we
> will need backoff logic for the engine pool obj, move the batch
> validation up slightly to eb_lookup_vmas, and the actual command
> parsing in a separate function which can get called from execbuf
> relocation fast and slowpath.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 66 ++++++++++---------
>   1 file changed, 36 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index f896b1a4b38a..7cb44915cfc7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -290,6 +290,8 @@ struct i915_execbuffer {
>   	struct eb_vma_array *array;
>   };
>   
> +static int eb_parse(struct i915_execbuffer *eb);
> +
>   static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
>   {
>   	return intel_engine_requires_cmd_parser(eb->engine) ||
> @@ -873,6 +875,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
>   
>   static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   {
> +	struct drm_i915_private *i915 = eb->i915;
>   	unsigned int batch = eb_batch_index(eb);
>   	unsigned int i;
>   	int err = 0;
> @@ -886,18 +889,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   		vma = eb_lookup_vma(eb, eb->exec[i].handle);
>   		if (IS_ERR(vma)) {
>   			err = PTR_ERR(vma);
> -			break;
> +			goto err;
>   		}
>   
>   		err = eb_validate_vma(eb, &eb->exec[i], vma);
>   		if (unlikely(err)) {
>   			i915_vma_put(vma);
> -			break;
> +			goto err;
>   		}
>   
>   		eb_add_vma(eb, i, batch, vma);
>   	}
>   
> +	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
> +		drm_dbg(&i915->drm,
> +			"Attempting to use self-modifying batch buffer\n");
> +		return -EINVAL;
> +	}
> +
> +	if (range_overflows_t(u64,
> +			      eb->batch_start_offset, eb->batch_len,
> +			      eb->batch->vma->size)) {
> +		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
> +		return -EINVAL;
> +	}
> +
> +	if (eb->batch_len == 0)
> +		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
> +
> +	return 0;
> +
> +err:
>   	eb->vma[i].vma = NULL;
>   	return err;
>   }
> @@ -1809,7 +1831,7 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
>   	return 0;
>   }
>   
> -static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
>   {
>   	bool have_copy = false;
>   	struct eb_vma *ev;
> @@ -1872,6 +1894,11 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	if (err)
>   		goto err;
>   
> +	/* as last step, parse the command buffer */
> +	err = eb_parse(eb);
> +	if (err)
> +		goto err;
> +
>   	/*
>   	 * Leave the user relocations as are, this is the painfully slow path,
>   	 * and we want to avoid the complication of dropping the lock whilst
> @@ -1904,7 +1931,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	return err;
>   }
>   
> -static int eb_relocate(struct i915_execbuffer *eb)
> +static int eb_relocate_parse(struct i915_execbuffer *eb)
>   {
>   	int err;
>   
> @@ -1932,7 +1959,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   			return eb_relocate_slow(eb);
>   	}
>   
> -	return 0;
> +	return eb_parse(eb);
>   }
>   
>   static int eb_move_to_gpu(struct i915_execbuffer *eb)
> @@ -2870,7 +2897,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_context;
>   
> -	err = eb_relocate(&eb);
> +	err = eb_relocate_parse(&eb);
>   	if (err) {
>   		/*
>   		 * If the user expects the execobject.offset and
> @@ -2883,33 +2910,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		goto err_vma;
>   	}
>   
> -	if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
> -		drm_dbg(&i915->drm,
> -			"Attempting to use self-modifying batch buffer\n");
> -		err = -EINVAL;
> -		goto err_vma;
> -	}
> -
> -	if (range_overflows_t(u64,
> -			      eb.batch_start_offset, eb.batch_len,
> -			      eb.batch->vma->size)) {
> -		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
> -		err = -EINVAL;
> -		goto err_vma;
> -	}
> -
> -	if (eb.batch_len == 0)
> -		eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
> -
> -	err = eb_parse(&eb);
> -	if (err)
> -		goto err_vma;
> -
>   	/*
>   	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>   	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>   	 * hsw should have this fixed, but bdw mucks it up again. */
> -	batch = eb.batch->vma;
>   	if (eb.batch_flags & I915_DISPATCH_SECURE) {
>   		struct i915_vma *vma;
>   
> @@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		 *   fitting due to fragmentation.
>   		 * So this is actually safe.
>   		 */
> -		vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
> +		vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>   		if (IS_ERR(vma)) {
>   			err = PTR_ERR(vma);
>   			goto err_parse;
>   		}
>   
>   		batch = vma;
> +	} else {
> +		batch = eb.batch->vma;
>   	}
>   

Hmm, it's late Friday afternoon so that might be the cause, but I fail
to see what the above hunk is trying to achieve?


>   	/* All GPU relocation batches must be submitted prior to the user rq */
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)
  2020-06-26 14:41   ` Thomas Hellström (Intel)
@ 2020-06-29 10:40     ` Maarten Lankhorst
  2020-06-29 11:15       ` Thomas Hellström (Intel)
  0 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-29 10:40 UTC (permalink / raw)
  To: Thomas Hellström (Intel), intel-gfx

Op 26-06-2020 om 16:41 schreef Thomas Hellström (Intel):
>
> On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
>> We want to introduce backoff logic, but we need to lock the
>> pool object as well for command parsing. Because of this, we
>> will need backoff logic for the engine pool obj, move the batch
>> validation up slightly to eb_lookup_vmas, and the actual command
>> parsing in a separate function which can get called from execbuf
>> relocation fast and slowpath.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 66 ++++++++++---------
>>   1 file changed, 36 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index f896b1a4b38a..7cb44915cfc7 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -290,6 +290,8 @@ struct i915_execbuffer {
>>       struct eb_vma_array *array;
>>   };
>>   +static int eb_parse(struct i915_execbuffer *eb);
>> +
>>   static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
>>   {
>>       return intel_engine_requires_cmd_parser(eb->engine) ||
>> @@ -873,6 +875,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
>>     static int eb_lookup_vmas(struct i915_execbuffer *eb)
>>   {
>> +    struct drm_i915_private *i915 = eb->i915;
>>       unsigned int batch = eb_batch_index(eb);
>>       unsigned int i;
>>       int err = 0;
>> @@ -886,18 +889,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>>           vma = eb_lookup_vma(eb, eb->exec[i].handle);
>>           if (IS_ERR(vma)) {
>>               err = PTR_ERR(vma);
>> -            break;
>> +            goto err;
>>           }
>>             err = eb_validate_vma(eb, &eb->exec[i], vma);
>>           if (unlikely(err)) {
>>               i915_vma_put(vma);
>> -            break;
>> +            goto err;
>>           }
>>             eb_add_vma(eb, i, batch, vma);
>>       }
>>   +    if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
>> +        drm_dbg(&i915->drm,
>> +            "Attempting to use self-modifying batch buffer\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (range_overflows_t(u64,
>> +                  eb->batch_start_offset, eb->batch_len,
>> +                  eb->batch->vma->size)) {
>> +        drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (eb->batch_len == 0)
>> +        eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
>> +
>> +    return 0;
>> +
>> +err:
>>       eb->vma[i].vma = NULL;
>>       return err;
>>   }
>> @@ -1809,7 +1831,7 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
>>       return 0;
>>   }
>>   -static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>> +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
>>   {
>>       bool have_copy = false;
>>       struct eb_vma *ev;
>> @@ -1872,6 +1894,11 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>>       if (err)
>>           goto err;
>>   +    /* as last step, parse the command buffer */
>> +    err = eb_parse(eb);
>> +    if (err)
>> +        goto err;
>> +
>>       /*
>>        * Leave the user relocations as are, this is the painfully slow path,
>>        * and we want to avoid the complication of dropping the lock whilst
>> @@ -1904,7 +1931,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>>       return err;
>>   }
>>   -static int eb_relocate(struct i915_execbuffer *eb)
>> +static int eb_relocate_parse(struct i915_execbuffer *eb)
>>   {
>>       int err;
>>   @@ -1932,7 +1959,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
>>               return eb_relocate_slow(eb);
>>       }
>>   -    return 0;
>> +    return eb_parse(eb);
>>   }
>>     static int eb_move_to_gpu(struct i915_execbuffer *eb)
>> @@ -2870,7 +2897,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>       if (unlikely(err))
>>           goto err_context;
>>   -    err = eb_relocate(&eb);
>> +    err = eb_relocate_parse(&eb);
>>       if (err) {
>>           /*
>>            * If the user expects the execobject.offset and
>> @@ -2883,33 +2910,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>           goto err_vma;
>>       }
>>   -    if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
>> -        drm_dbg(&i915->drm,
>> -            "Attempting to use self-modifying batch buffer\n");
>> -        err = -EINVAL;
>> -        goto err_vma;
>> -    }
>> -
>> -    if (range_overflows_t(u64,
>> -                  eb.batch_start_offset, eb.batch_len,
>> -                  eb.batch->vma->size)) {
>> -        drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
>> -        err = -EINVAL;
>> -        goto err_vma;
>> -    }
>> -
>> -    if (eb.batch_len == 0)
>> -        eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
>> -
>> -    err = eb_parse(&eb);
>> -    if (err)
>> -        goto err_vma;
>> -
>>       /*
>>        * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>>        * batch" bit. Hence we need to pin secure batches into the global gtt.
>>        * hsw should have this fixed, but bdw mucks it up again. */
>> -    batch = eb.batch->vma;
>>       if (eb.batch_flags & I915_DISPATCH_SECURE) {
>>           struct i915_vma *vma;
>>   @@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>            *   fitting due to fragmentation.
>>            * So this is actually safe.
>>            */
>> -        vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
>> +        vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>>           if (IS_ERR(vma)) {
>>               err = PTR_ERR(vma);
>>               goto err_parse;
>>           }
>>             batch = vma;
>> +    } else {
>> +        batch = eb.batch->vma;
>>       }
>>   
>
> Hmm, it's late friday afternoon so that might be the cause, but I fail to see what the above hunk is trying to achieve? 


Execbuf parsing may create a shadow object which also needs to be locked. We do this inside eb_relocate() to ensure the normal rules for w/w handling can be used for eb parsing as well. :)

~Maarten

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)
  2020-06-29 10:40     ` Maarten Lankhorst
@ 2020-06-29 11:15       ` Thomas Hellström (Intel)
  2020-06-29 11:18         ` Maarten Lankhorst
  0 siblings, 1 reply; 61+ messages in thread
From: Thomas Hellström (Intel) @ 2020-06-29 11:15 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx

Hi,

On 6/29/20 12:40 PM, Maarten Lankhorst wrote:
>
>>>        /*
>>>         * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>>>         * batch" bit. Hence we need to pin secure batches into the global gtt.
>>>         * hsw should have this fixed, but bdw mucks it up again. */
>>> -    batch = eb.batch->vma;
>>>        if (eb.batch_flags & I915_DISPATCH_SECURE) {
>>>            struct i915_vma *vma;
>>>    @@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>>             *   fitting due to fragmentation.
>>>             * So this is actually safe.
>>>             */
>>> -        vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
>>> +        vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>>>            if (IS_ERR(vma)) {
>>>                err = PTR_ERR(vma);
>>>                goto err_parse;
>>>            }
>>>              batch = vma;
>>> +    } else {
>>> +        batch = eb.batch->vma;
>>>        }
>>>    
>> Hmm, it's late friday afternoon so that might be the cause, but I fail to see what the above hunk is trying to achieve?
>
> Execbuf parsing may create a shadow object which also needs to be locked, we do this inside eb_relocate() to ensure the normal rules for w/w handling can be used for eb parsing as well. :)
>
> ~Maarten

I meant the changed assignment of the batch variable?

/Thomas


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)
  2020-06-29 11:15       ` Thomas Hellström (Intel)
@ 2020-06-29 11:18         ` Maarten Lankhorst
  0 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-29 11:18 UTC (permalink / raw)
  To: Thomas Hellström (Intel), intel-gfx

Op 29-06-2020 om 13:15 schreef Thomas Hellström (Intel):
> Hi,
>
> On 6/29/20 12:40 PM, Maarten Lankhorst wrote:
>>
>>>>        /*
>>>>         * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>>>>         * batch" bit. Hence we need to pin secure batches into the global gtt.
>>>>         * hsw should have this fixed, but bdw mucks it up again. */
>>>> -    batch = eb.batch->vma;
>>>>        if (eb.batch_flags & I915_DISPATCH_SECURE) {
>>>>            struct i915_vma *vma;
>>>>    @@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>>>             *   fitting due to fragmentation.
>>>>             * So this is actually safe.
>>>>             */
>>>> -        vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
>>>> +        vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>>>>            if (IS_ERR(vma)) {
>>>>                err = PTR_ERR(vma);
>>>>                goto err_parse;
>>>>            }
>>>>              batch = vma;
>>>> +    } else {
>>>> +        batch = eb.batch->vma;
>>>>        }
>>>>    
>>> Hmm, it's late friday afternoon so that might be the cause, but I fail to see what the above hunk is trying to achieve?
>>
>> Execbuf parsing may create a shadow object which also needs to be locked, we do this inside eb_relocate() to ensure the normal rules for w/w handling can be used for eb parsing as well. :)
>>
>> ~Maarten
>
> I meant the changed assignment of the batch variable?
>
> /Thomas
>
>
Nothing; it still ends up being the same. :)

I was looking at changing that pin as well, but didn't get around to it yet.

~Maarten

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
  2020-06-24  7:10   ` Thomas Hellström (Intel)
@ 2020-06-29 12:07   ` Tvrtko Ursulin
  2020-06-29 12:32   ` Tvrtko Ursulin
  2 siblings, 0 replies; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-29 12:07 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 23/06/2020 15:28, Maarten Lankhorst wrote:
> i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
> eviction. We don't use it yet, but lets start adding the definition
> first.
> 
> To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
> unlock directly. It is done in i915_gem_ww_ctx_fini.
> 
> Changes since v1:
> - Change ww_ctx and obj order in locking functions (Jonas Lahtinen)

Do you envisage that, by the end of the refactoring (including
obj->mm.lock), the majority of i915_gem_object_lock() call sites will be
passing a context?

I am wondering whether it would be easier, for maintaining the series, to
keep i915_gem_object_lock() as is, and add a new i915_gem_object_lock_ww()
to be used at select places only.

Interestingly, I think later in the series I will have the opposite
feeling regarding some other entry points. So I might be wrong, or might
just not have the full picture at this point. :)

Regards,

Tvrtko

> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>   .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
>   .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
>   .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
>   .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
>   .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
>   .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
>   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
>   drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
>   drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
>   drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
>   .../drm/i915/selftests/intel_memory_region.c  |  2 +-
>   24 files changed, 173 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 7457813ef273..e909ccc37a54 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -2309,7 +2309,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>   
>   void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
>   {
> -	i915_gem_object_lock(vma->obj);
> +	i915_gem_object_lock(vma->obj, NULL);
>   	if (flags & PLANE_HAS_FENCE)
>   		i915_vma_unpin_fence(vma);
>   	i915_gem_object_unpin_from_display_plane(vma);
> @@ -17112,7 +17112,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
>   	if (!intel_fb->frontbuffer)
>   		return -ENOMEM;
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	tiling = i915_gem_object_get_tiling(obj);
>   	stride = i915_gem_object_get_stride(obj);
>   	i915_gem_object_unlock(obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> index d3a86a4d5c04..c182091c00ff 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -286,7 +286,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
>   	dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
>   	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_sw_fence_await_reservation(&work->wait,
>   					      obj->base.resv, NULL, true, 0,
>   					      I915_FENCE_GFP);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 30c229fcb404..a996583640ee 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -113,7 +113,7 @@ static void lut_close(struct i915_gem_context *ctx)
>   			continue;
>   
>   		rcu_read_unlock();
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		list_for_each_entry(lut, &obj->lut_list, obj_link) {
>   			if (lut->ctx != ctx)
>   				continue;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index 2679380159fc..27fddc22a7c6 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
>   	if (err)
>   		return err;
>   
> -	err = i915_gem_object_lock_interruptible(obj);
> +	err = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (err)
>   		goto out;
>   
> @@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
>   	if (err)
>   		return err;
>   
> -	err = i915_gem_object_lock_interruptible(obj);
> +	err = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (err)
>   		goto out;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 7f76fc68f498..c0acfc97fae3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -32,7 +32,7 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
>   	if (!i915_gem_object_is_framebuffer(obj))
>   		return;
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	__i915_gem_object_flush_for_display(obj);
>   	i915_gem_object_unlock(obj);
>   }
> @@ -197,7 +197,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> @@ -536,7 +536,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   	if (err)
>   		goto out;
>   
> -	err = i915_gem_object_lock_interruptible(obj);
> +	err = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (err)
>   		goto out_unpin;
>   
> @@ -576,7 +576,7 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>   	if (!i915_gem_object_has_struct_page(obj))
>   		return -ENODEV;
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> @@ -630,7 +630,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>   	if (!i915_gem_object_has_struct_page(obj))
>   		return -ENODEV;
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 2b4c210638c1..391d22051b20 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -813,7 +813,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>   		if (err == 0) { /* And nor has this handle */
>   			struct drm_i915_gem_object *obj = vma->obj;
>   
> -			i915_gem_object_lock(obj);
> +			i915_gem_object_lock(obj, NULL);
>   			if (idr_find(&eb->file->object_idr, handle) == obj) {
>   				list_add(&lut->obj_link, &obj->lut_list);
>   			} else {
> @@ -1083,7 +1083,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>   		if (use_cpu_reloc(cache, obj))
>   			return NULL;
>   
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		err = i915_gem_object_set_to_gtt_domain(obj, true);
>   		i915_gem_object_unlock(obj);
>   		if (err)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index b6ec5b50d93b..b59e2d40c347 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -108,7 +108,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   	struct i915_lut_handle *lut, *ln;
>   	LIST_HEAD(close);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
>   		struct i915_gem_context *ctx = lut->ctx;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 2faa481cc18f..5103067269b0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
>   
>   #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
>   
> -static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
> +static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
> +					 struct i915_gem_ww_ctx *ww,
> +					 bool intr)
>   {
> -	dma_resv_lock(obj->base.resv, NULL);
> +	int ret;
> +
> +	if (intr)
> +		ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
> +	else
> +		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
> +
> +	if (!ret && ww)
> +		list_add_tail(&obj->obj_link, &ww->obj_list);
> +	if (ret == -EALREADY)
> +		ret = 0;
> +
> +	if (ret == -EDEADLK)
> +		ww->contended = obj;
> +
> +	return ret;
>   }
>   
> -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
> +static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
> +				       struct i915_gem_ww_ctx *ww)
>   {
> -	return dma_resv_trylock(obj->base.resv);
> +	return __i915_gem_object_lock(obj, ww, ww && ww->intr);
>   }
>   
> -static inline int
> -i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
> +static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
> +						     struct i915_gem_ww_ctx *ww)
>   {
> -	return dma_resv_lock_interruptible(obj->base.resv, NULL);
> +	WARN_ON(ww && !ww->intr);
> +	return __i915_gem_object_lock(obj, ww, true);
> +}
> +
> +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
> +{
> +	return dma_resv_trylock(obj->base.resv);
>   }
>   
>   static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index b1f82a11aef2..3740c0080e38 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
>   	 */
>   	struct list_head lut_list;
>   
> +	/**
> +	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
> +	 *
> +	 * When we lock this object through i915_gem_object_lock() with a
> +	 * context, we add it to the list to ensure we can unlock everything
> +	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
> +	 */
> +	struct list_head obj_link;
> +
>   	/** Stolen memory for this object, instead of being backed by shmem. */
>   	struct drm_mm_node *stolen;
>   	union {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 3d215164dd5a..40d3e40500fa 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
>   
>   			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
>   
> -			i915_gem_object_lock(obj);
> +			i915_gem_object_lock(obj, NULL);
>   			drm_WARN_ON(&i915->drm,
>   			    i915_gem_object_set_to_gtt_domain(obj, false));
>   			i915_gem_object_unlock(obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> index 0158e49bf9bb..65fbf29c4852 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> @@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>   	 * whilst executing a fenced command for an untiled object.
>   	 */
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	if (i915_gem_object_is_framebuffer(obj)) {
>   		i915_gem_object_unlock(obj);
>   		return -EBUSY;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index 8291ede6902c..eb2011ccb92b 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
>   {
>   	int err;
>   
> -	i915_gem_object_lock(vma->obj);
> +	i915_gem_object_lock(vma->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>   	i915_gem_object_unlock(vma->obj);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> index 299c29e9ad86..4e36d4897ea6 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> @@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
>   		if (err)
>   			goto err_unpin;
>   
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		err = i915_gem_object_set_to_cpu_domain(obj, false);
>   		i915_gem_object_unlock(obj);
>   		if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> index 87d7d8aa080f..1de2959b153c 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> @@ -82,7 +82,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 __iomem *map;
>   	int err = 0;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -115,7 +115,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
>   	u32 __iomem *map;
>   	int err = 0;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -147,7 +147,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 *map;
>   	int err;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -170,7 +170,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
>   	u32 *map;
>   	int err;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -193,7 +193,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 *cs;
>   	int err;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index b81978890641..438c15ef2184 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -950,7 +950,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		return PTR_ERR(vma);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, false);
>   	i915_gem_object_unlock(obj);
>   	if (err)
> @@ -1706,7 +1706,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
>   
>   	i915_request_add(rq);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_cpu_domain(obj, false);
>   	i915_gem_object_unlock(obj);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index 9c7402ce5bf9..9fb95a45bcad 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
>   	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
>   	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err) {
> @@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
>   	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
>   	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err) {
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
> index 34932871b3a5..a94243dc4c5c 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
> @@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
>   	}
>   
>   	/* Make the object dirty so that put_pages must do copy back the data */
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index febc9e6692ba..61a0532d0f3d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
>   		return PTR_ERR(results);
>   
>   	err = 0;
> -	i915_gem_object_lock(results);
> +	i915_gem_object_lock(results, NULL);
>   	intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
>   		err = i915_gem_object_set_to_cpu_domain(results, false);
>   	i915_gem_object_unlock(results);
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index f1940939260a..943c8d232703 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -2982,7 +2982,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
>   		goto put_obj;
>   	}
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	ret = i915_gem_object_set_to_cpu_domain(obj, false);
>   	i915_gem_object_unlock(obj);
>   	if (ret) {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9aa3066cb75d..1e06752835e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -420,7 +420,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   		GEM_BUG_ON(!drm_mm_node_allocated(&node));
>   	}
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		goto out_unpin;
>   
> @@ -619,7 +619,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   		GEM_BUG_ON(!drm_mm_node_allocated(&node));
>   	}
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		goto out_unpin;
>   
> @@ -1290,7 +1290,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
>   	i915_gem_drain_freed_objects(i915);
>   
>   	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		drm_WARN_ON(&i915->drm,
>   			    i915_gem_object_set_to_cpu_domain(obj, true));
>   		i915_gem_object_unlock(obj);
> @@ -1344,6 +1344,52 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
>   	return ret;
>   }
>   
> +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
> +{
> +	ww_acquire_init(&ww->ctx, &reservation_ww_class);
> +	INIT_LIST_HEAD(&ww->obj_list);
> +	ww->intr = intr;
> +	ww->contended = NULL;
> +}
> +
> +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
> +{
> +	struct drm_i915_gem_object *obj;
> +
> +	while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
> +		list_del(&obj->obj_link);
> +		i915_gem_object_unlock(obj);
> +	}
> +}
> +
> +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
> +{
> +	i915_gem_ww_ctx_unlock_all(ww);
> +	WARN_ON(ww->contended);
> +	ww_acquire_fini(&ww->ctx);
> +}
> +
> +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
> +{
> +	int ret = 0;
> +
> +	if (WARN_ON(!ww->contended))
> +		return -EINVAL;
> +
> +	i915_gem_ww_ctx_unlock_all(ww);
> +	if (ww->intr)
> +		ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
> +	else
> +		dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
> +
> +	if (!ret)
> +		list_add_tail(&ww->contended->obj_link, &ww->obj_list);
> +
> +	ww->contended = NULL;
> +
> +	return ret;
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/mock_gem_device.c"
>   #include "selftests/i915_gem.c"
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 1753c84d6c0d..988755dbf4be 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -116,4 +116,15 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
>   	return test_bit(TASKLET_STATE_SCHED, &t->state);
>   }
>   
> +struct i915_gem_ww_ctx {
> +	struct ww_acquire_ctx ctx;
> +	struct list_head obj_list;
> +	bool intr;
> +	struct drm_i915_gem_object *contended;
> +};
> +
> +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
> +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
> +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
> +
>   #endif /* __I915_GEM_H__ */
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 88d400b9df88..23a6132c5f4e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -199,11 +199,52 @@ static int igt_gem_hibernate(void *arg)
>   	return err;
>   }
>   
> +static int igt_gem_ww_ctx(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct drm_i915_gem_object *obj, *obj2;
> +	struct i915_gem_ww_ctx ww;
> +	int err = 0;
> +
> +	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +	if (IS_ERR(obj)) {
> +		err = PTR_ERR(obj);
> +		goto put1;
> +	}
> +
> +	i915_gem_ww_ctx_init(&ww, true);
> +retry:
> +	/* Lock the objects, twice for good measure (-EALREADY handling) */
> +	err = i915_gem_object_lock(obj, &ww);
> +	if (!err)
> +		err = i915_gem_object_lock_interruptible(obj, &ww);
> +	if (!err)
> +		err = i915_gem_object_lock_interruptible(obj2, &ww);
> +	if (!err)
> +		err = i915_gem_object_lock(obj2, &ww);
> +
> +	if (err == -EDEADLK) {
> +		err = i915_gem_ww_ctx_backoff(&ww);
> +		if (!err)
> +			goto retry;
> +	}
> +	i915_gem_ww_ctx_fini(&ww);
> +	i915_gem_object_put(obj2);
> +put1:
> +	i915_gem_object_put(obj);
> +	return err;
> +}
> +
>   int i915_gem_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(igt_gem_suspend),
>   		SUBTEST(igt_gem_hibernate),
> +		SUBTEST(igt_gem_ww_ctx),
>   	};
>   
>   	if (intel_gt_is_wedged(&i915->gt))
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index af89c7fc8f59..88c5e9acb84c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
>   			unsigned int x, y;
>   			int err;
>   
> -			i915_gem_object_lock(obj);
> +			i915_gem_object_lock(obj, NULL);
>   			err = i915_gem_object_set_to_gtt_domain(obj, true);
>   			i915_gem_object_unlock(obj);
>   			if (err)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index 6e80d99048e4..957a7a52def7 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
>   	if (err)
>   		goto out_unpin;
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_wc_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
  2020-06-24  7:10   ` Thomas Hellström (Intel)
  2020-06-29 12:07   ` Tvrtko Ursulin
@ 2020-06-29 12:32   ` Tvrtko Ursulin
  2020-06-29 13:44     ` Maarten Lankhorst
  2 siblings, 1 reply; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-29 12:32 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 23/06/2020 15:28, Maarten Lankhorst wrote:
> i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
> eviction. We don't use it yet, but lets start adding the definition
> first.
> 
> To use it, we have to pass a non-NULL ww to gem_object_lock, and don't
> unlock directly. It is done in i915_gem_ww_ctx_fini.
> 
> Changes since v1:
> - Change ww_ctx and obj order in locking functions (Jonas Lahtinen)
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>   .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
>   .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
>   .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
>   .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
>   .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
>   .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
>   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
>   drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
>   drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
>   drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
>   .../drm/i915/selftests/intel_memory_region.c  |  2 +-
>   24 files changed, 173 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 7457813ef273..e909ccc37a54 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -2309,7 +2309,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>   
>   void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
>   {
> -	i915_gem_object_lock(vma->obj);
> +	i915_gem_object_lock(vma->obj, NULL);
>   	if (flags & PLANE_HAS_FENCE)
>   		i915_vma_unpin_fence(vma);
>   	i915_gem_object_unpin_from_display_plane(vma);
> @@ -17112,7 +17112,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
>   	if (!intel_fb->frontbuffer)
>   		return -ENOMEM;
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	tiling = i915_gem_object_get_tiling(obj);
>   	stride = i915_gem_object_get_stride(obj);
>   	i915_gem_object_unlock(obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> index d3a86a4d5c04..c182091c00ff 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -286,7 +286,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
>   	dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
>   	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_sw_fence_await_reservation(&work->wait,
>   					      obj->base.resv, NULL, true, 0,
>   					      I915_FENCE_GFP);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 30c229fcb404..a996583640ee 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -113,7 +113,7 @@ static void lut_close(struct i915_gem_context *ctx)
>   			continue;
>   
>   		rcu_read_unlock();
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		list_for_each_entry(lut, &obj->lut_list, obj_link) {
>   			if (lut->ctx != ctx)
>   				continue;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index 2679380159fc..27fddc22a7c6 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
>   	if (err)
>   		return err;
>   
> -	err = i915_gem_object_lock_interruptible(obj);
> +	err = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (err)
>   		goto out;
>   
> @@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
>   	if (err)
>   		return err;
>   
> -	err = i915_gem_object_lock_interruptible(obj);
> +	err = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (err)
>   		goto out;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 7f76fc68f498..c0acfc97fae3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -32,7 +32,7 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
>   	if (!i915_gem_object_is_framebuffer(obj))
>   		return;
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	__i915_gem_object_flush_for_display(obj);
>   	i915_gem_object_unlock(obj);
>   }
> @@ -197,7 +197,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> @@ -536,7 +536,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   	if (err)
>   		goto out;
>   
> -	err = i915_gem_object_lock_interruptible(obj);
> +	err = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (err)
>   		goto out_unpin;
>   
> @@ -576,7 +576,7 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>   	if (!i915_gem_object_has_struct_page(obj))
>   		return -ENODEV;
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> @@ -630,7 +630,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>   	if (!i915_gem_object_has_struct_page(obj))
>   		return -ENODEV;
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 2b4c210638c1..391d22051b20 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -813,7 +813,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>   		if (err == 0) { /* And nor has this handle */
>   			struct drm_i915_gem_object *obj = vma->obj;
>   
> -			i915_gem_object_lock(obj);
> +			i915_gem_object_lock(obj, NULL);
>   			if (idr_find(&eb->file->object_idr, handle) == obj) {
>   				list_add(&lut->obj_link, &obj->lut_list);
>   			} else {
> @@ -1083,7 +1083,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>   		if (use_cpu_reloc(cache, obj))
>   			return NULL;
>   
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		err = i915_gem_object_set_to_gtt_domain(obj, true);
>   		i915_gem_object_unlock(obj);
>   		if (err)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index b6ec5b50d93b..b59e2d40c347 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -108,7 +108,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   	struct i915_lut_handle *lut, *ln;
>   	LIST_HEAD(close);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
>   		struct i915_gem_context *ctx = lut->ctx;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 2faa481cc18f..5103067269b0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
>   
>   #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
>   
> -static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
> +static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
> +					 struct i915_gem_ww_ctx *ww,
> +					 bool intr)
>   {
> -	dma_resv_lock(obj->base.resv, NULL);
> +	int ret;
> +
> +	if (intr)
> +		ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
> +	else
> +		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
> +
> +	if (!ret && ww)
> +		list_add_tail(&obj->obj_link, &ww->obj_list);
> +	if (ret == -EALREADY)
> +		ret = 0;
> +
> +	if (ret == -EDEADLK)
> +		ww->contended = obj;
> +
> +	return ret;

Feels a bit on the large side for inline now, no? Quite a few 
conditionals. Or you are counting on compiler optimisation because ww 
and intr are passed in as mostly const?

>   }
>   
> -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
> +static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
> +				       struct i915_gem_ww_ctx *ww)
>   {
> -	return dma_resv_trylock(obj->base.resv);
> +	return __i915_gem_object_lock(obj, ww, ww && ww->intr);
>   }
>   
> -static inline int
> -i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
> +static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
> +						     struct i915_gem_ww_ctx *ww)
>   {
> -	return dma_resv_lock_interruptible(obj->base.resv, NULL);
> +	WARN_ON(ww && !ww->intr);
> +	return __i915_gem_object_lock(obj, ww, true);

I see that ww->intr is set at ctx init time. At what times is it 
expected that the individual lock calls would override that?

> +}
> +
> +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
> +{
> +	return dma_resv_trylock(obj->base.resv);
>   }
>   
>   static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index b1f82a11aef2..3740c0080e38 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
>   	 */
>   	struct list_head lut_list;
>   
> +	/**
> +	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
> +	 *
> +	 * When we lock this object through i915_gem_object_lock() with a
> +	 * context, we add it to the list to ensure we can unlock everything
> +	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
> +	 */
> +	struct list_head obj_link;
> +
>   	/** Stolen memory for this object, instead of being backed by shmem. */
>   	struct drm_mm_node *stolen;
>   	union {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 3d215164dd5a..40d3e40500fa 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
>   
>   			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
>   
> -			i915_gem_object_lock(obj);
> +			i915_gem_object_lock(obj, NULL);
>   			drm_WARN_ON(&i915->drm,
>   			    i915_gem_object_set_to_gtt_domain(obj, false));
>   			i915_gem_object_unlock(obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> index 0158e49bf9bb..65fbf29c4852 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
> @@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>   	 * whilst executing a fenced command for an untiled object.
>   	 */
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	if (i915_gem_object_is_framebuffer(obj)) {
>   		i915_gem_object_unlock(obj);
>   		return -EBUSY;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index 8291ede6902c..eb2011ccb92b 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
>   {
>   	int err;
>   
> -	i915_gem_object_lock(vma->obj);
> +	i915_gem_object_lock(vma->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>   	i915_gem_object_unlock(vma->obj);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> index 299c29e9ad86..4e36d4897ea6 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> @@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
>   		if (err)
>   			goto err_unpin;
>   
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		err = i915_gem_object_set_to_cpu_domain(obj, false);
>   		i915_gem_object_unlock(obj);
>   		if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> index 87d7d8aa080f..1de2959b153c 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> @@ -82,7 +82,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 __iomem *map;
>   	int err = 0;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -115,7 +115,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
>   	u32 __iomem *map;
>   	int err = 0;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -147,7 +147,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 *map;
>   	int err;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -170,7 +170,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
>   	u32 *map;
>   	int err;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> @@ -193,7 +193,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 *cs;
>   	int err;
>   
> -	i915_gem_object_lock(ctx->obj);
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>   	i915_gem_object_unlock(ctx->obj);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index b81978890641..438c15ef2184 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -950,7 +950,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		return PTR_ERR(vma);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, false);
>   	i915_gem_object_unlock(obj);
>   	if (err)
> @@ -1706,7 +1706,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
>   
>   	i915_request_add(rq);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_cpu_domain(obj, false);
>   	i915_gem_object_unlock(obj);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index 9c7402ce5bf9..9fb95a45bcad 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
>   	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
>   	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err) {
> @@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
>   	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
>   	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err) {
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
> index 34932871b3a5..a94243dc4c5c 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
> @@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
>   	}
>   
>   	/* Make the object dirty so that put_pages must do copy back the data */
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_gtt_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index febc9e6692ba..61a0532d0f3d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
>   		return PTR_ERR(results);
>   
>   	err = 0;
> -	i915_gem_object_lock(results);
> +	i915_gem_object_lock(results, NULL);
>   	intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
>   		err = i915_gem_object_set_to_cpu_domain(results, false);
>   	i915_gem_object_unlock(results);
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index f1940939260a..943c8d232703 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -2982,7 +2982,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
>   		goto put_obj;
>   	}
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	ret = i915_gem_object_set_to_cpu_domain(obj, false);
>   	i915_gem_object_unlock(obj);
>   	if (ret) {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9aa3066cb75d..1e06752835e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -420,7 +420,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>   		GEM_BUG_ON(!drm_mm_node_allocated(&node));
>   	}
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		goto out_unpin;
>   
> @@ -619,7 +619,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   		GEM_BUG_ON(!drm_mm_node_allocated(&node));
>   	}
>   
> -	ret = i915_gem_object_lock_interruptible(obj);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		goto out_unpin;
>   
> @@ -1290,7 +1290,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
>   	i915_gem_drain_freed_objects(i915);
>   
>   	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
> -		i915_gem_object_lock(obj);
> +		i915_gem_object_lock(obj, NULL);
>   		drm_WARN_ON(&i915->drm,
>   			    i915_gem_object_set_to_cpu_domain(obj, true));
>   		i915_gem_object_unlock(obj);
> @@ -1344,6 +1344,52 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
>   	return ret;
>   }
>   
> +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
> +{
> +	ww_acquire_init(&ww->ctx, &reservation_ww_class);
> +	INIT_LIST_HEAD(&ww->obj_list);
> +	ww->intr = intr;
> +	ww->contended = NULL;
> +}
> +
> +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
> +{
> +	struct drm_i915_gem_object *obj;
> +
> +	while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {

I wanted to ask whether you think this is faster than 
list_for_each_entry, but then also realized you can optimise further by 
not bothering to list_del (since you know the whole list is going away). 
If you are not allowing ww ctx reuse you don't even need to re-init the 
list_head at the end.

> +		list_del(&obj->obj_link);
> +		i915_gem_object_unlock(obj);
> +	}
> +}
> +
> +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
> +{
> +	i915_gem_ww_ctx_unlock_all(ww);
> +	WARN_ON(ww->contended);

Unless I am missing something this feels like a GEM_BUG_ON condition 
(translated: we should be confident after testing it is impossible to hit).

Or is it allowed to not try the backoff on -EDEADLK? Backoff is the only 
place which resets ww->contended, right? In this case WARN_ON would 
be wrong, but you probably did not go for this design. Should it be 
supported?

> +	ww_acquire_fini(&ww->ctx);
> +}
> +
> +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
> +{
> +	int ret = 0;
> +
> +	if (WARN_ON(!ww->contended))
> +		return -EINVAL;
> +
> +	i915_gem_ww_ctx_unlock_all(ww);
> +	if (ww->intr)
> +		ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
> +	else
> +		dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
> +
> +	if (!ret)
> +		list_add_tail(&ww->contended->obj_link, &ww->obj_list);
> +
> +	ww->contended = NULL;
> +
> +	return ret;
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/mock_gem_device.c"
>   #include "selftests/i915_gem.c"
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 1753c84d6c0d..988755dbf4be 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -116,4 +116,15 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
>   	return test_bit(TASKLET_STATE_SCHED, &t->state);
>   }
>   
> +struct i915_gem_ww_ctx {
> +	struct ww_acquire_ctx ctx;
> +	struct list_head obj_list;
> +	bool intr;
> +	struct drm_i915_gem_object *contended;
> +};
> +
> +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
> +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
> +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
> +
>   #endif /* __I915_GEM_H__ */
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 88d400b9df88..23a6132c5f4e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -199,11 +199,52 @@ static int igt_gem_hibernate(void *arg)
>   	return err;
>   }
>   
> +static int igt_gem_ww_ctx(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct drm_i915_gem_object *obj, *obj2;
> +	struct i915_gem_ww_ctx ww;
> +	int err = 0;
> +
> +	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +	if (IS_ERR(obj)) {

Wrong obj ^^^ vvv.

> +		err = PTR_ERR(obj);
> +		goto put1;
> +	}
> +
> +	i915_gem_ww_ctx_init(&ww, true);

Need to expand with non-interruptible, interruptible and mixed.

> +retry:
> +	/* Lock the objects, twice for good measure (-EALREADY handling) */
> +	err = i915_gem_object_lock(obj, &ww);
> +	if (!err)
> +		err = i915_gem_object_lock_interruptible(obj, &ww);

This is -EALREADY on the 1st pass.

> +	if (!err)
> +		err = i915_gem_object_lock_interruptible(obj2, &ww);
> +	if (!err)
> +		err = i915_gem_object_lock(obj2, &ww);

And this is -EALREADY again?

> +
> +	if (err == -EDEADLK) {

How do we get here with a single locking context?

> +		err = i915_gem_ww_ctx_backoff(&ww);
> +		if (!err)
> +			goto retry;
> +	}
> +	i915_gem_ww_ctx_fini(&ww);
> +	i915_gem_object_put(obj2);
> +put1:
> +	i915_gem_object_put(obj);
> +	return err;
> +}
> +
>   int i915_gem_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(igt_gem_suspend),
>   		SUBTEST(igt_gem_hibernate),
> +		SUBTEST(igt_gem_ww_ctx),
>   	};
>   
>   	if (intel_gt_is_wedged(&i915->gt))
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index af89c7fc8f59..88c5e9acb84c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
>   			unsigned int x, y;
>   			int err;
>   
> -			i915_gem_object_lock(obj);
> +			i915_gem_object_lock(obj, NULL);
>   			err = i915_gem_object_set_to_gtt_domain(obj, true);
>   			i915_gem_object_unlock(obj);
>   			if (err)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index 6e80d99048e4..957a7a52def7 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
>   	if (err)
>   		goto out_unpin;
>   
> -	i915_gem_object_lock(obj);
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_set_to_wc_domain(obj, true);
>   	i915_gem_object_unlock(obj);
>   	if (err)
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write Maarten Lankhorst
  2020-06-26 13:32   ` Thomas Hellström (Intel)
@ 2020-06-29 12:56   ` Tvrtko Ursulin
  1 sibling, 0 replies; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-29 12:56 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 23/06/2020 15:28, Maarten Lankhorst wrote:
> Execbuffer submission will perform its own WW locking, and we
> cannot rely on the implicit lock there.
> 
> This also makes it clear that the GVT code will get a lockdep splat when
> multiple batchbuffer shadows need to be performed in the same instance;
> fix that up.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 20 ++++++-------------
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 13 ++++++++++--
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |  1 -
>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  5 ++++-
>   .../i915/gem/selftests/i915_gem_coherency.c   | 14 +++++++++----
>   .../drm/i915/gem/selftests/i915_gem_context.c | 12 ++++++++---
>   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  1 +
>   drivers/gpu/drm/i915/i915_gem.c               | 20 +++++++++++++++++--
>   8 files changed, 59 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index c0acfc97fae3..8ebceebd11b0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -576,19 +576,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>   	if (!i915_gem_object_has_struct_page(obj))
>   		return -ENODEV;
>   
> -	ret = i915_gem_object_lock_interruptible(obj, NULL);
> -	if (ret)
> -		return ret;
> +	assert_object_held(obj);
>   
>   	ret = i915_gem_object_wait(obj,
>   				   I915_WAIT_INTERRUPTIBLE,
>   				   MAX_SCHEDULE_TIMEOUT);
>   	if (ret)
> -		goto err_unlock;
> +		return ret;
>   
>   	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
> -		goto err_unlock;
> +		return ret;
>   
>   	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
>   	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> @@ -616,8 +614,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>   
>   err_unpin:
>   	i915_gem_object_unpin_pages(obj);
> -err_unlock:
> -	i915_gem_object_unlock(obj);
>   	return ret;
>   }
>   
> @@ -630,20 +626,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>   	if (!i915_gem_object_has_struct_page(obj))
>   		return -ENODEV;
>   
> -	ret = i915_gem_object_lock_interruptible(obj, NULL);
> -	if (ret)
> -		return ret;
> +	assert_object_held(obj);
>   
>   	ret = i915_gem_object_wait(obj,
>   				   I915_WAIT_INTERRUPTIBLE |
>   				   I915_WAIT_ALL,
>   				   MAX_SCHEDULE_TIMEOUT);
>   	if (ret)
> -		goto err_unlock;
> +		return ret;
>   
>   	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
> -		goto err_unlock;
> +		return ret;
>   
>   	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
>   	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> @@ -680,7 +674,5 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>   
>   err_unpin:
>   	i915_gem_object_unpin_pages(obj);
> -err_unlock:
> -	i915_gem_object_unlock(obj);
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 391d22051b20..f896b1a4b38a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1003,11 +1003,14 @@ static void reloc_cache_reset(struct reloc_cache *cache)
>   
>   	vaddr = unmask_page(cache->vaddr);
>   	if (cache->vaddr & KMAP) {
> +		struct drm_i915_gem_object *obj =
> +			(struct drm_i915_gem_object *)cache->node.mm;
>   		if (cache->vaddr & CLFLUSH_AFTER)
>   			mb();
>   
>   		kunmap_atomic(vaddr);
> -		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
> +		i915_gem_object_finish_access(obj);
> +		i915_gem_object_unlock(obj);
>   	} else {
>   		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
>   
> @@ -1042,10 +1045,16 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
>   		unsigned int flushes;
>   		int err;
>   
> -		err = i915_gem_object_prepare_write(obj, &flushes);
> +		err = i915_gem_object_lock_interruptible(obj, NULL);
>   		if (err)
>   			return ERR_PTR(err);
>   
> +		err = i915_gem_object_prepare_write(obj, &flushes);
> +		if (err) {
> +			i915_gem_object_unlock(obj);
> +			return ERR_PTR(err);
> +		}
> +
>   		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
>   		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 5103067269b0..11b8e2735071 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -434,7 +434,6 @@ static inline void
>   i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
>   {
>   	i915_gem_object_unpin_pages(obj);
> -	i915_gem_object_unlock(obj);
>   }
>   
>   static inline struct intel_engine_cs *
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index eb2011ccb92b..fff11327a8da 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
>   	unsigned long n;
>   	int err;
>   
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_prepare_read(obj, &needs_flush);
>   	if (err)
> -		return err;
> +		goto err_unlock;
>   
>   	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
>   		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
> @@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
>   	}
>   
>   	i915_gem_object_finish_access(obj);
> +err_unlock:
> +	i915_gem_object_unlock(obj);
>   
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> index 1de2959b153c..dcdfc396f2f8 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> @@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
>   	u32 *cpu;
>   	int err;
>   
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
>   	if (err)
> -		return err;
> +		goto out;
>   
>   	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
>   	map = kmap_atomic(page);
> @@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
>   	kunmap_atomic(map);
>   	i915_gem_object_finish_access(ctx->obj);
>   
> -	return 0;
> +out:
> +	i915_gem_object_unlock(ctx->obj);
> +	return err;
>   }
>   
>   static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
> @@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
>   	u32 *cpu;
>   	int err;
>   
> +	i915_gem_object_lock(ctx->obj, NULL);
>   	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
>   	if (err)
> -		return err;
> +		goto out;
>   
>   	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
>   	map = kmap_atomic(page);
> @@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
>   	kunmap_atomic(map);
>   	i915_gem_object_finish_access(ctx->obj);
>   
> -	return 0;
> +out:
> +	i915_gem_object_unlock(ctx->obj);
> +	return err;
>   }
>   
>   static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index 438c15ef2184..76671f587b9d 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
>   	unsigned int n, m, need_flush;
>   	int err;
>   
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_prepare_write(obj, &need_flush);
>   	if (err)
> -		return err;
> +		goto out;
>   
>   	for (n = 0; n < real_page_count(obj); n++) {
>   		u32 *map;
> @@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
>   	i915_gem_object_finish_access(obj);
>   	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
>   	obj->write_domain = 0;
> -	return 0;
> +out:
> +	i915_gem_object_unlock(obj);
> +	return err;
>   }
>   
>   static noinline int cpu_check(struct drm_i915_gem_object *obj,
> @@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
>   	unsigned int n, m, needs_flush;
>   	int err;
>   
> +	i915_gem_object_lock(obj, NULL);
>   	err = i915_gem_object_prepare_read(obj, &needs_flush);
>   	if (err)
> -		return err;
> +		goto out_unlock;
>   
>   	for (n = 0; n < real_page_count(obj); n++) {
>   		u32 *map;
> @@ -527,6 +531,8 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
>   	}
>   
>   	i915_gem_object_finish_access(obj);
> +out_unlock:
> +	i915_gem_object_unlock(obj);
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index 943c8d232703..d0a599b51bfe 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -1923,6 +1923,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>   	if (ret)
>   		goto err_unmap;
>   
> +	i915_gem_object_unlock(bb->obj);
>   	INIT_LIST_HEAD(&bb->list);
>   	list_add(&bb->list, &s->workload->shadow_bb);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1e06752835e5..33f6f88c8b08 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -335,12 +335,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>   	u64 remain;
>   	int ret;
>   
> -	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> +	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
> +	if (ret) {
> +		i915_gem_object_unlock(obj);
> +		return ret;
> +	}
> +
>   	fence = i915_gem_object_lock_fence(obj);
>   	i915_gem_object_finish_access(obj);
> +	i915_gem_object_unlock(obj);
> +
>   	if (!fence)
>   		return -ENOMEM;
>   
> @@ -734,12 +742,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   	u64 remain;
>   	int ret;
>   
> -	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
> +	ret = i915_gem_object_lock_interruptible(obj, NULL);
>   	if (ret)
>   		return ret;
>   
> +	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
> +	if (ret) {
> +		i915_gem_object_unlock(obj);
> +		return ret;
> +	}
> +
>   	fence = i915_gem_object_lock_fence(obj);
>   	i915_gem_object_finish_access(obj);
> +	i915_gem_object_unlock(obj);
> +
>   	if (!fence)
>   		return -ENOMEM;
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.
  2020-06-29 12:32   ` Tvrtko Ursulin
@ 2020-06-29 13:44     ` Maarten Lankhorst
  0 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-29 13:44 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Op 29-06-2020 om 14:32 schreef Tvrtko Ursulin:
>
> On 23/06/2020 15:28, Maarten Lankhorst wrote:
>> i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
>> eviction. We don't use it yet, but let's start adding the definition
>> first.
>>
>> To use it, we have to pass a non-NULL ww to gem_object_lock, and must not
>> unlock directly. Unlocking is done in i915_gem_ww_ctx_fini.
>>
>> Changes since v1:
>> - Change ww_ctx and obj order in locking functions (Joonas Lahtinen)
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
>>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  4 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 10 ++--
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_object.h    | 38 +++++++++++---
>>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 ++++
>>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  2 +-
>>   .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>>   .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
>>   .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
>>   .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
>>   .../drm/i915/gem/selftests/i915_gem_mman.c    |  4 +-
>>   .../drm/i915/gem/selftests/i915_gem_phys.c    |  2 +-
>>   .../gpu/drm/i915/gt/selftest_workarounds.c    |  2 +-
>>   drivers/gpu/drm/i915/gvt/cmd_parser.c         |  2 +-
>>   drivers/gpu/drm/i915/i915_gem.c               | 52 +++++++++++++++++--
>>   drivers/gpu/drm/i915/i915_gem.h               | 11 ++++
>>   drivers/gpu/drm/i915/selftests/i915_gem.c     | 41 +++++++++++++++
>>   drivers/gpu/drm/i915/selftests/i915_vma.c     |  2 +-
>>   .../drm/i915/selftests/intel_memory_region.c  |  2 +-
>>   24 files changed, 173 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
>> index 7457813ef273..e909ccc37a54 100644
>> --- a/drivers/gpu/drm/i915/display/intel_display.c
>> +++ b/drivers/gpu/drm/i915/display/intel_display.c
>> @@ -2309,7 +2309,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>>     void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
>>   {
>> -    i915_gem_object_lock(vma->obj);
>> +    i915_gem_object_lock(vma->obj, NULL);
>>       if (flags & PLANE_HAS_FENCE)
>>           i915_vma_unpin_fence(vma);
>>       i915_gem_object_unpin_from_display_plane(vma);
>> @@ -17112,7 +17112,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
>>       if (!intel_fb->frontbuffer)
>>           return -ENOMEM;
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       tiling = i915_gem_object_get_tiling(obj);
>>       stride = i915_gem_object_get_stride(obj);
>>       i915_gem_object_unlock(obj);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
>> index d3a86a4d5c04..c182091c00ff 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
>> @@ -286,7 +286,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
>>       dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
>>       i915_sw_fence_init(&work->wait, clear_pages_work_notify);
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_sw_fence_await_reservation(&work->wait,
>>                             obj->base.resv, NULL, true, 0,
>>                             I915_FENCE_GFP);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index 30c229fcb404..a996583640ee 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -113,7 +113,7 @@ static void lut_close(struct i915_gem_context *ctx)
>>               continue;
>>             rcu_read_unlock();
>> -        i915_gem_object_lock(obj);
>> +        i915_gem_object_lock(obj, NULL);
>>           list_for_each_entry(lut, &obj->lut_list, obj_link) {
>>               if (lut->ctx != ctx)
>>                   continue;
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> index 2679380159fc..27fddc22a7c6 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> @@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
>>       if (err)
>>           return err;
>>   -    err = i915_gem_object_lock_interruptible(obj);
>> +    err = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (err)
>>           goto out;
>>   @@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
>>       if (err)
>>           return err;
>>   -    err = i915_gem_object_lock_interruptible(obj);
>> +    err = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (err)
>>           goto out;
>>   diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> index 7f76fc68f498..c0acfc97fae3 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> @@ -32,7 +32,7 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
>>       if (!i915_gem_object_is_framebuffer(obj))
>>           return;
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       __i915_gem_object_flush_for_display(obj);
>>       i915_gem_object_unlock(obj);
>>   }
>> @@ -197,7 +197,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>>       if (ret)
>>           return ret;
>>   -    ret = i915_gem_object_lock_interruptible(obj);
>> +    ret = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (ret)
>>           return ret;
>>   @@ -536,7 +536,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>>       if (err)
>>           goto out;
>>   -    err = i915_gem_object_lock_interruptible(obj);
>> +    err = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (err)
>>           goto out_unpin;
>>   @@ -576,7 +576,7 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
>>       if (!i915_gem_object_has_struct_page(obj))
>>           return -ENODEV;
>>   -    ret = i915_gem_object_lock_interruptible(obj);
>> +    ret = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (ret)
>>           return ret;
>>   @@ -630,7 +630,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
>>       if (!i915_gem_object_has_struct_page(obj))
>>           return -ENODEV;
>>   -    ret = i915_gem_object_lock_interruptible(obj);
>> +    ret = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (ret)
>>           return ret;
>>   diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 2b4c210638c1..391d22051b20 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -813,7 +813,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>>           if (err == 0) { /* And nor has this handle */
>>               struct drm_i915_gem_object *obj = vma->obj;
>>   -            i915_gem_object_lock(obj);
>> +            i915_gem_object_lock(obj, NULL);
>>               if (idr_find(&eb->file->object_idr, handle) == obj) {
>>                   list_add(&lut->obj_link, &obj->lut_list);
>>               } else {
>> @@ -1083,7 +1083,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>>           if (use_cpu_reloc(cache, obj))
>>               return NULL;
>>   -        i915_gem_object_lock(obj);
>> +        i915_gem_object_lock(obj, NULL);
>>           err = i915_gem_object_set_to_gtt_domain(obj, true);
>>           i915_gem_object_unlock(obj);
>>           if (err)
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> index b6ec5b50d93b..b59e2d40c347 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> @@ -108,7 +108,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>>       struct i915_lut_handle *lut, *ln;
>>       LIST_HEAD(close);
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
>>           struct i915_gem_context *ctx = lut->ctx;
>>   diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> index 2faa481cc18f..5103067269b0 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> @@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
>>     #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
>>   -static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
>> +static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
>> +                     struct i915_gem_ww_ctx *ww,
>> +                     bool intr)
>>   {
>> -    dma_resv_lock(obj->base.resv, NULL);
>> +    int ret;
>> +
>> +    if (intr)
>> +        ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
>> +    else
>> +        ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
>> +
>> +    if (!ret && ww)
>> +        list_add_tail(&obj->obj_link, &ww->obj_list);
>> +    if (ret == -EALREADY)
>> +        ret = 0;
>> +
>> +    if (ret == -EDEADLK)
>> +        ww->contended = obj;
>> +
>> +    return ret;
>
> Feels a bit on the large side for inline now, no? Quite a few conditionals. Or are you counting on compiler optimisation because ww and intr are passed in as mostly const?
Slightly, not sure if it's really a problem in practice. ww is either null or a stack variable, so for null it should all go away.
>
>>   }
>>   -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
>> +static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
>> +                       struct i915_gem_ww_ctx *ww)
>>   {
>> -    return dma_resv_trylock(obj->base.resv);
>> +    return __i915_gem_object_lock(obj, ww, ww && ww->intr);
>>   }
>>   -static inline int
>> -i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
>> +static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
>> +                             struct i915_gem_ww_ctx *ww)
>>   {
>> -    return dma_resv_lock_interruptible(obj->base.resv, NULL);
>> +    WARN_ON(ww && !ww->intr);
>> +    return __i915_gem_object_lock(obj, ww, true);
>
> I see that ww->intr is set at ctx init time. At what times is it expected that the individual lock calls would override that?
Never. :) Just politely allowing it when replacing calls. Could be removed and replaced with lock_single_interruptible without ww context.
>
>> +}
>> +
>> +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
>> +{
>> +    return dma_resv_trylock(obj->base.resv);
>>   }
>>     static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> index b1f82a11aef2..3740c0080e38 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> @@ -122,6 +122,15 @@ struct drm_i915_gem_object {
>>        */
>>       struct list_head lut_list;
>>   +    /**
>> +     * @obj_link: Link into @i915_gem_ww_ctx.obj_list
>> +     *
>> +     * When we lock this object through i915_gem_object_lock() with a
>> +     * context, we add it to the list to ensure we can unlock everything
>> +     * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
>> +     */
>> +    struct list_head obj_link;
>> +
>>       /** Stolen memory for this object, instead of being backed by shmem. */
>>       struct drm_mm_node *stolen;
>>       union {
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> index 3d215164dd5a..40d3e40500fa 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> @@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
>>                 spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
>>   -            i915_gem_object_lock(obj);
>> +            i915_gem_object_lock(obj, NULL);
>>               drm_WARN_ON(&i915->drm,
>>                   i915_gem_object_set_to_gtt_domain(obj, false));
>>               i915_gem_object_unlock(obj);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
>> index 0158e49bf9bb..65fbf29c4852 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
>> @@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>>        * whilst executing a fenced command for an untiled object.
>>        */
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       if (i915_gem_object_is_framebuffer(obj)) {
>>           i915_gem_object_unlock(obj);
>>           return -EBUSY;
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> index 8291ede6902c..eb2011ccb92b 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> @@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
>>   {
>>       int err;
>>   -    i915_gem_object_lock(vma->obj);
>> +    i915_gem_object_lock(vma->obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>>       i915_gem_object_unlock(vma->obj);
>>       if (err)
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
>> index 299c29e9ad86..4e36d4897ea6 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
>> @@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
>>           if (err)
>>               goto err_unpin;
>>   -        i915_gem_object_lock(obj);
>> +        i915_gem_object_lock(obj, NULL);
>>           err = i915_gem_object_set_to_cpu_domain(obj, false);
>>           i915_gem_object_unlock(obj);
>>           if (err)
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
>> index 87d7d8aa080f..1de2959b153c 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
>> @@ -82,7 +82,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
>>       u32 __iomem *map;
>>       int err = 0;
>>   -    i915_gem_object_lock(ctx->obj);
>> +    i915_gem_object_lock(ctx->obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>>       i915_gem_object_unlock(ctx->obj);
>>       if (err)
>> @@ -115,7 +115,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
>>       u32 __iomem *map;
>>       int err = 0;
>>   -    i915_gem_object_lock(ctx->obj);
>> +    i915_gem_object_lock(ctx->obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
>>       i915_gem_object_unlock(ctx->obj);
>>       if (err)
>> @@ -147,7 +147,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
>>       u32 *map;
>>       int err;
>>   -    i915_gem_object_lock(ctx->obj);
>> +    i915_gem_object_lock(ctx->obj, NULL);
>>       err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
>>       i915_gem_object_unlock(ctx->obj);
>>       if (err)
>> @@ -170,7 +170,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
>>       u32 *map;
>>       int err;
>>   -    i915_gem_object_lock(ctx->obj);
>> +    i915_gem_object_lock(ctx->obj, NULL);
>>       err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
>>       i915_gem_object_unlock(ctx->obj);
>>       if (err)
>> @@ -193,7 +193,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
>>       u32 *cs;
>>       int err;
>>   -    i915_gem_object_lock(ctx->obj);
>> +    i915_gem_object_lock(ctx->obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
>>       i915_gem_object_unlock(ctx->obj);
>>       if (err)
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>> index b81978890641..438c15ef2184 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>> @@ -950,7 +950,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
>>       if (IS_ERR(vma))
>>           return PTR_ERR(vma);
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(obj, false);
>>       i915_gem_object_unlock(obj);
>>       if (err)
>> @@ -1706,7 +1706,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
>>         i915_request_add(rq);
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_gem_object_set_to_cpu_domain(obj, false);
>>       i915_gem_object_unlock(obj);
>>       if (err)
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> index 9c7402ce5bf9..9fb95a45bcad 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> @@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
>>       GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
>>       GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(obj, true);
>>       i915_gem_object_unlock(obj);
>>       if (err) {
>> @@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
>>       GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
>>       GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(obj, true);
>>       i915_gem_object_unlock(obj);
>>       if (err) {
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
>> index 34932871b3a5..a94243dc4c5c 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
>> @@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
>>       }
>>         /* Make the object dirty so that put_pages must do copy back the data */
>> -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_gem_object_set_to_gtt_domain(obj, true);
>>       i915_gem_object_unlock(obj);
>>       if (err) {
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> index febc9e6692ba..61a0532d0f3d 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> @@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
>>           return PTR_ERR(results);
>>         err = 0;
>> -    i915_gem_object_lock(results);
>> +    i915_gem_object_lock(results, NULL);
>>       intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
>>           err = i915_gem_object_set_to_cpu_domain(results, false);
>>       i915_gem_object_unlock(results);
>> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
>> index f1940939260a..943c8d232703 100644
>> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
>> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
>> @@ -2982,7 +2982,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
>>           goto put_obj;
>>       }
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       ret = i915_gem_object_set_to_cpu_domain(obj, false);
>>       i915_gem_object_unlock(obj);
>>       if (ret) {
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 9aa3066cb75d..1e06752835e5 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -420,7 +420,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
>>           GEM_BUG_ON(!drm_mm_node_allocated(&node));
>>       }
>>   -    ret = i915_gem_object_lock_interruptible(obj);
>> +    ret = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (ret)
>>           goto out_unpin;
>>   @@ -619,7 +619,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>>           GEM_BUG_ON(!drm_mm_node_allocated(&node));
>>       }
>>   -    ret = i915_gem_object_lock_interruptible(obj);
>> +    ret = i915_gem_object_lock_interruptible(obj, NULL);
>>       if (ret)
>>           goto out_unpin;
>>   @@ -1290,7 +1290,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
>>       i915_gem_drain_freed_objects(i915);
>>         list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
>> -        i915_gem_object_lock(obj);
>> +        i915_gem_object_lock(obj, NULL);
>>           drm_WARN_ON(&i915->drm,
>>                   i915_gem_object_set_to_cpu_domain(obj, true));
>>           i915_gem_object_unlock(obj);
>> @@ -1344,6 +1344,52 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
>>       return ret;
>>   }
>>   +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
>> +{
>> +    ww_acquire_init(&ww->ctx, &reservation_ww_class);
>> +    INIT_LIST_HEAD(&ww->obj_list);
>> +    ww->intr = intr;
>> +    ww->contended = NULL;
>> +}
>> +
>> +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
>> +{
>> +    struct drm_i915_gem_object *obj;
>> +
>> +    while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
>
> I wanted to ask whether you think this is faster than list_for_each_entry, but then also realized you can optimise further by not bothering to list_del (since you know the whole list is going away). If you are not allowing ww ctx reuse you don't even need to re-init the list_head at the end.
>
>> +        list_del(&obj->obj_link);
>> +        i915_gem_object_unlock(obj);
>> +    }
>> +}
>> +
>> +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
>> +{
>> +    i915_gem_ww_ctx_unlock_all(ww);
>> +    WARN_ON(ww->contended);
>
> Unless I am missing something this feels like a GEM_BUG_ON condition (translated: we should be confident after testing it is impossible to hit).
>
> Or is it allowed to not try the backoff on -EDEADLK? Backoff is the only place which resets the ww->contended, right? In this case WARN_ON would be wrong, but you probably did not go for this design. Should it be supported?
>
>> +    ww_acquire_fini(&ww->ctx);
>> +}
>> +
>> +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
>> +{
>> +    int ret = 0;
>> +
>> +    if (WARN_ON(!ww->contended))
>> +        return -EINVAL;
>> +
>> +    i915_gem_ww_ctx_unlock_all(ww);
>> +    if (ww->intr)
>> +        ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
>> +    else
>> +        dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
>> +
>> +    if (!ret)
>> +        list_add_tail(&ww->contended->obj_link, &ww->obj_list);
>> +
>> +    ww->contended = NULL;
>> +
>> +    return ret;
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>>   #include "selftests/mock_gem_device.c"
>>   #include "selftests/i915_gem.c"
>> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
>> index 1753c84d6c0d..988755dbf4be 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.h
>> +++ b/drivers/gpu/drm/i915/i915_gem.h
>> @@ -116,4 +116,15 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
>>       return test_bit(TASKLET_STATE_SCHED, &t->state);
>>   }
>>   +struct i915_gem_ww_ctx {
>> +    struct ww_acquire_ctx ctx;
>> +    struct list_head obj_list;
>> +    bool intr;
>> +    struct drm_i915_gem_object *contended;
>> +};
>> +
>> +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
>> +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
>> +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
>> +
>>   #endif /* __I915_GEM_H__ */
>> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
>> index 88d400b9df88..23a6132c5f4e 100644
>> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
>> @@ -199,11 +199,52 @@ static int igt_gem_hibernate(void *arg)
>>       return err;
>>   }
>>   +static int igt_gem_ww_ctx(void *arg)
>> +{
>> +    struct drm_i915_private *i915 = arg;
>> +    struct drm_i915_gem_object *obj, *obj2;
>> +    struct i915_gem_ww_ctx ww;
>> +    int err = 0;
>> +
>> +    obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
>> +    if (IS_ERR(obj))
>> +        return PTR_ERR(obj);
>> +
>> +    obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
>> +    if (IS_ERR(obj)) {
>
> Wrong obj ^^^ vvv.
>
>> +        err = PTR_ERR(obj);
>> +        goto put1;
>> +    }
>> +
>> +    i915_gem_ww_ctx_init(&ww, true);
>
> Need to expand with non-interruptible, interruptible and mixed.
>
>> +retry:
>> +    /* Lock the objects, twice for good measure (-EALREADY handling) */
>> +    err = i915_gem_object_lock(obj, &ww);
>> +    if (!err)
>> +        err = i915_gem_object_lock_interruptible(obj, &ww);
>
> This is -EALREADY on the 1st pass.
>
>> +    if (!err)
>> +        err = i915_gem_object_lock_interruptible(obj2, &ww);
>> +    if (!err)
>> +        err = i915_gem_object_lock(obj2, &ww);
>
> And this is -EALREADY again?
>
>> +
>> +    if (err == -EDEADLK) {
>
> How do we get here with a single locking context?
>
>> +        err = i915_gem_ww_ctx_backoff(&ww);
>> +        if (!err)
>> +            goto retry;
>> +    }
>> +    i915_gem_ww_ctx_fini(&ww);
>> +    i915_gem_object_put(obj2);
>> +put1:
>> +    i915_gem_object_put(obj);
>> +    return err;
>> +}
>> +
>>   int i915_gem_live_selftests(struct drm_i915_private *i915)
>>   {
>>       static const struct i915_subtest tests[] = {
>>           SUBTEST(igt_gem_suspend),
>>           SUBTEST(igt_gem_hibernate),
>> +        SUBTEST(igt_gem_ww_ctx),
>>       };
>>         if (intel_gt_is_wedged(&i915->gt))
>> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
>> index af89c7fc8f59..88c5e9acb84c 100644
>> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
>> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
>> @@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
>>               unsigned int x, y;
>>               int err;
>>   -            i915_gem_object_lock(obj);
>> +            i915_gem_object_lock(obj, NULL);
>>               err = i915_gem_object_set_to_gtt_domain(obj, true);
>>               i915_gem_object_unlock(obj);
>>               if (err)
>> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> index 6e80d99048e4..957a7a52def7 100644
>> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> @@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
>>       if (err)
>>           goto out_unpin;
>>   -    i915_gem_object_lock(obj);
>> +    i915_gem_object_lock(obj, NULL);
>>       err = i915_gem_object_set_to_wc_domain(obj, true);
>>       i915_gem_object_unlock(obj);
>>       if (err)
>>
>
> Regards,
>
> Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow) Maarten Lankhorst
  2020-06-26 14:41   ` Thomas Hellström (Intel)
@ 2020-06-29 14:42   ` Tvrtko Ursulin
  1 sibling, 0 replies; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-29 14:42 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 23/06/2020 15:28, Maarten Lankhorst wrote:
> We want to introduce backoff logic, but we need to lock the
> pool object as well for command parsing. Because of this, we
> will need backoff logic for the engine pool obj, move the batch
> validation up slightly to eb_lookup_vmas, and the actual command
> parsing in a separate function which can get called from execbuf
> relocation fast and slowpath.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 66 ++++++++++---------
>   1 file changed, 36 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index f896b1a4b38a..7cb44915cfc7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -290,6 +290,8 @@ struct i915_execbuffer {
>   	struct eb_vma_array *array;
>   };
>   
> +static int eb_parse(struct i915_execbuffer *eb);
> +
>   static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
>   {
>   	return intel_engine_requires_cmd_parser(eb->engine) ||
> @@ -873,6 +875,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
>   
>   static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   {
> +	struct drm_i915_private *i915 = eb->i915;
>   	unsigned int batch = eb_batch_index(eb);
>   	unsigned int i;
>   	int err = 0;
> @@ -886,18 +889,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   		vma = eb_lookup_vma(eb, eb->exec[i].handle);
>   		if (IS_ERR(vma)) {
>   			err = PTR_ERR(vma);
> -			break;
> +			goto err;
>   		}
>   
>   		err = eb_validate_vma(eb, &eb->exec[i], vma);
>   		if (unlikely(err)) {
>   			i915_vma_put(vma);
> -			break;
> +			goto err;
>   		}
>   
>   		eb_add_vma(eb, i, batch, vma);
>   	}
>   
> +	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
> +		drm_dbg(&i915->drm,
> +			"Attempting to use self-modifying batch buffer\n");
> +		return -EINVAL;
> +	}
> +
> +	if (range_overflows_t(u64,
> +			      eb->batch_start_offset, eb->batch_len,
> +			      eb->batch->vma->size)) {
> +		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
> +		return -EINVAL;
> +	}
> +
> +	if (eb->batch_len == 0)
> +		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;

How about you move the parsing step at least into a helper? So it is 
more obvious this step is not simply about looking up vmas, even if it 
is called from eb_lookup_vmas.

> +
> +	return 0;
> +
> +err:
>   	eb->vma[i].vma = NULL;
>   	return err;
>   }
> @@ -1809,7 +1831,7 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
>   	return 0;
>   }
>   
> -static noinline int eb_relocate_slow(struct i915_execbuffer *eb)

Something looks off - here you rename eb_relocate_slow but I don't see 
any callers changing in this patch. So I have to assume a broken bisect stage.

> +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
>   {
>   	bool have_copy = false;
>   	struct eb_vma *ev;
> @@ -1872,6 +1894,11 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	if (err)
>   		goto err;
>   
> +	/* as last step, parse the command buffer */
> +	err = eb_parse(eb);
> +	if (err)
> +		goto err;
> +
>   	/*
>   	 * Leave the user relocations as are, this is the painfully slow path,
>   	 * and we want to avoid the complication of dropping the lock whilst
> @@ -1904,7 +1931,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	return err;
>   }
>   
> -static int eb_relocate(struct i915_execbuffer *eb)
> +static int eb_relocate_parse(struct i915_execbuffer *eb)
>   {
>   	int err;
>   
> @@ -1932,7 +1959,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   			return eb_relocate_slow(eb);
>   	}
>   
> -	return 0;
> +	return eb_parse(eb);

And I am not a fan of relocation stage calling parse. Why couldn't every 
stage be done separately at the call sites so the stages are explicit 
and clear?

Commit message is explaining the parsing needs to go earlier, to come 
under the ww context block? But isn't it already after eb_lookup_vmas in 
current code?

Oh wait.. I am looking at drm-tip and don't have your reverts. It was 
agreed you will remove them, right? So I can wait for the next round to 
figure out this re-organization.

Regards,

Tvrtko


>   }
>   
>   static int eb_move_to_gpu(struct i915_execbuffer *eb)
> @@ -2870,7 +2897,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_context;
>   
> -	err = eb_relocate(&eb);
> +	err = eb_relocate_parse(&eb);
>   	if (err) {
>   		/*
>   		 * If the user expects the execobject.offset and
> @@ -2883,33 +2910,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		goto err_vma;
>   	}
>   
> -	if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
> -		drm_dbg(&i915->drm,
> -			"Attempting to use self-modifying batch buffer\n");
> -		err = -EINVAL;
> -		goto err_vma;
> -	}
> -
> -	if (range_overflows_t(u64,
> -			      eb.batch_start_offset, eb.batch_len,
> -			      eb.batch->vma->size)) {
> -		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
> -		err = -EINVAL;
> -		goto err_vma;
> -	}
> -
> -	if (eb.batch_len == 0)
> -		eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
> -
> -	err = eb_parse(&eb);
> -	if (err)
> -		goto err_vma;
> -
>   	/*
>   	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>   	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>   	 * hsw should have this fixed, but bdw mucks it up again. */
> -	batch = eb.batch->vma;
>   	if (eb.batch_flags & I915_DISPATCH_SECURE) {
>   		struct i915_vma *vma;
>   
> @@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		 *   fitting due to fragmentation.
>   		 * So this is actually safe.
>   		 */
> -		vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
> +		vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>   		if (IS_ERR(vma)) {
>   			err = PTR_ERR(vma);
>   			goto err_parse;
>   		}
>   
>   		batch = vma;
> +	} else {
> +		batch = eb.batch->vma;
>   	}
>   
>   	/* All GPU relocation batches must be submitted prior to the user rq */
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation"
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation" Maarten Lankhorst
@ 2020-06-29 15:08   ` Tvrtko Ursulin
  2020-06-30 11:52     ` Maarten Lankhorst
  0 siblings, 1 reply; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-29 15:08 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 23/06/2020 15:28, Maarten Lankhorst wrote:
> This reverts commit 0f1dd02295f35dcdcbaafcbcbbec0753884ab974.
> This conflicts with the ww mutex handling, which needs to drop
> the references after gpu submission anyway, because otherwise we
> may risk unlocking a BO after first freeing it.

What is the problem here? eb_vma_array_put in eb_move_to_gpu? If so, 
could you just move this put to later in the sequence? I am simply 
thinking how to avoid controversial reverts. Because on the other hand I 
did not figure out what 0f1dd02295f35dcdcbaafcbcbbec0753884ab974 fixed 
in a few minutes I spent staring at the patch.

Regards,

Tvrtko

> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 124 +++++++-----------
>   1 file changed, 51 insertions(+), 73 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 7cb44915cfc7..2636a130fb57 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -40,11 +40,6 @@ struct eb_vma {
>   	u32 handle;
>   };
>   
> -struct eb_vma_array {
> -	struct kref kref;
> -	struct eb_vma vma[];
> -};
> -
>   enum {
>   	FORCE_CPU_RELOC = 1,
>   	FORCE_GTT_RELOC,
> @@ -57,6 +52,7 @@ enum {
>   #define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
>   #define __EXEC_OBJECT_NEEDS_BIAS	BIT(28)
>   #define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 28) /* all of the above */
> +#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
>   
>   #define __EXEC_HAS_RELOC	BIT(31)
>   #define __EXEC_INTERNAL_FLAGS	(~0u << 31)
> @@ -287,7 +283,6 @@ struct i915_execbuffer {
>   	 */
>   	int lut_size;
>   	struct hlist_head *buckets; /** ht for relocation handles */
> -	struct eb_vma_array *array;
>   };
>   
>   static int eb_parse(struct i915_execbuffer *eb);
> @@ -299,62 +294,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
>   		 eb->args->batch_len);
>   }
>   
> -static struct eb_vma_array *eb_vma_array_create(unsigned int count)
> -{
> -	struct eb_vma_array *arr;
> -
> -	arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
> -	if (!arr)
> -		return NULL;
> -
> -	kref_init(&arr->kref);
> -	arr->vma[0].vma = NULL;
> -
> -	return arr;
> -}
> -
> -static inline void eb_unreserve_vma(struct eb_vma *ev)
> -{
> -	struct i915_vma *vma = ev->vma;
> -
> -	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
> -		__i915_vma_unpin_fence(vma);
> -
> -	if (ev->flags & __EXEC_OBJECT_HAS_PIN)
> -		__i915_vma_unpin(vma);
> -
> -	ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
> -		       __EXEC_OBJECT_HAS_FENCE);
> -}
> -
> -static void eb_vma_array_destroy(struct kref *kref)
> -{
> -	struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
> -	struct eb_vma *ev = arr->vma;
> -
> -	while (ev->vma) {
> -		eb_unreserve_vma(ev);
> -		i915_vma_put(ev->vma);
> -		ev++;
> -	}
> -
> -	kvfree(arr);
> -}
> -
> -static void eb_vma_array_put(struct eb_vma_array *arr)
> -{
> -	kref_put(&arr->kref, eb_vma_array_destroy);
> -}
> -
>   static int eb_create(struct i915_execbuffer *eb)
>   {
> -	/* Allocate an extra slot for use by the command parser + sentinel */
> -	eb->array = eb_vma_array_create(eb->buffer_count + 2);
> -	if (!eb->array)
> -		return -ENOMEM;
> -
> -	eb->vma = eb->array->vma;
> -
>   	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
>   		unsigned int size = 1 + ilog2(eb->buffer_count);
>   
> @@ -388,10 +329,8 @@ static int eb_create(struct i915_execbuffer *eb)
>   				break;
>   		} while (--size);
>   
> -		if (unlikely(!size)) {
> -			eb_vma_array_put(eb->array);
> +		if (unlikely(!size))
>   			return -ENOMEM;
> -		}
>   
>   		eb->lut_size = size;
>   	} else {
> @@ -502,6 +441,26 @@ eb_pin_vma(struct i915_execbuffer *eb,
>   	return !eb_vma_misplaced(entry, vma, ev->flags);
>   }
>   
> +static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
> +{
> +	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
> +
> +	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
> +		__i915_vma_unpin_fence(vma);
> +
> +	__i915_vma_unpin(vma);
> +}
> +
> +static inline void
> +eb_unreserve_vma(struct eb_vma *ev)
> +{
> +	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
> +		return;
> +
> +	__eb_unreserve_vma(ev->vma, ev->flags);
> +	ev->flags &= ~__EXEC_OBJECT_RESERVED;
> +}
> +
>   static int
>   eb_validate_vma(struct i915_execbuffer *eb,
>   		struct drm_i915_gem_exec_object2 *entry,
> @@ -944,13 +903,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
>   	}
>   }
>   
> +static void eb_release_vmas(const struct i915_execbuffer *eb)
> +{
> +	const unsigned int count = eb->buffer_count;
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++) {
> +		struct eb_vma *ev = &eb->vma[i];
> +		struct i915_vma *vma = ev->vma;
> +
> +		if (!vma)
> +			break;
> +
> +		eb->vma[i].vma = NULL;
> +
> +		if (ev->flags & __EXEC_OBJECT_HAS_PIN)
> +			__eb_unreserve_vma(vma, ev->flags);
> +
> +		i915_vma_put(vma);
> +	}
> +}
> +
>   static void eb_destroy(const struct i915_execbuffer *eb)
>   {
>   	GEM_BUG_ON(eb->reloc_cache.rq);
>   
> -	if (eb->array)
> -		eb_vma_array_put(eb->array);
> -
>   	if (eb->lut_size > 0)
>   		kfree(eb->buckets);
>   }
> @@ -2039,12 +2016,9 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   			err = i915_vma_move_to_active(vma, eb->request, flags);
>   
>   		i915_vma_unlock(vma);
> -		eb_unreserve_vma(ev);
>   	}
>   	ww_acquire_fini(&acquire);
>   
> -	eb_vma_array_put(fetch_and_zero(&eb->array));
> -
>   	if (unlikely(err))
>   		goto err_skip;
>   
> @@ -2340,7 +2314,6 @@ static int eb_parse(struct i915_execbuffer *eb)
>   	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
>   	eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
>   	eb->batch = &eb->vma[eb->buffer_count++];
> -	eb->vma[eb->buffer_count].vma = NULL;
>   
>   	eb->trampoline = trampoline;
>   	eb->batch_start_offset = 0;
> @@ -2838,6 +2811,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		args->flags |= __EXEC_HAS_RELOC;
>   
>   	eb.exec = exec;
> +	eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
> +	eb.vma[0].vma = NULL;
>   
>   	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
>   	reloc_cache_init(&eb.reloc_cache, eb.i915);
> @@ -3014,6 +2989,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (batch->private)
>   		intel_gt_buffer_pool_put(batch->private);
>   err_vma:
> +	if (eb.exec)
> +		eb_release_vmas(&eb);
>   	if (eb.trampoline)
>   		i915_vma_unpin(eb.trampoline);
>   	eb_unpin_engine(&eb);
> @@ -3031,7 +3008,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   
>   static size_t eb_element_size(void)
>   {
> -	return sizeof(struct drm_i915_gem_exec_object2);
> +	return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
>   }
>   
>   static bool check_buffer_count(size_t count)
> @@ -3087,7 +3064,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
>   	/* Copy in the exec list from userland */
>   	exec_list = kvmalloc_array(count, sizeof(*exec_list),
>   				   __GFP_NOWARN | GFP_KERNEL);
> -	exec2_list = kvmalloc_array(count, eb_element_size(),
> +	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
>   				    __GFP_NOWARN | GFP_KERNEL);
>   	if (exec_list == NULL || exec2_list == NULL) {
>   		drm_dbg(&i915->drm,
> @@ -3165,7 +3142,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>   	if (err)
>   		return err;
>   
> -	exec2_list = kvmalloc_array(count, eb_element_size(),
> +	/* Allocate an extra slot for use by the command parser */
> +	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
>   				    __GFP_NOWARN | GFP_KERNEL);
>   	if (exec2_list == NULL) {
>   		drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
  2020-06-23 14:28 ` [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock Maarten Lankhorst
  2020-06-26 13:52   ` Thomas Hellström (Intel)
@ 2020-06-29 15:14   ` Tvrtko Ursulin
  2020-06-30 11:56     ` Maarten Lankhorst
  1 sibling, 1 reply; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-29 15:14 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 23/06/2020 15:28, Maarten Lankhorst wrote:
> The lock here should be interruptible, so we can backoff if needed.

I spied Chris posting "drm/i915/gem: Move obj->lut_list under its own 
lock" so maybe have a look at that.

My question here is..

> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 2636a130fb57..aa441af81431 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -774,7 +774,12 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>   		if (err == 0) { /* And nor has this handle */
>   			struct drm_i915_gem_object *obj = vma->obj;
>   
> -			i915_gem_object_lock(obj, NULL);
> +			err = i915_gem_object_lock_interruptible(obj, NULL);

.. does this lock-unlock survive to the end of your series or gets 
completely subsumed by the ctx locking?

Regards,

Tvrtko

> +			if (err) {
> +				radix_tree_delete(&ctx->handles_vma, handle);
> +				goto unlock;
> +			}
> +
>   			if (idr_find(&eb->file->object_idr, handle) == obj) {
>   				list_add(&lut->obj_link, &obj->lut_list);
>   			} else {
> @@ -783,6 +788,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>   			}
>   			i915_gem_object_unlock(obj);
>   		}
> +unlock:
>   		mutex_unlock(&ctx->mutex);
>   	}
>   	if (unlikely(err))
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation"
  2020-06-29 15:08   ` Tvrtko Ursulin
@ 2020-06-30 11:52     ` Maarten Lankhorst
  2020-06-30 12:31       ` Tvrtko Ursulin
  0 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-30 11:52 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Op 29-06-2020 om 17:08 schreef Tvrtko Ursulin:
>
> On 23/06/2020 15:28, Maarten Lankhorst wrote:
>> This reverts commit 0f1dd02295f35dcdcbaafcbcbbec0753884ab974.
>> This conflicts with the ww mutex handling, which needs to drop
>> the references after gpu submission anyway, because otherwise we
>> may risk unlocking a BO after first freeing it.
>
> What is the problem here? eb_vma_array_put in eb_move_to_gpu? If so, could you just move this put to later in the sequence? I am simply thinking how to avoid controversial reverts. Because on the other hand I did not figure out what 0f1dd02295f35dcdcbaafcbcbbec0753884ab974 fixed in a few minutes I spent staring at the patch. 


We need to unlock before we unref to prevent a use-after-free in unlock, so freeing and releasing in eb_move_to_gpu() is too early. This means we only end up with 1 path for unlock, so it's fine to revert.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock.
  2020-06-29 15:14   ` Tvrtko Ursulin
@ 2020-06-30 11:56     ` Maarten Lankhorst
  0 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-30 11:56 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Op 29-06-2020 om 17:14 schreef Tvrtko Ursulin:
>
> On 23/06/2020 15:28, Maarten Lankhorst wrote:
>> The lock here should be interruptible, so we can backoff if needed.
>
> I spied Chris posting "drm/i915/gem: Move obj->lut_list under its own lock" so maybe have a look at that.
>
> My question here is..
>
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++++++-
>>   1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 2636a130fb57..aa441af81431 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -774,7 +774,12 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>>           if (err == 0) { /* And nor has this handle */
>>               struct drm_i915_gem_object *obj = vma->obj;
>>   -            i915_gem_object_lock(obj, NULL);
>> +            err = i915_gem_object_lock_interruptible(obj, NULL);
>
> .. does this lock-unlock survive to the end of your series or gets completely subsumed by the ctx locking?
>
> Regards,
>
> Tvrtko
>
Yeah it survives, it's too early to use ww waiting. A separate lut lock is fine as well, since re-using ww is a bit overkill.
>> +            if (err) {
>> +                radix_tree_delete(&ctx->handles_vma, handle);
>> +                goto unlock;
>> +            }
>> +
>>               if (idr_find(&eb->file->object_idr, handle) == obj) {
>>                   list_add(&lut->obj_link, &obj->lut_list);
>>               } else {
>> @@ -783,6 +788,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb,
>>               }
>>               i915_gem_object_unlock(obj);
>>           }
>> +unlock:
>>           mutex_unlock(&ctx->mutex);
>>       }
>>       if (unlikely(err))
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation"
  2020-06-30 11:52     ` Maarten Lankhorst
@ 2020-06-30 12:31       ` Tvrtko Ursulin
  2020-06-30 14:07         ` Maarten Lankhorst
  0 siblings, 1 reply; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-30 12:31 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 30/06/2020 12:52, Maarten Lankhorst wrote:
> Op 29-06-2020 om 17:08 schreef Tvrtko Ursulin:
>>
>> On 23/06/2020 15:28, Maarten Lankhorst wrote:
>>> This reverts commit 0f1dd02295f35dcdcbaafcbcbbec0753884ab974.
>>> This conflicts with the ww mutex handling, which needs to drop
>>> the references after gpu submission anyway, because otherwise we
>>> may risk unlocking a BO after first freeing it.
>>
>> What is the problem here? eb_vma_array_put in eb_move_to_gpu? If so, could you just move this put to later in the sequence? I am simply thinking how to avoid controversial reverts. Because on the other hand I did not figure out what 0f1dd02295f35dcdcbaafcbcbbec0753884ab974 fixed in a few minutes I spent staring at the patch.
> 
> 
> We need to unlock before we unref to prevent a use-after-free in unlock, so freeing and releasing in eb_move_to_gpu() is too early. This means we only end up with 1 path for unlock, so it's fine to revert.

You are saying the reason 0f1dd02295f35dcdcbaafcbcbbec0753884ab974 was 
added will no longer apply after your changes?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation"
  2020-06-30 12:31       ` Tvrtko Ursulin
@ 2020-06-30 14:07         ` Maarten Lankhorst
  0 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-06-30 14:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Op 30-06-2020 om 14:31 schreef Tvrtko Ursulin:
>
> On 30/06/2020 12:52, Maarten Lankhorst wrote:
>> Op 29-06-2020 om 17:08 schreef Tvrtko Ursulin:
>>>
>>> On 23/06/2020 15:28, Maarten Lankhorst wrote:
>>>> This reverts commit 0f1dd02295f35dcdcbaafcbcbbec0753884ab974.
>>>> This conflicts with the ww mutex handling, which needs to drop
>>>> the references after gpu submission anyway, because otherwise we
>>>> may risk unlocking a BO after first freeing it.
>>>
>>> What is the problem here? eb_vma_array_put in eb_move_to_gpu? If so, could you just move this put to later in the sequence? I am simply thinking how to avoid controversial reverts. Because on the other hand I did not figure out what 0f1dd02295f35dcdcbaafcbcbbec0753884ab974 fixed in a few minutes I spent staring at the patch.
>>
>>
>> We need to unlock before we unref to prevent a use-after-free in unlock, so freeing and releasing in eb_move_to_gpu() is too early. This means we only end up with 1 path for unlock, so it's fine to revert.
>
> You are saying the reason 0f1dd02295f35dcdcbaafcbcbbec0753884ab974 was added for will not be there after your changes?
>
> Regards,
>
> Tvrtko

Yes. :)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Kill context before taking ctx->mutex
  2020-06-24 11:05   ` [Intel-gfx] [PATCH] " Maarten Lankhorst
@ 2020-06-30 14:16     ` Tvrtko Ursulin
  2020-07-02 13:26       ` Maarten Lankhorst
  0 siblings, 1 reply; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-06-30 14:16 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 24/06/2020 12:05, Maarten Lankhorst wrote:
> Killing context before taking ctx->mutex fixes a hang in
> gem_ctx_persistence.close-replace-race, where lut_close
> takes obj->resv.lock which is already held by execbuf,
> causing it to stall indefinitely.

If this is the consequence of inverting the locking order I think you 
need to move the fix earlier in the series, to precede the patch which 
creates the inversion. Otherwise AFAICT the re-order of kill_context vs 
lut_close seems fine.

Regards,

Tvrtko

> [ 1904.342847] 2 locks held by gem_ctx_persist/11520:
> [ 1904.342849]  #0: ffff8882188e4968 (&ctx->mutex){+.+.}-{3:3}, at: context_close+0xe6/0x850 [i915]
> [ 1904.342941]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: lut_close+0x2c2/0xba0 [i915]
> [ 1904.343033] 3 locks held by gem_ctx_persist/11521:
> [ 1904.343035]  #0: ffffc900008ff938 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_gem_do_execbuffer+0x103d/0x54c0 [i915]
> [ 1904.343157]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: eb_validate_vmas+0x602/0x2010 [i915]
> [ 1904.343267]  #2: ffff88820afd9200 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x335/0x2300 [i915]
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 ++++++++++-----------
>   1 file changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index a3519d5ee5a3..6d25c9c2be1a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -623,6 +623,17 @@ static void context_close(struct i915_gem_context *ctx)
>   	i915_gem_context_set_closed(ctx);
>   	mutex_unlock(&ctx->engines_mutex);
>   
> +	/*
> +	 * If the user has disabled hangchecking, we can not be sure that
> +	 * the batches will ever complete after the context is closed,
> +	 * keeping the context and all resources pinned forever. So in this
> +	 * case we opt to forcibly kill off all remaining requests on
> +	 * context close.
> +	 */
> +	if (!i915_gem_context_is_persistent(ctx) ||
> +	    !ctx->i915->params.enable_hangcheck)
> +		kill_context(ctx);
> +
>   	mutex_lock(&ctx->mutex);
>   
>   	set_closed_name(ctx);
> @@ -642,17 +653,6 @@ static void context_close(struct i915_gem_context *ctx)
>   
>   	mutex_unlock(&ctx->mutex);
>   
> -	/*
> -	 * If the user has disabled hangchecking, we can not be sure that
> -	 * the batches will ever complete after the context is closed,
> -	 * keeping the context and all resources pinned forever. So in this
> -	 * case we opt to forcibly kill off all remaining requests on
> -	 * context close.
> -	 */
> -	if (!i915_gem_context_is_persistent(ctx) ||
> -	    !ctx->i915->params.enable_hangcheck)
> -		kill_context(ctx);
> -
>   	i915_gem_context_put(ctx);
>   }
>   
> 
> base-commit: 64cab0b9f9bfeb14d3ec2452d76b56915cdeb09f
> prerequisite-patch-id: e6315738715ac4ffccaeb4c4bf5a94651fb8da1d
> prerequisite-patch-id: 7944bb01d1ec7530513eabddb9198275653cc451
> prerequisite-patch-id: 052eda3b40906f0fbc16b4cc33dbcdce35e05441
> prerequisite-patch-id: 35ff18a74e8bf9bfb0a517f69a98d0ec88bd3b51
> prerequisite-patch-id: 7a34e785e951b1d3f4c0e20430c8111a15ddbe92
> prerequisite-patch-id: 9b7faf3172e9f218a2589fcc96930af9ab05e70b
> prerequisite-patch-id: 3ce7c5b4508018631673e62d8725f866988bd08d
> prerequisite-patch-id: 5fd46caff26e53f9cb6df5f8490838b6ac15e015
> prerequisite-patch-id: 41782208b1bc32e448ce29313112030c74bd8421
> prerequisite-patch-id: b6c4d99cb554c0c2268cde5c43e878a48e005e45
> prerequisite-patch-id: 418fdb031a232bba4056171917ee42e997991902
> prerequisite-patch-id: ff5bf0dcdb9191761392b0707481aaf99396dbec
> prerequisite-patch-id: c3dbcef2f1a68f88ae99acbd01ee56847fb3e2da
> prerequisite-patch-id: 18c373676c9bbeb1c11fb2ba5bf4ad728cfea75d
> prerequisite-patch-id: 5b9d8e4535096365d365fdd1ec00f844a4135208
> prerequisite-patch-id: 63bac64548acd514c4a0cb5acb896c8217fb8201
> prerequisite-patch-id: e93b855dd97b24799c59f059cc548f46807ab207
> prerequisite-patch-id: 3d7dc6ecbc2279fb48f0972a911fbffd8d899faa
> prerequisite-patch-id: f1d9e0b7165f80efe984dd0231d1dbd2a9a79950
> prerequisite-patch-id: ed1a168ac98b81b8066f68a0738cfc44a79e8ef1
> prerequisite-patch-id: f813cb8d4c2fe2c1d94b66c3f3fbb787ac241628
> prerequisite-patch-id: 0f0f90eaa4a2e299adddfe1c7134af3810a8e9e2
> prerequisite-patch-id: cb7ffeccd6429fc79aebffb84f62af5e78252461
> prerequisite-patch-id: 78905449b46ad574757a7fb91f58847ea20e09cd
> prerequisite-patch-id: 6d937a49f3c8cd380121f72610072aaaf8c274b1
> prerequisite-patch-id: 0c8d2dee1592395780258488be0350755e7ffd7d
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
  2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
                   ` (29 preceding siblings ...)
  2020-06-24 12:48 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
@ 2020-07-01 13:10 ` Patchwork
  30 siblings, 0 replies; 61+ messages in thread
From: Patchwork @ 2020-07-01 13:10 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2)
URL   : https://patchwork.freedesktop.org/series/78744/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8661_full -> Patchwork_18018_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18018_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18018_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18018_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_close@many-handles-one-vma:
    - shard-glk:          [PASS][1] -> ([FAIL][2], [FAIL][3])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-glk9/igt@gem_close@many-handles-one-vma.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk1/igt@gem_close@many-handles-one-vma.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk6/igt@gem_close@many-handles-one-vma.html
    - shard-apl:          [PASS][4] -> ([FAIL][5], [FAIL][6])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl1/igt@gem_close@many-handles-one-vma.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl4/igt@gem_close@many-handles-one-vma.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl3/igt@gem_close@many-handles-one-vma.html
    - shard-skl:          [PASS][7] -> ([FAIL][8], [FAIL][9])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl1/igt@gem_close@many-handles-one-vma.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl9/igt@gem_close@many-handles-one-vma.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl2/igt@gem_close@many-handles-one-vma.html
    - shard-kbl:          [PASS][10] -> ([FAIL][11], [FAIL][12])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl7/igt@gem_close@many-handles-one-vma.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@gem_close@many-handles-one-vma.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@gem_close@many-handles-one-vma.html
    - shard-hsw:          [PASS][13] -> [FAIL][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-hsw5/igt@gem_close@many-handles-one-vma.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-hsw1/igt@gem_close@many-handles-one-vma.html
    - shard-snb:          [PASS][15] -> ([FAIL][16], [FAIL][17])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb5/igt@gem_close@many-handles-one-vma.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb6/igt@gem_close@many-handles-one-vma.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@gem_close@many-handles-one-vma.html
    - shard-iclb:         [PASS][18] -> ([FAIL][19], [FAIL][20])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-iclb8/igt@gem_close@many-handles-one-vma.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb2/igt@gem_close@many-handles-one-vma.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb5/igt@gem_close@many-handles-one-vma.html

  * igt@gem_exec_reloc@basic-many-active@rcs0:
    - shard-tglb:         [PASS][21] -> ([FAIL][22], [FAIL][23]) ([i915#1815]) +6 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb3/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb1/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb5/igt@gem_exec_reloc@basic-many-active@rcs0.html
    - shard-glk:          [PASS][24] -> ([FAIL][25], [FAIL][26]) ([i915#1815]) +7 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-glk7/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk4/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk6/igt@gem_exec_reloc@basic-many-active@rcs0.html

  * igt@gem_exec_reloc@basic-many-active@vcs0:
    - shard-kbl:          [PASS][27] -> ([FAIL][28], [FAIL][29]) ([i915#1815]) +9 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl6/igt@gem_exec_reloc@basic-many-active@vcs0.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl1/igt@gem_exec_reloc@basic-many-active@vcs0.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl7/igt@gem_exec_reloc@basic-many-active@vcs0.html

  * igt@gem_exec_reloc@basic-many-active@vcs1:
    - shard-tglb:         [PASS][30] -> ([FAIL][31], [FAIL][32]) +3 similar issues
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb3/igt@gem_exec_reloc@basic-many-active@vcs1.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb1/igt@gem_exec_reloc@basic-many-active@vcs1.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb5/igt@gem_exec_reloc@basic-many-active@vcs1.html
    - shard-iclb:         NOTRUN -> [FAIL][33]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb1/igt@gem_exec_reloc@basic-many-active@vcs1.html

  * igt@gem_exec_reloc@basic-parallel:
    - shard-snb:          [PASS][34] -> ([DMESG-WARN][35], [DMESG-WARN][36])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb2/igt@gem_exec_reloc@basic-parallel.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb4/igt@gem_exec_reloc@basic-parallel.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@gem_exec_reloc@basic-parallel.html

  * igt@gem_exec_reloc@basic-spin@vcs0:
    - shard-snb:          [PASS][37] -> [DMESG-WARN][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb1/igt@gem_exec_reloc@basic-spin@vcs0.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@gem_exec_reloc@basic-spin@vcs0.html

  * igt@gem_exec_reloc@basic-wide-active@bcs0:
    - shard-apl:          [PASS][39] -> ([FAIL][40], [FAIL][41]) ([i915#1815]) +3 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl2/igt@gem_exec_reloc@basic-wide-active@bcs0.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl1/igt@gem_exec_reloc@basic-wide-active@bcs0.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl2/igt@gem_exec_reloc@basic-wide-active@bcs0.html
    - shard-skl:          [PASS][42] -> ([FAIL][43], [FAIL][44]) ([i915#1815]) +3 similar issues
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl8/igt@gem_exec_reloc@basic-wide-active@bcs0.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl5/igt@gem_exec_reloc@basic-wide-active@bcs0.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl9/igt@gem_exec_reloc@basic-wide-active@bcs0.html

  * igt@gem_exec_reloc@basic-wide-active@rcs0:
    - shard-iclb:         [PASS][45] -> ([FAIL][46], [FAIL][47]) ([i915#1815]) +7 similar issues
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-iclb4/igt@gem_exec_reloc@basic-wide-active@rcs0.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb6/igt@gem_exec_reloc@basic-wide-active@rcs0.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb1/igt@gem_exec_reloc@basic-wide-active@rcs0.html

  * igt@gem_exec_reloc@basic-wide-active@vcs1:
    - shard-iclb:         [PASS][48] -> [FAIL][49]
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-iclb4/igt@gem_exec_reloc@basic-wide-active@vcs1.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb1/igt@gem_exec_reloc@basic-wide-active@vcs1.html

  * igt@i915_selftest@live@gem_contexts:
    - shard-kbl:          [PASS][50] -> ([DMESG-WARN][51], [DMESG-WARN][52])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl1/igt@i915_selftest@live@gem_contexts.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@i915_selftest@live@gem_contexts.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@i915_selftest@live@gem_contexts.html

  * igt@i915_selftest@live@gem_execbuf:
    - shard-skl:          [PASS][53] -> ([PASS][54], [INCOMPLETE][55])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl4/igt@i915_selftest@live@gem_execbuf.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl4/igt@i915_selftest@live@gem_execbuf.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl10/igt@i915_selftest@live@gem_execbuf.html
    - shard-apl:          [PASS][56] -> ([PASS][57], [INCOMPLETE][58])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl4/igt@i915_selftest@live@gem_execbuf.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl6/igt@i915_selftest@live@gem_execbuf.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl4/igt@i915_selftest@live@gem_execbuf.html
    - shard-iclb:         [PASS][59] -> ([PASS][60], [INCOMPLETE][61])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-iclb8/igt@i915_selftest@live@gem_execbuf.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb2/igt@i915_selftest@live@gem_execbuf.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb5/igt@i915_selftest@live@gem_execbuf.html
    - shard-snb:          [PASS][62] -> ([INCOMPLETE][63], [DMESG-WARN][64]) ([i915#82])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb4/igt@i915_selftest@live@gem_execbuf.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@i915_selftest@live@gem_execbuf.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb6/igt@i915_selftest@live@gem_execbuf.html
    - shard-tglb:         [PASS][65] -> ([PASS][66], [INCOMPLETE][67])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb1/igt@i915_selftest@live@gem_execbuf.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb1/igt@i915_selftest@live@gem_execbuf.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb2/igt@i915_selftest@live@gem_execbuf.html

  * igt@runner@aborted:
    - shard-snb:          NOTRUN -> ([FAIL][68], [FAIL][69], [FAIL][70], [FAIL][71], [FAIL][72]) ([i915#2110])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb4/igt@runner@aborted.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@runner@aborted.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@runner@aborted.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb5/igt@runner@aborted.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb6/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_18018_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@preservation-s3@bcs0:
    - shard-kbl:          [PASS][73] -> ([DMESG-WARN][74], [DMESG-WARN][75]) ([i915#180])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl3/igt@gem_ctx_isolation@preservation-s3@bcs0.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl3/igt@gem_ctx_isolation@preservation-s3@bcs0.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl2/igt@gem_ctx_isolation@preservation-s3@bcs0.html

  * igt@gem_ctx_persistence@engines-mixed-process@vecs0:
    - shard-skl:          [PASS][76] -> ([PASS][77], [FAIL][78]) ([i915#1528]) +1 similar issue
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl9/igt@gem_ctx_persistence@engines-mixed-process@vecs0.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl6/igt@gem_ctx_persistence@engines-mixed-process@vecs0.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl8/igt@gem_ctx_persistence@engines-mixed-process@vecs0.html

  * igt@gem_eio@kms:
    - shard-snb:          [PASS][79] -> ([DMESG-WARN][80], [PASS][81]) ([i915#1982])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb5/igt@gem_eio@kms.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb1/igt@gem_eio@kms.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb4/igt@gem_eio@kms.html

  * igt@gem_exec_reloc@basic-many-active@rcs0:
    - shard-hsw:          [PASS][82] -> [INCOMPLETE][83] ([i915#1821]) +1 similar issue
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-hsw7/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-hsw7/igt@gem_exec_reloc@basic-many-active@rcs0.html
    - shard-snb:          [PASS][84] -> ([INCOMPLETE][85], [INCOMPLETE][86]) ([i915#1821] / [i915#82])
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb1/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb1/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb6/igt@gem_exec_reloc@basic-many-active@rcs0.html

  * igt@gem_exec_reloc@basic-parallel:
    - shard-kbl:          [PASS][87] -> ([TIMEOUT][88], [TIMEOUT][89]) ([i915#1958] / [i915#2119])
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl4/igt@gem_exec_reloc@basic-parallel.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@gem_exec_reloc@basic-parallel.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@gem_exec_reloc@basic-parallel.html
    - shard-tglb:         [PASS][90] -> ([TIMEOUT][91], [TIMEOUT][92]) ([i915#1958] / [i915#2119])
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb1/igt@gem_exec_reloc@basic-parallel.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb1/igt@gem_exec_reloc@basic-parallel.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb2/igt@gem_exec_reloc@basic-parallel.html
    - shard-skl:          [PASS][93] -> ([TIMEOUT][94], [TIMEOUT][95]) ([i915#1958] / [i915#2119])
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl8/igt@gem_exec_reloc@basic-parallel.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl5/igt@gem_exec_reloc@basic-parallel.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl9/igt@gem_exec_reloc@basic-parallel.html
    - shard-apl:          [PASS][96] -> ([TIMEOUT][97], [TIMEOUT][98]) ([i915#1635] / [i915#1958] / [i915#2119])
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl7/igt@gem_exec_reloc@basic-parallel.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl6/igt@gem_exec_reloc@basic-parallel.html
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl4/igt@gem_exec_reloc@basic-parallel.html
    - shard-iclb:         [PASS][99] -> ([TIMEOUT][100], [TIMEOUT][101]) ([i915#1958] / [i915#2119])
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-iclb5/igt@gem_exec_reloc@basic-parallel.html
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb7/igt@gem_exec_reloc@basic-parallel.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-iclb2/igt@gem_exec_reloc@basic-parallel.html
    - shard-glk:          [PASS][102] -> ([TIMEOUT][103], [TIMEOUT][104]) ([i915#1958] / [i915#2119])
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-glk5/igt@gem_exec_reloc@basic-parallel.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk5/igt@gem_exec_reloc@basic-parallel.html
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk9/igt@gem_exec_reloc@basic-parallel.html

  * igt@gem_exec_reloc@basic-wide-active@rcs0:
    - shard-snb:          [PASS][105] -> [INCOMPLETE][106] ([i915#1821] / [i915#82])
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb1/igt@gem_exec_reloc@basic-wide-active@rcs0.html
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb2/igt@gem_exec_reloc@basic-wide-active@rcs0.html

  * igt@gem_exec_whisper@basic-contexts-all:
    - shard-glk:          [PASS][107] -> ([DMESG-WARN][108], [PASS][109]) ([i915#118] / [i915#95]) +1 similar issue
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-glk5/igt@gem_exec_whisper@basic-contexts-all.html
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk9/igt@gem_exec_whisper@basic-contexts-all.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-glk5/igt@gem_exec_whisper@basic-contexts-all.html

  * igt@gem_flink_basic@double-flink:
    - shard-kbl:          [PASS][110] -> ([DMESG-WARN][111], [PASS][112]) ([i915#165])
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl3/igt@gem_flink_basic@double-flink.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl2/igt@gem_flink_basic@double-flink.html
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@gem_flink_basic@double-flink.html

  * igt@gem_shrink@reclaim:
    - shard-hsw:          [PASS][113] -> [SKIP][114] ([fdo#109271])
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-hsw5/igt@gem_shrink@reclaim.html
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-hsw2/igt@gem_shrink@reclaim.html

  * igt@i915_module_load@reload-with-fault-injection:
    - shard-tglb:         [PASS][115] -> ([DMESG-WARN][116], [PASS][117]) ([i915#402]) +1 similar issue
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb6/igt@i915_module_load@reload-with-fault-injection.html
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb5/igt@i915_module_load@reload-with-fault-injection.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb7/igt@i915_module_load@reload-with-fault-injection.html

  * igt@i915_pm_rpm@gem-execbuf:
    - shard-apl:          [PASS][118] -> ([DMESG-WARN][119], [DMESG-WARN][120]) ([i915#1635] / [i915#95]) +3 similar issues
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl2/igt@i915_pm_rpm@gem-execbuf.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl6/igt@i915_pm_rpm@gem-execbuf.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl1/igt@i915_pm_rpm@gem-execbuf.html

  * igt@i915_pm_rpm@system-suspend-execbuf:
    - shard-kbl:          [PASS][121] -> ([INCOMPLETE][122], [INCOMPLETE][123]) ([i915#151] / [i915#155])
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl7/igt@i915_pm_rpm@system-suspend-execbuf.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@i915_pm_rpm@system-suspend-execbuf.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@i915_pm_rpm@system-suspend-execbuf.html

  * igt@i915_selftest@live@blt:
    - shard-snb:          [PASS][124] -> [INCOMPLETE][125] ([i915#82])
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-snb4/igt@i915_selftest@live@blt.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-snb6/igt@i915_selftest@live@blt.html

  * igt@kms_ccs@pipe-d-ccs-on-another-bo:
    - shard-tglb:         [PASS][126] -> ([DMESG-WARN][127], [DMESG-WARN][128]) ([i915#402])
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb6/igt@kms_ccs@pipe-d-ccs-on-another-bo.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb7/igt@kms_ccs@pipe-d-ccs-on-another-bo.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb6/igt@kms_ccs@pipe-d-ccs-on-another-bo.html

  * igt@kms_color@pipe-b-ctm-negative:
    - shard-skl:          [PASS][129] -> ([PASS][130], [DMESG-WARN][131]) ([i915#1982]) +7 similar issues
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl3/igt@kms_color@pipe-b-ctm-negative.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl3/igt@kms_color@pipe-b-ctm-negative.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl6/igt@kms_color@pipe-b-ctm-negative.html

  * igt@kms_cursor_crc@pipe-a-cursor-256x85-onscreen:
    - shard-kbl:          [PASS][132] -> ([DMESG-FAIL][133], [DMESG-FAIL][134]) ([i915#54] / [i915#95])
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-256x85-onscreen.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@kms_cursor_crc@pipe-a-cursor-256x85-onscreen.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-256x85-onscreen.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-skl:          [PASS][135] -> ([INCOMPLETE][136], [INCOMPLETE][137]) ([i915#300])
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl7/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl5/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl9/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - shard-skl:          [PASS][138] -> ([DMESG-WARN][139], [DMESG-WARN][140]) ([i915#1982]) +2 similar issues
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl6/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl3/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl10/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
    - shard-apl:          [PASS][141] -> ([PASS][142], [DMESG-WARN][143]) ([i915#1982]) +1 similar issue
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl6/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl1/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-untiled:
    - shard-kbl:          [PASS][144] -> ([DMESG-FAIL][145], [DMESG-FAIL][146]) ([fdo#108145] / [i915#54] / [i915#95])
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-untiled.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-untiled.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl7/igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-untiled.html

  * igt@kms_fbcon_fbt@psr-suspend:
    - shard-skl:          [PASS][147] -> ([PASS][148], [INCOMPLETE][149]) ([i915#69])
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl9/igt@kms_fbcon_fbt@psr-suspend.html
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl2/igt@kms_fbcon_fbt@psr-suspend.html
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl1/igt@kms_fbcon_fbt@psr-suspend.html

  * igt@kms_flip_tiling@flip-changes-tiling:
    - shard-apl:          [PASS][150] -> ([DMESG-FAIL][151], [DMESG-FAIL][152]) ([i915#1635] / [i915#95])
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl8/igt@kms_flip_tiling@flip-changes-tiling.html
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl2/igt@kms_flip_tiling@flip-changes-tiling.html
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl1/igt@kms_flip_tiling@flip-changes-tiling.html
    - shard-kbl:          [PASS][153] -> ([DMESG-FAIL][154], [DMESG-FAIL][155]) ([i915#95])
   [153]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl7/igt@kms_flip_tiling@flip-changes-tiling.html
   [154]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@kms_flip_tiling@flip-changes-tiling.html
   [155]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@kms_flip_tiling@flip-changes-tiling.html

  * igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-mmap-cpu:
    - shard-apl:          [PASS][156] -> ([DMESG-WARN][157], [PASS][158]) ([i915#1635] / [i915#95]) +20 similar issues
   [156]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl4/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-mmap-cpu.html
   [157]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl3/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-mmap-cpu.html
   [158]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-apl2/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-mmap-cpu.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [PASS][159] -> ([PASS][160], [FAIL][161]) ([i915#1188])
   [159]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl1/igt@kms_hdr@bpc-switch-dpms.html
   [160]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl1/igt@kms_hdr@bpc-switch-dpms.html
   [161]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl2/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_plane@plane-position-hole-dpms-pipe-a-planes:
    - shard-kbl:          [PASS][162] -> ([DMESG-WARN][163], [DMESG-WARN][164]) ([i915#93] / [i915#95]) +1 similar issue
   [162]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl7/igt@kms_plane@plane-position-hole-dpms-pipe-a-planes.html
   [163]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@kms_plane@plane-position-hole-dpms-pipe-a-planes.html
   [164]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@kms_plane@plane-position-hole-dpms-pipe-a-planes.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][165] -> ([FAIL][166], [FAIL][167]) ([fdo#108145] / [i915#265]) +1 similar issue
   [165]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-skl4/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [166]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl4/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [167]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-skl10/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_plane_cursor@pipe-b-viewport-size-128:
    - shard-kbl:          [PASS][168] -> ([DMESG-WARN][169], [PASS][170]) ([i915#78])
   [168]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl3/igt@kms_plane_cursor@pipe-b-viewport-size-128.html
   [169]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl2/igt@kms_plane_cursor@pipe-b-viewport-size-128.html
   [170]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl6/igt@kms_plane_cursor@pipe-b-viewport-size-128.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][171] -> ([FAIL][172], [FAIL][173]) ([i915#31])
   [171]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl3/igt@kms_setmode@basic.html
   [172]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl4/igt@kms_setmode@basic.html
   [173]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl1/igt@kms_setmode@basic.html

  * igt@kms_universal_plane@universal-plane-gen9-features-pipe-b:
    - shard-tglb:         [PASS][174] -> ([DMESG-WARN][175], [DMESG-WARN][176]) ([i915#1982]) +1 similar issue
   [174]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-tglb6/igt@kms_universal_plane@universal-plane-gen9-features-pipe-b.html
   [175]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb7/igt@kms_universal_plane@universal-plane-gen9-features-pipe-b.html
   [176]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-tglb5/igt@kms_universal_plane@universal-plane-gen9-features-pipe-b.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-kbl:          [PASS][177] -> ([PASS][178], [DMESG-WARN][179]) ([i915#180]) +6 similar issues
   [177]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-kbl6/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [178]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl7/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [179]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/shard-kbl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  * igt@kms_vblank@pipe-b-ts-continuation-dpms-rpm:
    - shard-apl:          [PASS][180] -> [DMESG-WARN][181] ([i915#1635] / [i915#95])
   [180]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8661/shard-apl3/igt@kms_vblank@pipe-b-ts-continuation-dpms-rpm.html

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18018/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Kill context before taking ctx->mutex
  2020-06-30 14:16     ` Tvrtko Ursulin
@ 2020-07-02 13:26       ` Maarten Lankhorst
  2020-07-02 14:51         ` Tvrtko Ursulin
  0 siblings, 1 reply; 61+ messages in thread
From: Maarten Lankhorst @ 2020-07-02 13:26 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Op 30-06-2020 om 16:16 schreef Tvrtko Ursulin:
>
> On 24/06/2020 12:05, Maarten Lankhorst wrote:
>> Killing context before taking ctx->mutex fixes a hang in
>> gem_ctx_persistence.close-replace-race, where lut_close
>> takes obj->resv.lock which is already held by execbuf,
>> causing it to stall indefinitely.
>
> If this is the consequence of inverting the locking order I think you need to move the fix earlier in the series, to precede the patch which creates the inversion. Otherwise AFAICT the re-order of kill_context vs lut_close seems fine. 

Yeah, it was just a bugfix I found when looking at the code, if you review it I can push it now so I don't have to resend.  :)

~Maarten

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Kill context before taking ctx->mutex
  2020-07-02 13:26       ` Maarten Lankhorst
@ 2020-07-02 14:51         ` Tvrtko Ursulin
  2020-07-03 10:35           ` Maarten Lankhorst
  0 siblings, 1 reply; 61+ messages in thread
From: Tvrtko Ursulin @ 2020-07-02 14:51 UTC (permalink / raw)
  To: Maarten Lankhorst, intel-gfx


On 02/07/2020 14:26, Maarten Lankhorst wrote:
> Op 30-06-2020 om 16:16 schreef Tvrtko Ursulin:
>>
>> On 24/06/2020 12:05, Maarten Lankhorst wrote:
>>> Killing context before taking ctx->mutex fixes a hang in
>>> gem_ctx_persistence.close-replace-race, where lut_close
>>> takes obj->resv.lock which is already held by execbuf,
>>> causing it to stall indefinitely.
>>
>> If this is the consequence of inverting the locking order I think you need to move the fix earlier in the series, to precede the patch which creates the inversion. Otherwise AFAICT the re-order of kill_context vs lut_close seems fine.
> 
> Yeah, it was just a bugfix I found when looking at the code, if you review it I can push it now so I don't have to resend.  :)

You are saying it's a bug in drm-tip today?

From the commit:

[ 1904.342847] 2 locks held by gem_ctx_persist/11520:
[ 1904.342849]  #0: ffff8882188e4968 (&ctx->mutex){+.+.}-{3:3}, at: context_close+0xe6/0x850 [i915]
[ 1904.342941]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: lut_close+0x2c2/0xba0 [i915]
[ 1904.343033] 3 locks held by gem_ctx_persist/11521:
[ 1904.343035]  #0: ffffc900008ff938 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_gem_do_execbuffer+0x103d/0x54c0 [i915]
[ 1904.343157]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: eb_validate_vmas+0x602/0x2010 [i915]
[ 1904.343267]  #2: ffff88820afd9200 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x335/0x2300 [i915]

I don't see two inverted locks in two threads - what is happening causing "stalling" - deadlock? Livelock?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Kill context before taking ctx->mutex
  2020-07-02 14:51         ` Tvrtko Ursulin
@ 2020-07-03 10:35           ` Maarten Lankhorst
  0 siblings, 0 replies; 61+ messages in thread
From: Maarten Lankhorst @ 2020-07-03 10:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Op 02-07-2020 om 16:51 schreef Tvrtko Ursulin:
> On 02/07/2020 14:26, Maarten Lankhorst wrote:
>> Op 30-06-2020 om 16:16 schreef Tvrtko Ursulin:
>>> On 24/06/2020 12:05, Maarten Lankhorst wrote:
>>>> Killing context before taking ctx->mutex fixes a hang in
>>>> gem_ctx_persistence.close-replace-race, where lut_close
>>>> takes obj->resv.lock which is already held by execbuf,
>>>> causing it to stall indefinitely.
>>> If this is the consequence of inverting the locking order I think you need to move the fix earlier in the series, to precede the patch which creates the inversion. Otherwise AFAICT the re-order of kill_context vs lut_close seems fine.
>> Yeah, it was just a bugfix I found when looking at the code, if you review it I can push it now so I don't have to resend.  :)
> You are saying it's a bug in drm-tip today?
>
> From the commit:
>
> [ 1904.342847] 2 locks held by gem_ctx_persist/11520:
> [ 1904.342849]  #0: ffff8882188e4968 (&ctx->mutex){+.+.}-{3:3}, at: context_close+0xe6/0x850 [i915]
> [ 1904.342941]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: lut_close+0x2c2/0xba0 [i915]
> [ 1904.343033] 3 locks held by gem_ctx_persist/11521:
> [ 1904.343035]  #0: ffffc900008ff938 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_gem_do_execbuffer+0x103d/0x54c0 [i915]
> [ 1904.343157]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: eb_validate_vmas+0x602/0x2010 [i915]
> [ 1904.343267]  #2: ffff88820afd9200 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x335/0x2300 [i915]
>
> I don't see two inverted locks in two threads - what is happening causing "stalling" - deadlock? Livelock?
>
> Regards,
>
> Tvrtko

This patch can probably be removed now that lut_lock is split out as a spinlock.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 61+ messages in thread

end of thread, other threads:[~2020-07-03 10:35 UTC | newest]

Thread overview: 61+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-23 14:28 [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 02/26] drm/i915: Revert relocation chaining commits Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 03/26] Revert "drm/i915/gem: Drop relocation slowpath" Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 04/26] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2 Maarten Lankhorst
2020-06-24  7:10   ` Thomas Hellström (Intel)
2020-06-24  7:43     ` Chris Wilson
2020-06-24  7:49       ` Thomas Hellström (Intel)
2020-06-24  8:27         ` Chris Wilson
2020-06-29 12:07   ` Tvrtko Ursulin
2020-06-29 12:32   ` Tvrtko Ursulin
2020-06-29 13:44     ` Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 05/26] drm/i915: Remove locking from i915_gem_object_prepare_read/write Maarten Lankhorst
2020-06-26 13:32   ` Thomas Hellström (Intel)
2020-06-29 12:56   ` Tvrtko Ursulin
2020-06-23 14:28 ` [Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow) Maarten Lankhorst
2020-06-26 14:41   ` Thomas Hellström (Intel)
2020-06-29 10:40     ` Maarten Lankhorst
2020-06-29 11:15       ` Thomas Hellström (Intel)
2020-06-29 11:18         ` Maarten Lankhorst
2020-06-29 14:42   ` Tvrtko Ursulin
2020-06-23 14:28 ` [Intel-gfx] [PATCH 07/26] Revert "drm/i915/gem: Split eb_vma into its own allocation" Maarten Lankhorst
2020-06-29 15:08   ` Tvrtko Ursulin
2020-06-30 11:52     ` Maarten Lankhorst
2020-06-30 12:31       ` Tvrtko Ursulin
2020-06-30 14:07         ` Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 08/26] drm/i915/gem: Make eb_add_lut interruptible wait on object lock Maarten Lankhorst
2020-06-26 13:52   ` Thomas Hellström (Intel)
2020-06-29 15:14   ` Tvrtko Ursulin
2020-06-30 11:56     ` Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 09/26] drm/i915: Use per object locking in execbuf, v12 Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 10/26] drm/i915: Use ww locking in intel_renderstate Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 11/26] drm/i915: Add ww context handling to context_barrier_task Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 12/26] drm/i915: Nuke arguments to eb_pin_engine Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 13/26] drm/i915: Pin engine before pinning all objects, v4 Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 14/26] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 15/26] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin Maarten Lankhorst
2020-06-25 14:32   ` Thomas Hellström (Intel)
2020-06-23 14:28 ` [Intel-gfx] [PATCH 16/26] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2 Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 17/26] drm/i915: Kill last user of intel_context_create_request outside of selftests Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 18/26] drm/i915: Convert i915_perf to ww locking as well Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 19/26] drm/i915: Dirty hack to fix selftests locking inversion Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 20/26] drm/i915/selftests: Fix locking inversion in lrc selftest Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 21/26] drm/i915: Use ww pinning for intel_context_create_request() Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 22/26] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2 Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 23/26] drm/i915: Add ww locking to vm_fault_gtt Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 24/26] drm/i915: Add ww locking to pin_to_display_plane Maarten Lankhorst
2020-06-23 14:28 ` [Intel-gfx] [PATCH 25/26] drm/i915: Ensure we hold the pin mutex Maarten Lankhorst
2020-06-24  1:52   ` kernel test robot
2020-06-23 14:28 ` [Intel-gfx] [PATCH 26/26] drm/i915: Kill context before taking ctx->mutex Maarten Lankhorst
2020-06-24 11:05   ` [Intel-gfx] [PATCH] " Maarten Lankhorst
2020-06-30 14:16     ` Tvrtko Ursulin
2020-07-02 13:26       ` Maarten Lankhorst
2020-07-02 14:51         ` Tvrtko Ursulin
2020-07-03 10:35           ` Maarten Lankhorst
2020-06-23 15:23 ` [Intel-gfx] [PATCH 01/26] Revert "drm/i915/gem: Async GPU relocations only" Chris Wilson
2020-06-24 11:19   ` Chris Wilson
2020-06-23 15:39 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [01/26] " Patchwork
2020-06-24 11:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/26] Revert "drm/i915/gem: Async GPU relocations only" (rev2) Patchwork
2020-06-24 11:59 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-06-24 12:48 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-07-01 13:10 ` [Intel-gfx] ✗ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.